# Immunosuppressive drug exposure ~ EBV DNAemia

Clean up query results in `03_Immunosuppressive_drug_exposure` and plot associations.

In [None]:
library(data.table)
library(dplyr)
library(BuenColors)

In [None]:
# Load the immunosuppressive drug exposure query results.
immsup_df <- data.table::fread("immunosuppressive_drugs_df.csv")

In [None]:
# Preview the first rows of the drug exposure table.
immsup_df %>% head()

In [None]:
# Distinct drug concept IDs present in the exposure table.
all_drugs <- unique(immsup_df[["drug_concept_id"]])

In [None]:
# Number of distinct drug concept IDs (the original run noted 53).
all_drugs %>% length()

In [None]:
# Export the unique (drug_concept_id, drug_source_value) pairs to CSV.
immsup_df %>%
  dplyr::select(drug_concept_id, drug_source_value) %>%
  unique() %>%
  fwrite(file = "all_drugs_aou.csv")

In [None]:
# Load the EBV table and derive has_ebv: 1 when ebv_q30_30x exceeds 0.0018,
# otherwise 0.
ebv_df <- fread("../intermediate/ebv_equivalent_30x.csv")
ebv_df <- ebv_df %>%
  dplyr::mutate(has_ebv = as.integer(ebv_q30_30x > 0.0018))

In [None]:
# Attach sex_at_birth and biosample_collection_date from the genomic metrics
# table, keyed on `person` (renamed from research_id).
gm <- data.frame(fread("../data/genomic_metrics.tsv")) %>%
  dplyr::rename(person = research_id) %>%
  dplyr::select(person, sex_at_birth, biosample_collection_date)
# Treat empty strings as missing values.
gm[gm == ""] <- NA

ebv_df <- dplyr::left_join(ebv_df, gm, by = "person")

In [None]:
# Attach birth_datetime from the demographics table, matching
# ebv_df$person to demo_df$person_id.
demo_df <- fread("demographic_df.csv")
demo_df_clean <- dplyr::select(demo_df, person_id, birth_datetime)
ebv_df <- dplyr::left_join(
  ebv_df,
  demo_df_clean,
  by = c("person" = "person_id")
)

In [None]:
library(lubridate)
# Age in completed years at biosample collection.
# time_length() on an interval counts calendar years; dividing the interval by
# a fixed-length duration year (365.25 days) can floor to one year too few
# when the sample is collected on or just after a birthday.
# birth_datetime is dropped once age has been derived.
ebv_df <- ebv_df %>%
  dplyr::mutate(
    age = floor(
      time_length(
        interval(start = birth_datetime, end = biosample_collection_date),
        unit = "years"
      )
    )
  ) %>%
  dplyr::select(-birth_datetime)

In [None]:
# Replace every empty-string cell in ebv_df with NA.
ebv_df[ebv_df==""] <- NA

In [None]:
# Keep only the person, drug and exposure-window columns.
immsup_df_clean <- immsup_df %>%
  dplyr::select(
    person_id,
    drug_concept_id,
    drug_exposure_start_date,
    drug_exposure_end_date
  )

In [None]:
# Keep only exposures for people present in ebv_df and bring in their
# ebv_df columns (person_id matched to ebv_df$person).
immsup_df_clean <- dplyr::inner_join(
  immsup_df_clean,
  ebv_df,
  by = c("person_id" = "person")
)

In [None]:
# Replace "" with NA only in character columns.
# vapply() guarantees a logical vector regardless of input shape, unlike
# sapply(), whose return type depends on its input.
char_cols <- names(immsup_df_clean)[
  vapply(immsup_df_clean, is.character, logical(1))
]
# Update the selected columns by reference.
immsup_df_clean[
  ,
  (char_cols) := lapply(.SD, function(x) fifelse(x == "", NA_character_, x)),
  .SDcols = char_cols
]

In [None]:
# Flag exposures (in_range = 1) whose window contains the biosample
# collection date. A missing end date is treated as an open-ended exposure.
immsup_df_clean <- dplyr::mutate(
  immsup_df_clean,
  in_range = as.integer(
    biosample_collection_date >= drug_exposure_start_date &
      (is.na(drug_exposure_end_date) |
        biosample_collection_date <= drug_exposure_end_date)
  )
)

In [None]:
# Unique person IDs with at least one exposure flagged in_range == 1.
immsup_IDs <- immsup_df_clean %>%
  dplyr::filter(in_range == 1) %>%
  dplyr::pull(person_id) %>%
  unique()

In [None]:
# Number of people with an in-range exposure (the original run noted 3725).
immsup_IDs %>% length()

In [None]:
# Add immsup: 1 when the person is listed in immsup_IDs, otherwise 0.
ebv_df_all <- dplyr::mutate(
  ebv_df,
  immsup = as.integer(person %in% immsup_IDs)
)

In [None]:
# Fisher's exact test on the immsup x has_ebv contingency table.
fisher.test(
  table(ebv_df_all$immsup, ebv_df_all$has_ebv)
)

In [None]:
# Per (sex_at_birth, immsup) group: percentage of EBV-positive samples, the
# sample variance of the 0/1 positivity indicator, the number of positives
# (hits) and the group size (count).
# sem is the standard error of the mean of the indicator, sqrt(var / count),
# scaled by 100 so it is in the same percent units as ebv_positive_percent.
# The result is ungrouped so later steps do not inherit the grouping.
df_immsup_sex <- ebv_df_all %>%
  dplyr::group_by(sex_at_birth, immsup) %>%
  dplyr::summarise(
    ebv_positive_percent = mean(has_ebv == 1) * 100,
    var = var(has_ebv == 1),
    hits = sum(has_ebv == 1),
    count = n()
  ) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(sem = 100 * sqrt(var / count))

In [None]:
# Drop summary rows that contain any NA.
df_immsup_sex <- na.omit(df_immsup_sex)

In [None]:
# Dodged bar chart of % EBV DNA+ by sex at birth, one bar per immsup level,
# with error bars at +/- sem. The legend is hidden.
pimmsupsex <- ggplot(
  df_immsup_sex,
  aes(x = sex_at_birth, y = ebv_positive_percent, fill = as.factor(immsup))
) +
  geom_col(color = "black", position = position_dodge()) +
  geom_errorbar(
    aes(ymin = ebv_positive_percent - sem, ymax = ebv_positive_percent + sem),
    width = 0.2,
    position = position_dodge(.9)
  ) +
  scale_fill_manual(values = c("lightgrey", "darkgrey")) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "Sex at birth", y = "% EBV DNA+") +
  # Theme calls stay in their original order because later ones override
  # earlier ones.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  pretty_plot(fontsize = 8) +
  L_border() +
  theme(legend.position = "none")

In [None]:
# Save the figure as a 1.8 x 1.8 PDF. Arguments are named in full
# (`filename`, `plot`) rather than relying on partial argument matching.
cowplot::ggsave2(
  filename = "plots/EBV_Immunosuppressed.pdf",
  plot = pimmsupsex,
  width = 1.8,
  height = 1.8
)