In [12]:
library(data.table)
library(ggplot2)
library(stringr)
library(dplyr)
library(reshape2)
source('~/theme_alex.R')
source('~/prevail_man_fill.R')
library(rstatix)
library(ggpubr)

# expGroupPalette = c('#00FFFF','#21aaf8','#d42069','#76d75b',
#                    "#00FF7F","#FF00FF","#FF7F00")


# names(expGroupPalette) <- c("MIS-C CNH","MIS-C","COVID-19","Control_Non-inflammatory",
#                            "Control","COVID-19_Nonsevere","COVID-19_Severe")


# Fill colours keyed by the experimental-group label used on the x axis.
# The order of the names also defines the factor level order used for plotting.
expGroupPalette <- setNames(
    c("#00FF7F",
      "#7DB27D",
      "#7DB27D",
      '#FBE77C',
      '#F0484E',
      '#F0484E',
      '#5CB2EB'),
    c("Control",
      "COVID-19_Nonsevere",
      "COVID-19_Severe",
      "Control_Non-inflammatory",
      "Acute COVID-19 Mild/ASX",
      "Acute COVID-19 Moderate/Severe",
      "Acute MIS-C Moderate/Severe")
)


# "not in": element-wise negation of %in%.
`%ni%` <- Negate(`%in%`)

---
## Read in master df and manipulate

In [13]:
## Identifier columns kept alongside the tissue fractions and used as id.vars
## when melting to long format.
ID_VARS <- c("PTID","Sample_ID","Diagnosis","timepoint","expGroup_plotting","scaled_cfDNA")

## Tissue / cell-type fraction columns.
TISS_COLS <- c('bcell','colon','dendritic','eosinophil','erythroblast','heart','hsc','kidney','liver','lung','macrophage','megakaryocyte','monocyte','neutrophil','nkcell','pancreas','progenitor','skin','spleen','tcell')

## NOTE(review): these labels predate the "Acute <diagnosis> <severity>" labels
## built below ("COVID-19" and "MIS-C" no longer occur as-is), so they are no
## longer used as factor levels in this cell. Kept in case other cells read it.
XORDER <- c("Control","COVID-19_Nonsevere","COVID-19_Severe","Control_Non-inflammatory","COVID-19","MIS-C")


## Read in MISC data
## severity 0/1 -> "Mild/ASX", 2/3 -> "Moderate/Severe", anything else -> "".
## Every diagnosis except "Control_Non-inflammatory" is relabelled
## "Acute <Diagnosis> <severity>"; the plotting group is the same label.
master_df <- read.csv("../1_sample-data/STable8_cfdna-samples.csv") %>%
    rename(scaled_cfDNA = cfDNA_concentration,
           Sample_ID = cfdna_sample_id) %>%
    mutate(severity = ifelse(severity == 0 | severity == 1, "Mild/ASX",
                             ifelse(severity == 2 | severity == 3, "Moderate/Severe",
                                    ""))) %>%
    mutate(Diagnosis = ifelse(Diagnosis != "Control_Non-inflammatory",
                              paste0("Acute ", Diagnosis, " ", severity),
                              Diagnosis)) %>%
    mutate(expGroup_plotting = Diagnosis) %>%
    select(all_of(c(ID_VARS, TISS_COLS)))

## Keep only the first row seen for each patient.
master_df <- master_df[!duplicated(master_df$PTID), ]


## Read in adult cohort
## Only MCGILL samples are used; the severity column doubles as both the
## diagnosis and the plotting group. (An `icu` column used to be added here,
## but it was discarded by the select() below; it is now set after the melt.)
adult_mdf <- read.delim("../1_sample-data/cfdna_master.adult.tsv") %>%
    filter(origin == "MCGILL") %>%
    mutate(Diagnosis = severity,
           expGroup_plotting = severity,
           timepoint = "acute") %>%
    select(all_of(c(ID_VARS, TISS_COLS)))


## Combine
TOO_df <- rbind(master_df, adult_mdf)

## Melt and calculate TOO concentration
## (tissue fraction x cfDNA concentration, one row per sample x tissue)
TOO_df_melt <- melt(TOO_df, id.vars = ID_VARS)
TOO_df_melt$value_abundance <- TOO_df_melt$scaled_cfDNA * TOO_df_melt$value

## Diagnosis and expGroup_plotting hold the same labels (see above), so both
## use the palette names as levels. Labels missing from the palette become NA.
TOO_df_melt$Diagnosis <- factor(TOO_df_melt$Diagnosis, levels = names(expGroupPalette))
TOO_df_melt$expGroup_plotting <- factor(TOO_df_melt$expGroup_plotting, levels = names(expGroupPalette))

## Neither input keeps an `icu` column past select(), so create it explicitly
## and default missing values to 0 before converting to a factor.
if (!("icu" %in% names(TOO_df_melt))) {
    TOO_df_melt$icu <- 0
}
TOO_df_melt$icu[is.na(TOO_df_melt$icu)] <- 0
TOO_df_melt$icu <- factor(TOO_df_melt$icu)

---
## Plotting for paper figure

In [14]:
# Every tissue / cell type present in the melted table.
all_tissue_types <- unique(TOO_df_melt$variable)

# Solid organs; everything else is treated as non-solid.
solid_organ <- c("colon", "heart", "kidney", "liver",
                 "lung", "pancreas", "skin", "spleen")
non_solid_organ <- all_tissue_types[is.na(match(all_tissue_types, solid_organ))]

# Cell types summed for the "innate" panel.
innate <- c("nkcell", "dendritic", "eosinophil",
            "macrophage", "monocyte", "neutrophil")
# innate <- c("dendritic","eosinophil","macrophage","monocyte")

# Cell types summed for the "adaptive" panel.
adaptive <- c("bcell", "tcell")

##---------------------------------------
# Function 

#' Box plot of per-sample summed cfDNA abundance for a set of tissue types
#'
#' Keeps the rows of `df` whose `variable` is in `group`, sums
#' `value_abundance` within each sample, and draws one box plus jittered
#' points per `expGroup_plotting` value. Samples whose sum exceeds `YLIM` are
#' marked with an upward arrow ending at `YLIM`, with their values (rounded to
#' two decimals, largest first) printed beside it. The visible callers clip
#' the y axis at `YLIM` with `coord_cartesian()`.
#'
#' Uses the global `expGroupPalette` for fill colours and `theme_alex()` for
#' the theme. Pairwise p-values are not computed here; the Wilcoxon test this
#' function used to run was never added to the plot or returned.
#'
#' @param group Character vector of tissue names matched against `df$variable`.
#' @param text Unused; kept so existing calls keep working.
#' @param df Long-format data frame with columns `variable`, `Sample_ID`,
#'   `expGroup_plotting`, `icu` and `value_abundance`.
#' @param YLIM Numeric; sums above this value are drawn as off-scale arrows.
#' @return A ggplot object.
make_boxplot <- function(group, text, df, YLIM) {

    # One row per sample: total abundance over the requested tissue types.
    TOO_df_sum <- df %>%
        filter(variable %in% group) %>%
        group_by(Sample_ID, expGroup_plotting, icu) %>%
        summarize(sum = sum(value_abundance), .groups = "drop")

    bxplt <- ggplot(TOO_df_sum, aes(x = expGroup_plotting, y = sum)) +
        geom_boxplot(aes(fill = expGroup_plotting),
                     outlier.shape = NA, size = 0.2, color = "black") +
        geom_point(aes(group = expGroup_plotting, fill = expGroup_plotting),
                   position = position_jitterdodge(jitter.width = 0.2, seed = 100, jitter.height = 0),
                   size = 0.25, alpha = 1) +
        theme_alex() +
        scale_fill_manual(values = expGroupPalette) +
        # No layer maps colour or shape at the moment; these scales only take
        # effect if an `icu` aesthetic is added.
        scale_color_manual(values = c('0' = "black",
                                      '1' = "black")) +
        scale_shape_manual(values = c('0' = 16,
                                      '1' = 4))

    ##---------------------------------------
    # ADD ARROWS

    offset_y <- 0.20        # vertical spacing between labels in the same group
    arrow_drop <- 0.25      # arrow tail sits this far below YLIM
    text_size <- 1.5
    text_lineheight <- 0.75
    arrow_size <- 0.5

    # A discrete x scale drops unused levels by default, so a group's x
    # position is its rank among the levels that actually occur in the data.
    x_levels <- levels(droplevels(as.factor(TOO_df_sum$expGroup_plotting)))

    outliers <- TOO_df_sum %>%
        filter(sum > YLIM)

    # split() drops rows whose group is NA, so those get no arrow.
    outliers_by_group <- split(outliers$sum,
                               as.character(outliers$expGroup_plotting))

    for (group_name in names(outliers_by_group)) {

        XVAL <- match(group_name, x_levels)
        outlier_values <- as.character(
            sort(round(outliers_by_group[[group_name]], 2), decreasing = TRUE)
        )

        ## ADD ARROW
        bxplt <- bxplt + annotate("segment", x = XVAL, xend = XVAL,
                                  y = YLIM - arrow_drop, yend = YLIM,
                                  size = arrow_size, lineend = "butt", linejoin = "mitre",
                                  arrow = arrow(length = unit(.05, "npc")))

        # Largest value at YLIM, each following value offset_y lower.
        bxplt <- bxplt + annotate("text", x = XVAL,
                                  y = YLIM - (seq_along(outlier_values) - 1) * offset_y,
                                  hjust = 1.5, vjust = .1,
                                  label = outlier_values,
                                  size = text_size, family = "Helvetica",
                                  lineheight = text_lineheight, color = "black")
    }

    bxplt
}

#### adaptive and innate

In [15]:
# Tissue sets plotted in this figure panel, keyed by panel name.
groups_other <- list("adaptive" = adaptive,
                     "innate" = innate)


# Print `plt` to a PDF at `path` (size in inches). The device is closed even
# if printing fails, and the output directory is created when missing.
save_panel_pdf <- function(plt, path, width, height) {
    dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)
    pdf(file = path,
        width = width, height = height, paper = "special", bg = "white",
        fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE)
    on.exit(dev.off(), add = TRUE)
    print(plt)
    invisible(path)
}


WIDTH <- 1.76
HEIGHT <- 1.3


## adaptive panel
YLIM <- 1.3
gname <- "adaptive"

# The plot is built before the device is opened, so a failure in
# make_boxplot() cannot leave a PDF device open.
final_plt <- make_boxplot(groups_other[[gname]], gname, TOO_df_melt, YLIM) +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank()) +
    labs(x = "", y = "") +
    coord_cartesian(ylim = c(0, YLIM)) +
    scale_y_continuous(breaks = c(0, 0.3, 0.6, 0.9))

# NOTE(review): this file name carries a trailing "c" that the innate panel's
# name does not; left as-is, confirm it is intentional.
save_panel_pdf(final_plt, paste0("plots/panelC_",gname,"c.pdf"), WIDTH, HEIGHT)



## innate panel
YLIM <- 4.2
gname <- "innate"

WIDTH <- 1.696

final_plt <- make_boxplot(groups_other[[gname]], gname, TOO_df_melt, YLIM) +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank()) +
    labs(x = "", y = "") +
    coord_cartesian(ylim = c(0, YLIM)) +
    scale_y_continuous(breaks = c(0, 1.0, 2.0, 3.0))

save_panel_pdf(final_plt, paste0("plots/panelC_",gname,".pdf"), WIDTH, HEIGHT)


[1m[22m`summarise()` has grouped output by 'Sample_ID', 'expGroup_plotting'. You can override using the `.groups`
argument.


[1m[22m`summarise()` has grouped output by 'Sample_ID', 'expGroup_plotting'. You can override using the `.groups`
argument.


In [17]:
# Panel (tissue set) to test and the group whose pairwise comparisons to show.
gname <- "adaptive"
# gname <- "innate"


GROUP <- "Acute COVID-19 Mild/ASX"


group <- groups_other[[gname]]

# Per-sample total abundance over the selected tissue types.
TOO_df_sum <- TOO_df_melt %>%
    filter(variable %in% group) %>%
    group_by(Sample_ID, expGroup_plotting, icu) %>%
    summarize(sum = sum(value_abundance))
TOO_df_sum <- ungroup(TOO_df_sum)

# TOO_df_sum %>% head()

# Unpaired pairwise Wilcoxon tests between plotting groups with BH-adjusted
# p-values, then keep only the comparisons that involve GROUP.
stat.test <- wilcox_test(TOO_df_sum, sum ~ expGroup_plotting, paired = FALSE)
stat.test <- adjust_pvalue(stat.test, method = "BH")
stat.test <- add_significance(stat.test, "p.adj")
stat.test <- add_xy_position(stat.test, x = "expGroup_plotting")
stat.test <- filter(stat.test, group1 %in% GROUP | group2 %in% GROUP)

stat.test

[1m[22m`summarise()` has grouped output by 'Sample_ID', 'expGroup_plotting'. You can
override using the `.groups` argument.


.y.,group1,group2,n1,n2,statistic,p,p.adj,p.adj.signif,y.position,groups,xmin,xmax
<chr>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<named list>,<dbl>,<dbl>
sum,Control,Acute COVID-19 Mild/ASX,4,10,6,0.054,0.1134,ns,2.691906,"Control , Acute COVID-19 Mild/ASX",1,5
sum,COVID-19_Nonsevere,Acute COVID-19 Mild/ASX,30,10,72,0.015,0.07875,ns,3.824016,"COVID-19_Nonsevere , Acute COVID-19 Mild/ASX",2,5
sum,COVID-19_Severe,Acute COVID-19 Mild/ASX,22,10,78,0.2,0.28,ns,4.729704,"COVID-19_Severe , Acute COVID-19 Mild/ASX",3,5
sum,Control Non-inflammatory,Acute COVID-19 Mild/ASX,5,10,11,0.095,0.1813636,ns,5.40897,"Control Non-inflammatory, Acute COVID-19 Mild/ASX",4,5
sum,Acute COVID-19 Mild/ASX,Acute COVID-19 Moderate/Severe,10,11,67,0.417,0.4865,ns,6.088236,"Acute COVID-19 Mild/ASX , Acute COVID-19 Moderate/Severe",5,6
sum,Acute COVID-19 Mild/ASX,Acute MIS-C Moderate/Severe,10,41,219,0.748,0.7854,ns,6.314658,"Acute COVID-19 Mild/ASX , Acute MIS-C Moderate/Severe",5,7
