### Look into adding non-inflammatory controls from UCSF to the "controls" group

In [1]:
library(tidyverse)
library(data.table)
library(ggplot2)
library(palettetown)
library(stringr)
library(dplyr)
library(ggpattern)
source('../0_support-files/theme_CRP-MISC.R')
source('../0_support-files/prevail_man_fill.R')
library(ggpubr)
library(rstatix)




# Fill colour for each experimental group.
# The names are also used as the factor levels of the group column later in
# the notebook, so each one must match the group label in the data exactly
# (note the underscore in "Control_Non-inflammatory"); a label that is
# missing here turns into NA when the column is re-factored.
expGroupPalette <- c(
    "Control"                        = "#00FF7F",
    "COVID-19_Nonsevere"             = "#7DB27D",
    "COVID-19_Severe"                = "#7DB27D",
    "Control_Non-inflammatory"       = "#FBE77C",
    "Acute COVID-19 Mild/ASX"        = "#F0484E",
    "Acute COVID-19 Moderate/Severe" = "#F0484E",
    "Acute MIS-C Moderate/Severe"    = "#5CB2EB"
)

# Negation of `%in%`: TRUE for each left-hand element that is NOT present in
# the right-hand vector.
`%ni%` <- Negate(`%in%`)

── [1mAttaching packages[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.8
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last


The following ob

In [2]:
# Tissue columns whose cfDNA fractions are added up into the solid-organ
# fraction further down.
solid_organ <- c("colon", "heart", "kidney", "liver",
                 "lung", "pancreas", "skin", "spleen")

# Group -> fill colour. The names double as the factor levels (and therefore
# the ordering) of the Diagnosis column below.
expGroupPalette <- c(
    "Control"                        = "#00FF7F",
    "COVID-19_Nonsevere"             = "#7DB27D",
    "COVID-19_Severe"                = "#7DB27D",
    "Control_Non-inflammatory"       = "#FBE77C",
    "Acute COVID-19 Mild/ASX"        = "#F0484E",
    "Acute COVID-19 Moderate/Severe" = "#F0484E",
    "Acute MIS-C Moderate/Severe"    = "#5CB2EB"
)

#------------------------------
## Read in MISC data
# Severity codes 0/1 become "Mild/ASX", 2/3 become "Moderate/Severe", anything
# else an empty string. Every diagnosis except "Control_Non-inflammatory" is
# then relabelled as "Acute <Diagnosis> <severity>".
master_df <- read.csv("../1_sample-data/STable8_cfdna-samples.csv") %>%
    rename(scaled_cfDNA = cfDNA_concentration,
           Sample_ID = cfdna_sample_id) %>%
    mutate(
        severity = ifelse(
            severity == 0 | severity == 1,
            "Mild/ASX",
            ifelse(severity == 2 | severity == 3, "Moderate/Severe", "")
        ),
        Diagnosis = ifelse(
            Diagnosis != "Control_Non-inflammatory",
            paste0("Acute ", Diagnosis, " ", severity),
            Diagnosis
        )
    ) %>%
    select(Sample_ID, PTID, Diagnosis, scaled_cfDNA, all_of(solid_organ), severity)

#------------------------------
## Read in adult cohort
# Only rows with origin "MCGILL" are kept; their severity label is used
# directly as the Diagnosis. The select() keeps the same columns, in the same
# order, as master_df so the two tables can be stacked.
adult_mdf <- read.delim("../1_sample-data/cfdna_adult_master-df.tsv") %>%
    filter(origin == "MCGILL") %>%
    mutate(Diagnosis = severity,
           expGroup_plotting = Diagnosis,
           icu = 0) %>%
    select(Sample_ID, PTID, Diagnosis, scaled_cfDNA, all_of(solid_organ), severity)

# head(adult_mdf)

#------------------------------
## Combine
all_abd <- rbind(adult_mdf, master_df)

#------------------------------
## Filter and Re-factor
# Labels that are not palette names become NA here.
all_abd[["Diagnosis"]] <- factor(all_abd[["Diagnosis"]],
                                 levels = names(expGroupPalette))

# all_abd$icu <- factor(all_abd$icu)

## Add solid tissue part
# Row-wise sum of the solid-organ columns, then scaled by the sample's
# scaled_cfDNA value.
all_abd[["solid_organ_frac"]] <- rowSums(all_abd[, solid_organ])
all_abd[["solid_organ_conc"]] <- all_abd[["solid_organ_frac"]] * all_abd[["scaled_cfDNA"]]

# Grouping column used by the plotting and statistics cells.
all_abd[["expGroup_plotting"]] <- all_abd[["Diagnosis"]]
# all_abd$expGroup_plotting <- factor(all_abd$expGroup_plotting, levels=names(expGroupPalette))



---
# FOR PUB

In [3]:
# Long format: one row per sample and measurement, where `variable` is either
# "solid_organ_conc" or "scaled_cfDNA" and `value` holds the number.
all_abd <- reshape2::melt(
    select(all_abd, expGroup_plotting, Sample_ID, solid_organ_conc, scaled_cfDNA),
    id.vars = c("expGroup_plotting", "Sample_ID")
)

# Re-apply the palette order as the factor levels of the grouping column.
all_abd[["expGroup_plotting"]] <- factor(all_abd[["expGroup_plotting"]],
                                         levels = names(expGroupPalette))
head(all_abd)

Unnamed: 0_level_0,expGroup_plotting,Sample_ID,variable,value
Unnamed: 0_level_1,<fct>,<chr>,<fct>,<dbl>
1,COVID-19_Nonsevere,MCGILL1,solid_organ_conc,0.005827706
2,COVID-19_Nonsevere,MCGILL20,solid_organ_conc,0.072144049
3,COVID-19_Nonsevere,MCGILL21,solid_organ_conc,0.026977047
4,COVID-19_Nonsevere,MCGILL25,solid_organ_conc,0.008103787
5,COVID-19_Nonsevere,MCGILL27,solid_organ_conc,0.00671951
6,COVID-19_Nonsevere,MCGILL26,solid_organ_conc,0.007427791


In [4]:
# Number of distinct samples per group (each sample has one row per variable
# in the long table, so duplicates are removed first).
all_abd %>%
    distinct(Sample_ID, expGroup_plotting) %>%
    pull(expGroup_plotting) %>%
    table()

.
                       Control             COVID-19_Nonsevere 
                             4                             30 
               COVID-19_Severe       Control_Non-inflammatory 
                            22                              3 
       Acute COVID-19 Mild/ASX Acute COVID-19 Moderate/Severe 
                            10                             11 
   Acute MIS-C Moderate/Severe 
                            41 

In [5]:
# Panel B: per-group boxplots of the two measurements in `all_abd`
# (scaled_cfDNA drawn plain, solid_organ_conc drawn striped), written to
# plots/panelB.pdf.
#
# The plot object is built first and the PDF device is opened only right
# before printing, so an error while assembling the plot cannot leave a
# dangling graphics device behind.

# Upper limit of the visible y-range; values above it are not drawn inside the
# panel and are instead listed as text next to an arrow (see below).
YLIM <- 10

final_plt <- all_abd %>%
    ggplot(aes(x = expGroup_plotting,
               y = value,
               fill = expGroup_plotting,
               pattern = variable)) +
    # Place holder: establishes the discrete x scale before the shaded
    # rectangle (which uses numeric x positions) is added.
    geom_boxplot(outlier.shape = NA,
                 size = 0.1) +
    theme_prevail() +
    theme(plot.margin = unit(c(2, 2, 2, 2), "pt"),
          axis.title.x = element_blank()) +
    # Shaded background from x position 3.5 to the right edge of the panel.
    annotate(geom = "rect",
             xmin = 3.5,
             xmax = Inf,
             ymin = 0,
             ymax = Inf,
             alpha = .2) +
    geom_boxplot_pattern(outlier.shape = NA,
                         size = 0.2,
                         position = position_dodge(preserve = "single"),
                         color = "black",
                         pattern_size = 0.2,
                         pattern_colour = "black",
                         pattern_fill = "white",
                         pattern_angle = 45,
                         pattern_density = 0.1,
                         pattern_spacing = 0.025,
                         pattern_key_scale_factor = 0.6) +
    # Fixed seed keeps the jitter reproducible between runs.
    geom_point(position = position_jitterdodge(jitter.height = 0,
                                               jitter.width = .25,
                                               seed = 42),
               size = 0.4) +
    scale_fill_manual(values = expGroupPalette) +
    scale_pattern_manual(values = c(solid_organ_conc = "stripe",
                                    scaled_cfDNA = "none")) +
    # Zoom rather than filter, so values above YLIM still enter the box stats.
    coord_cartesian(ylim = c(0, YLIM)) +
    scale_y_continuous(breaks = c(2, 4, 6, 8))

##---------------------------------------
# ADD ARROWS

# Groups in palette order; the index of a group is used as its x position.
subset <- names(expGroupPalette)

# Group name -> x index lookup. Not used in this cell; kept in case later
# cells rely on it.
mapping_key <- setNames(seq_along(subset), subset)

# Rows (of either variable) that fall above the visible range.
outliers <- all_abd %>% filter(value > YLIM)

text_offset <- 0.09
text_size <- 2.25
text_lineheight <- .75

arrow_size <- 0.5

for (i in seq_along(subset)) {
    group <- subset[i]
    group_outliers <- outliers %>% filter(expGroup_plotting == group)

    # Nothing to annotate for groups without off-scale values.
    if (nrow(group_outliers) == 0) {
        next
    }

    # Off-scale values, rounded to 2 decimals, largest first.
    outlier_values <- as.character(
        sort(round(group_outliers$value, 2), decreasing = TRUE)
    )

    ## ADD ARROW
    # Short upward arrow at the group's x position, plus the off-scale values
    # listed one per line just to its right at the top of the panel.
    final_plt <- final_plt +
        annotate("segment", x = i, xend = i,
                 y = 7.5, yend = 8,
                 size = arrow_size, lineend = "butt", linejoin = "mitre",
                 arrow = arrow(length = unit(.02, "npc"))) +
        annotate("text", x = i + text_offset, y = YLIM, hjust = 0, vjust = 1.2,
                 label = paste0(outlier_values, collapse = "\n"),
                 size = text_size, family = "Helvetica",
                 lineheight = text_lineheight)
}

# Render to PDF; `finally` guarantees the device is closed even if printing
# the plot fails.
pdf(file = "plots/panelB.pdf",
    width = 3.47, height = 2.25, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE)
tryCatch(
    print(final_plt),
    finally = dev.off()
)

In [11]:

# Reference group: only comparisons involving this group are reported.
GROUP <- "Acute MIS-C Moderate/Severe"

# Pairwise unpaired Wilcoxon tests on solid_organ_conc between all groups.
# The BH adjustment and the bracket positions are computed over the full set
# of pairwise comparisons; the table is narrowed to GROUP only afterwards.
stat.test <- all_abd %>%
    as.data.frame() %>%
    filter(variable == "solid_organ_conc") %>%
    # filter(variable == "scaled_cfDNA") %>%
    wilcox_test(value ~ expGroup_plotting, paired = FALSE) %>%
    adjust_pvalue(method = "BH") %>%
    add_significance("p.adj") %>%
    add_xy_position(x = "expGroup_plotting") %>%
    filter(group1 == GROUP | group2 == GROUP)

stat.test

.y.,group1,group2,n1,n2,statistic,p,p.adj,p.adj.signif,y.position,groups,xmin,xmax
<chr>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<named list>,<dbl>,<dbl>
value,Control,Acute MIS-C Moderate/Severe,4,41,0,1.34e-05,5.005e-05,****,14.703,"Control , Acute MIS-C Moderate/Severe",1,7
value,COVID-19_Nonsevere,Acute MIS-C Moderate/Severe,30,41,10,2.9e-18,6.09e-17,****,19.995,"COVID-19_Nonsevere , Acute MIS-C Moderate/Severe",2,7
value,COVID-19_Severe,Acute MIS-C Moderate/Severe,22,41,109,1.06e-07,7.42e-07,****,24.2286,"COVID-19_Severe , Acute MIS-C Moderate/Severe",3,7
value,Control_Non-inflammatory,Acute MIS-C Moderate/Severe,3,41,6,0.003,0.005727273,**,27.4038,"Control_Non-inflammatory , Acute MIS-C Moderate/Severe",4,7
value,Acute COVID-19 Mild/ASX,Acute MIS-C Moderate/Severe,10,41,11,4.42e-06,2.3205e-05,****,29.5206,"Acute COVID-19 Mild/ASX , Acute MIS-C Moderate/Severe",5,7
value,Acute COVID-19 Moderate/Severe,Acute MIS-C Moderate/Severe,11,41,153,0.107,0.1321765,ns,30.579,"Acute COVID-19 Moderate/Severe, Acute MIS-C Moderate/Severe",6,7


In [8]:
# Peek at the first rows of the long-format table.
head(all_abd)

Unnamed: 0_level_0,expGroup_plotting,Sample_ID,variable,value
Unnamed: 0_level_1,<fct>,<chr>,<fct>,<dbl>
1,COVID-19_Nonsevere,MCGILL1,solid_organ_conc,0.005827706
2,COVID-19_Nonsevere,MCGILL20,solid_organ_conc,0.072144049
3,COVID-19_Nonsevere,MCGILL21,solid_organ_conc,0.026977047
4,COVID-19_Nonsevere,MCGILL25,solid_organ_conc,0.008103787
5,COVID-19_Nonsevere,MCGILL27,solid_organ_conc,0.00671951
6,COVID-19_Nonsevere,MCGILL26,solid_organ_conc,0.007427791


In [8]:
# Mean value per group and measurement type.
summarise(
    group_by(all_abd, expGroup_plotting, variable),
    mean = mean(value)
)

[1m[22m`summarise()` has grouped output by 'expGroup_plotting'. You can override using the `.groups` argument.


expGroup_plotting,variable,mean
<fct>,<fct>,<dbl>
Control,solid_organ_conc,0.001773368
Control,scaled_cfDNA,0.022023988
COVID-19_Nonsevere,solid_organ_conc,0.023256854
COVID-19_Nonsevere,scaled_cfDNA,0.099895738
COVID-19_Severe,solid_organ_conc,0.151674988
COVID-19_Severe,scaled_cfDNA,0.921144724
Control_Non-inflammatory,solid_organ_conc,0.108054663
Control_Non-inflammatory,scaled_cfDNA,0.723333333
Acute COVID-19 Mild/ASX,solid_organ_conc,0.053308163
Acute COVID-19 Mild/ASX,scaled_cfDNA,1.1485
