In [1]:
library(data.table)
library(ggplot2)
library(stringr)
library(dplyr)
library(reshape2)
source('../0_support-files/theme_CRP-MISC.R')
source('../0_support-files/prevail_man_fill.R')


'%ni%' <- Negate('%in%')


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘reshape2’


The following objects are masked from ‘package:data.table’:

    dcast, melt




---
## Read in master df and manipulate

In [2]:
# Read in data frame and filter samples
master_df = read.csv("../1_sample-data/STable8_cfdna-samples.csv") %>% 
    rename(scaled_cfDNA = cfDNA_concentration) %>%
    mutate(severity = ifelse(severity == 0 | severity == 1,"Mild/ASX",
                             ifelse(severity == 2 | severity == 3,"Moderate/Severe",
                                    ""))) %>%
    mutate(Diagnosis = ifelse(Diagnosis != "Control_Non-inflammatory", 
                              paste0("Acute ",Diagnosis," ",severity),
                                     Diagnosis)) %>%
    rename(Sample_ID = cfdna_sample_id) %>% 
    mutate(expGroup_plotting = Diagnosis)


# Scale each tissue fraction by scaled cfDNA
tissues = c('bcell','colon','dendritic','eosinophil','erythroblast','heart','hsc','kidney','liver','lung','macrophage','megakaryocyte','monocyte','neutrophil','nkcell','pancreas','progenitor','skin','spleen','tcell')
for (i in tissues){
    master_df[[paste0(i,"_conc")]] <- master_df[[i]] * master_df$scaled_cfDNA
}
    
# Calculate conc of subsets
solid_organ = c("colon","heart","kidney","liver","lung","pancreas","skin","spleen")
master_df$solid_organ <- rowSums(master_df[,solid_organ])
              
non_solid_organ = tissues[!(tissues %in% solid_organ)]
master_df$non_solid_organ <- rowSums(master_df[,non_solid_organ])

innate <- c("dendritic","eosinophil","macrophage","monocyte","neutrophil")
master_df$innate <- rowSums(master_df[,innate])

adaptive <- c("bcell","nkcell","tcell")
master_df$adaptive <- rowSums(master_df[,adaptive])

# Read in extended meta data
meta_ext <- read.csv("..//12_02_2021 Clinical data pull_Prevail plasma.csv")
meta_ext$record_id = toupper(meta_ext$record_id)

# Merge
master_df <- merge(master_df, meta_ext, by.x="PTID", by.y="record_id")

master_df <- master_df[which(master_df$Sample_ID != "PTCOV77"),]

master_df <- master_df[which(as.Date(master_df$Date) == as.Date(master_df$Date.of.V1.blood.collection,format="%m/%d/%y")),]

“cannot open file '../../0_metadata/12_02_2021 Clinical data pull_Prevail plasma.csv': No such file or directory”


ERROR: Error in file(file, "rt"): cannot open the connection


In [4]:
tissue_traits = c("scaled_cfDNA",tissues,paste0(tissues,"_conc"),"solid_organ","non_solid_organ","innate","adaptive")
clin_traits = c('WBC.max','ANC.max','ALC.min','Platelet.min','ESR.max','Na.min','Creatinine.max','ALT.max','BNP.max','Troponin.max','Ferritin.max','CRP.max','IL.2R.max')

tissue_traits <- tissue_traits[!grepl("skin|dendritic",tissue_traits)]
clin_traits <- clin_traits[!grepl("IL.2R",clin_traits)]

output = list()

for (var1 in tissue_traits){
    for (var2 in clin_traits){
       
        var1_val <- as.numeric(master_df[,var1])
        var2_val <- as.numeric(master_df[,var2])
                
        p_res <- cor.test(var1_val,var2_val,method="pearson")
        pearson = p_res$estimate 
        pearson_pval = p_res$p.value
        
        s_res <- cor.test(var1_val,var2_val,method="spearman",exact=FALSE)
        spearman =  s_res$estimate
        spearman_pval = s_res$p.value
        
        output[[paste0(var1,"_",var2)]] <- c("var1"=var1,
                                             "var2"=var2,
                                             "pearson"=pearson,
                                             "pearson_pval"=pearson_pval,
                                             "spearman" = spearman,
                                             "spearman_pval"=spearman_pval)
    }
}

df <- data.frame(do.call("rbind",output))

df <- df[which((df$pearson_pval < 0.05) & (df$spearman_pval < 0.05)),]

head(df[order(df$spearman.rho,decreasing=TRUE),],20)

Unnamed: 0_level_0,var1,var2,pearson.cor,pearson_pval,spearman.rho,spearman_pval
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
liver_conc_ALT.max,liver_conc,ALT.max,0.303464474917193,0.0360139360590835,0.504585736522006,0.0002550994555294
kidney_conc_Creatinine.max,kidney_conc,Creatinine.max,0.399212248695314,0.0044897069545301,0.498287067309989,0.0002689008974706
kidney_conc_CRP.max,kidney_conc,CRP.max,0.410382712361467,0.0041691221821721,0.461483967059044,0.0010946719357524
innate_ANC.max,innate,ANC.max,0.372822710474899,0.0098540588363161,0.454337833730372,0.0013364611668754
innate_WBC.max,innate,WBC.max,0.329736946521161,0.0220974603965574,0.450173686495875,0.0013251794405216
liver_ALT.max,liver,ALT.max,0.289150903713506,0.0462292173237288,0.446335986037706,0.0014737756506374
kidney_conc_BNP.max,kidney_conc,BNP.max,0.353103112542573,0.0160816833105917,0.434606486956295,0.0025437795151008
lung_conc_CRP.max,lung_conc,CRP.max,0.339341724274313,0.0196231713439081,0.395844320337196,0.0058837980549133
neutrophil_conc_ANC.max,neutrophil_conc,ANC.max,0.325833346941318,0.0254151521329729,0.37149465163342,0.0101406916895275
kidney_CRP.max,kidney,CRP.max,0.327640107110299,0.0245661800831886,0.365494838195165,0.0115267454641111


In [6]:
color_groups = c('COVID-19' = '#F0484E', 'MIS-C' = '#5CB2EB', 'Control_Non-inflammatory' = '#FBE77C',"MIS-C\nCNH" = '#2BC0B3')



---

In [8]:
make_plot <- function(var1,var2){

    master_df %>%
    ggplot(aes(x=as.numeric(.data[[var1]]),y=as.numeric(.data[[var2]]),color=Diagnosis))+
    geom_point(size = 0.75)+
    theme_alex()  + 
    theme(plot.title = element_blank(),
      axis.title.y = element_blank(),
      axis.title.x = element_blank(),
     ) + 
    scale_color_manual(values=color_groups)
    
    }

WIDTH = 1.48
HEIGHT = 1.50


##---------------------------
var1 = "liver_conc"
var2 = "ALT.max"

XLIM = 1.5
YLIM = 420

WIDTH = 1.48
HEIGHT = 1.50

pdf(file=paste0("plots/panelE_liver-ALT.pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

plt <- make_plot(var1,var2) + 
# labs(x="Liver cfDNA Fraction",y="ALT.max", title = "pearson's r: 0.30\nspearman's p: 0.51" ) + 
scale_x_continuous(breaks = c(0,0.5,1.0,1.5))+
scale_y_continuous(breaks = c(0,140,280,420))+
coord_cartesian(ylim = c(0,YLIM),xlim = c(0,XLIM))

cat(var1)
cat(" + ")
cat(var2)
cat("\n")
print(df[which(df$var1 == var1 & df$var2 == var2),c("pearson.cor","pearson_pval")])
cat("--------------------\n")

print(plt)

dev.off()

##---------------------------

var1 = "kidney_conc"
var2 = "Creatinine.max"

XLIM = .225
YLIM = 4.5

WIDTH = 1.37
HEIGHT = 1.50

pdf(file=paste0("plots/panelE_kidneyconc-Creatinine.pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

plt <- make_plot(var1,var2) +
scale_x_continuous(breaks = c(0,0.075,0.15,0.225))+
scale_y_continuous(breaks = c(0,1.5,3.0,4.5))+
coord_cartesian(ylim = c(0,YLIM),xlim = c(0,XLIM))

cat(var1)
cat(" + ")
cat(var2)
cat("\n")
print(df[which(df$var1 == var1 & df$var2 == var2),c("pearson.cor","pearson_pval")])
cat("--------------------\n")

print(plt)

dev.off()

# ##---------------------------

# var1 = "innate"
# var2 = "WBC.max"


# XLIM = 17
# YLIM = 35

# WIDTH = 1.42
# HEIGHT = 1.50

# pdf(file=paste0("plots/panelE_innate-WBC.pdf"),
#         width=WIDTH,height=HEIGHT, paper="special", bg="white",
#         fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

# plt <- make_plot(var1,var2) #+ theme(plot.title = element_text(hjust = 0.5,size=6))

# cat(var1)
# cat(" + ")
# cat(var2)
# cat("\n")
# print(df[which(df$var1 == var1 & df$var2 == var2),c("pearson.cor","pearson_pval")])
# cat("--------------------\n")

# print(plt)

# dev.off()

##---------------------------

var1 = "scaled_cfDNA"
var2 = "CRP.max"


XLIM = 18
YLIM = 36

WIDTH = 1.42
HEIGHT = 1.50

pdf(file=paste0("plots/panelE_scaledcfDNA-CRP.pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

plt <- make_plot(var1,var2) +
scale_x_continuous(breaks = c(0,6,12,18))+
scale_y_continuous(breaks = c(0,12,24,36))+
coord_cartesian(ylim = c(0,YLIM),xlim = c(0,XLIM))

cat(var1)
cat(" + ")
cat(var2)
cat("\n")
print(df[which(df$var1 == var1 & df$var2 == var2),c("pearson.cor","pearson_pval")])
cat("--------------------\n")

print(plt)

dev.off()

liver_conc + ALT.max
                         pearson.cor       pearson_pval
liver_conc_ALT.max 0.303464474917193 0.0360139360590835
--------------------


“Removed 4 rows containing missing values (geom_point).”


kidney_conc + Creatinine.max
                                 pearson.cor        pearson_pval
kidney_conc_Creatinine.max 0.399212248695314 0.00448970695453012
--------------------


“Removed 3 rows containing missing values (geom_point).”


scaled_cfDNA + CRP.max
                           pearson.cor       pearson_pval
scaled_cfDNA_CRP.max 0.288562492677635 0.0491738378796285
--------------------


“Removed 5 rows containing missing values (geom_point).”
