In [1]:
suppressMessages(require(tidyverse))
suppressMessages(library(DESeq2))

suppressMessages(require(pheatmap))
suppressMessages(require(RColorBrewer))
suppressMessages(require(dendextend))
suppressMessages(require(ComplexHeatmap))
suppressMessages(require(circlize))

suppressMessages(library(data.table))
suppressMessages(library(rstatix))
suppressMessages(library(ggpubr))

suppressMessages(library(grid))
suppressMessages(library(gridExtra))


source("../0_support-files/theme_CRP-MISC.R")
source("./fig2_functions.r")

annotation <- fread(file="../0_support-files/gencode.biotype.name.key.tsv")

In [4]:
##------------------------------------
## Load results and metadata
##------------------------------------

covid_control <- readRDS("./daa_output/covid-control.rds")
misc_control <- readRDS("./daa_output/misc-control.rds")
misc_covid <- readRDS("./daa_output/misc-covid.rds")


## Load metadata
meta_data_all <- read.delim("../1_sample-data/STable6_cfrna-samples.tsv") 

## clean up ivig timing
meta_data_all[is.na(meta_data_all$ivig_rel_samp),"ivig_rel_samp"] <- "noivig"


## Read ftcount matrix
raw_ftcount <- read.delim("../1_sample-data/cfrna_ftcounts.txt",row.names=c(1))
nrow(raw_ftcount)

gene.list <- read.delim("../0_support-files/genelist.hs.tsv",col.names = c("type,","ENSMBL","gene_symbol"))
gene.ids <- gsub("\\..*","",rownames(raw_ftcount))

exclude.idx <- gene.ids %in% gene.list[,2]
raw_ftcount = raw_ftcount[!exclude.idx,]          

cpm_ftcount <- edgeR::cpm(raw_ftcount)

---
## Panel F - heatmaps

In [3]:
SIG_THRESH = 0.01
FC_THRESH = 1.5

misc_covid[['res']] %>% data.frame() %>% filter(padj < SIG_THRESH) %>% filter(abs(log2FoldChange) > FC_THRESH) %>% nrow()

In [4]:
##------------------------------------
## Plot
##------------------------------------
SIG_THRESH = 0.01
FC_THRESH = 1.5
ANNOTATIONS <- c("Diagnosis","severity")

WIDTH = 1.6
HEIGHT = 2.6

RES = 300


covid_control_plt <- create_heatmap(covid_control,cpm_ftcount,meta_data_all)
misc_control_plt <- create_heatmap(misc_control,cpm_ftcount,meta_data_all)
misc_covid_plt <- create_heatmap(misc_covid,cpm_ftcount,meta_data_all)


##------------------------------------
# SAVE HEATMAP OBJECT

prefix = "COVID19"
       
png(file=paste0("plots/panelF_covid19-control_heatmap.cfrna.png"),
        width=WIDTH,height=HEIGHT, units ="in", bg="white", res = RES, #useRaster = TRUE,
        fonts="Helvetica",  pointsize=6)

# pdf(file=paste0("plots/",prefix,".heatmap.cfrna.pdf"),
#         width=WIDTH,height=HEIGHT, paper="special", bg="transparent",
#         fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

draw(covid_control_plt,show_heatmap_legend=FALSE,padding = unit(0.25,"mm"))

dev.off()

##------------------------------------
# SAVE HEATMAP OBJECT

prefix = "MISC"

png(file=paste0("plots/panelF_misc-control_heatmap.cfrna.png"),
        width=WIDTH,height=HEIGHT, units ="in", bg="white", res = RES, #useRaster = TRUE,
        fonts="Helvetica",  pointsize=6)

# pdf(file=paste0("plots/",prefix,".heatmap.cfrna.pdf"),
#         width=WIDTH,height=HEIGHT, paper="special", bg="transparent",
#         fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

draw(misc_control_plt,show_heatmap_legend=FALSE,padding = unit(0.25,"mm"))

dev.off()

##------------------------------------
# SAVE HEATMAP OBJECT

prefix = "MISC_COVID19"

png(file=paste0("plots/panelF_misc-covid19_heatmap.cfrna.png"),
        width=WIDTH,height=HEIGHT, units ="in", bg="white", res = RES, #useRaster = TRUE,
        fonts="Helvetica",  pointsize=6)

# pdf(file=paste0("plots/",prefix,".heatmap.cfrna.pdf"),
#         width=WIDTH,height=HEIGHT, paper="special", bg="transparent",
#         fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

draw(misc_covid_plt,show_heatmap_legend=FALSE,padding = unit(0.25,"mm"))

dev.off()

[1] 219
[1] 1215
[1] 89


---
## Panel G - biomarkers

In [6]:
##------------------------------------
## Prep data
##------------------------------------

annotation <- fread(file="../0_support-files/gencode.biotype.name.key.tsv")

keeper_columns <- c("gene_name","gene_type","baseMean","padj","log2FoldChange")

# color_groups = c('COVID-19\ndiscovery' = '#C1272D', 'MIS-C\ndiscovery' = '#0000A7', 'Control_Non-inflammatory\ndiscovery' = '#EECC16',"MIS-C\nvalidation" = '#008176')
color_groups = c('COVID-19\ndiscovery' = '#F0484E', 'MIS-C\ndiscovery' = '#5CB2EB', 
                 'Control_Non-inflammatory\ndiscovery' = '#FBE77C',"MIS-C\nvalidation" = '#2BC0B3')

counts_cpm <- cpm_ftcount %>% t()

samples <- meta_data_all %>% 
    filter((Diagnosis %in% c("MIS-C","COVID-19") & timepoint == "acute") | Diagnosis == "Control_Non-inflammatory")


##------------------------------------
## PLOT
##------------------------------------

set.seed(42)

GENE = "TGM2"

WIDTH = 1.0
HEIGHT = 1.5
YLIM = 700

pdf(file=paste0("plots/panelG_",GENE,".pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)
get_fig_plot(GENE,samples,counts_cpm,YLIM) + 
#     scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) + 
    scale_y_continuous(breaks = c(0,240,480,700)) + 
    coord_cartesian(ylim = c(0,YLIM))
dev.off()



GENE = "AKAP12"

WIDTH = 1.0
HEIGHT = 1.5
YLIM = 4200

pdf(file=paste0("plots/panelG_",GENE,".pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)
get_fig_plot(GENE,samples,counts_cpm,YLIM) + 
    scale_y_continuous(breaks = c(0,1400,2800,4200)) + 
    coord_cartesian(ylim = c(0,YLIM)) 
dev.off()


GENE = "GAS7"

WIDTH = 1.0
HEIGHT = 1.5
YLIM = 510

pdf(file=paste0("plots/SupPanelF_",GENE,".pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)
get_fig_plot(GENE,samples,counts_cpm,YLIM) + 
#     scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) + 
    scale_y_continuous(breaks = c(0,170,340,510)) + 
    coord_cartesian(ylim = c(0,YLIM))
dev.off()


GENE = "VAT1"

WIDTH = 1.0
HEIGHT = 1.5
YLIM = 510

pdf(file=paste0("plots/SupPanelF_",GENE,".pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)
get_fig_plot(GENE,samples,counts_cpm,YLIM) + 
#     scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) + 
    scale_y_continuous(breaks = c(0,170,340,510)) + 
    coord_cartesian(ylim = c(0,YLIM))
dev.off()





GENE = "CMPK2"

WIDTH = 1.0
HEIGHT = 1.5
YLIM = 420

pdf(file=paste0("plots/",GENE,".pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)
get_fig_plot(GENE,samples,counts_cpm,YLIM) + 
    scale_y_continuous(breaks = c(0,140,280,420)) +
    coord_cartesian(ylim = c(0,YLIM)) 
dev.off()


GENE = "RSAD2"
YLIM = 420

pdf(file=paste0("plots/",GENE,".pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)
get_fig_plot(GENE,samples,counts_cpm,YLIM) + 
        scale_y_continuous(breaks = c(0,140,280,420)) + 
        coord_cartesian(ylim = c(0,YLIM)) 
dev.off()

[1] "num outliers:"
[1] 3


[1] "num outliers:"
[1] 1


[1] "num outliers:"
[1] 0


[1] "num outliers:"
[1] 1


[1] "num outliers:"
[1] 0


[1] "num outliers:"
[1] 1


---
## SupPanel E - TOO x DAA clustering

In [7]:
colLab<<-function(n,PatientDate_df){
    if(is.leaf(n)){
        
#         print(attributes(n)$label)
        
        #I take the current attributes
        a=attributes(n)
        
        #I deduce the line in the groupal data, and so the treatment and the specie.
        ligne=match(attributes(n)$label,cf_meta$cfrna_sample_id)
        Diagnosis=cf_meta[ligne,]$Diagnosis;
            if(Diagnosis=="COVID-19"){col_diag="#740C33"};if(Diagnosis=="MIS-C"){col_diag="#0234C1"};if(Diagnosis=="Control_Non-inflammatory"){col_diag="#4b7a47"};if(Diagnosis=="Control_Inflammatory"){col_diag="purple"}
        #Modification of leaf attribute
        attr(n,"nodePar")<-c(a$nodePar,list(cex=1.5,lab.cex=1,pch=20,col=col_diag,lab.col=col_diag,lab.font=1,lab.cex=1))
        }
    return(n)
}


get_hclust <- function(IDS,sig_genes,meta,ftcounts){
#     ANNOTATIONS <- c("Diagnosis","severity",'pcr_positive_MC_1yes','antibody_positive_MC_1yes','kd_like_MC_1yes','req_vasopressors_inotropes_MC_1yes')
    ANNOTATIONS <- c("Diagnosis","severity")



    if (is.null(meta$cfrna_sample_id)) {meta$cfrna_sample_id <- meta$SEQ_ID..UCSFonly.}
    
    metadata <- meta %>% 
    filter(cfrna_sample_id %in% all_of(IDS)) %>%                                 ###
    mutate(PTID_DATE = gsub("-","\\.",PTID_DATE)) %>%
    column_to_rownames(var = "cfrna_sample_id") %>%  
    mutate(severity = ifelse(grepl("ontrol",severity),NA,severity)) %>%
    select(all_of(ANNOTATIONS))

    colnames(metadata) <- c("Diagnosis","Severity","PCR","Antibody")

    ###----------------------------------
    ## Subset count matrix
    mat <- data.frame(ftcounts) %>% 
            filter(row.names(ftcounts) %in% all_of(sig_genes)) %>% 
            select(all_of(rownames(metadata))) %>% 
            as.matrix()

    mat <- t(scale(t(mat)))

    ###----------------------------------
    ## Perform clustering

    h <- hclust(as.dist(1 - cor(mat, method = "pearson", use = 'pairwise.complete.obs')))
    return(h)
}



get_dend <- function(IDS,sig_genes,meta,ftcounts){
    
    ANNOTATIONS <- c("Diagnosis","severity")

    if (is.null(meta$cfrna_sample_id)) {meta$cfrna_sample_id <- meta$SEQ_ID..UCSFonly.}
    
    metadata <- meta %>% 
    filter(cfrna_sample_id %in% all_of(IDS)) %>%                                 ###
    column_to_rownames(var = "cfrna_sample_id") %>%  
    mutate(severity = ifelse(grepl("ontrol",severity),NA,severity)) %>%
    select(all_of(ANNOTATIONS))

    colnames(metadata) <- c("Diagnosis","Severity")

    ###----------------------------------
    ## Subset count matrix
    mat <- data.frame(ftcounts) %>% 
            filter(row.names(ftcounts) %in% all_of(sig_genes)) %>% 
            select(all_of(rownames(metadata))) %>% 
            as.matrix()

    mat <- t(scale(t(mat)))

    ###----------------------------------
    ## Perform clustering

    h <- hclust(as.dist(1 - cor(mat, method = "pearson", use = 'pairwise.complete.obs')))
#     h <- hclust(as.dist(mat))

    dend <- as.dendrogram(h)

    dendL <- dendrapply(dend, colLab,PatientDate_df)

    return(dendL)
}

In [11]:
###----------------------------------

PVAL_THRESH = 0.01
LOGFC_THRESH = 1.5

cf_meta <- meta_data_all
cf_ftcounts <- cpm_ftcount

##------------------------------------
## Make dendograms & cluster
##------------------------------------

## MISC vs COVID
cf_tmp_rds <- misc_covid

IDS_cf <- colnames(cf_tmp_rds[['dds']])

sig_genes <- data.frame(cf_tmp_rds[['res']]) %>% filter(padj < PVAL_THRESH & abs(log2FoldChange) > LOGFC_THRESH) %>% rownames()
misc_covid_dendL = get_dend(IDS_cf,sig_genes,cf_meta,cf_ftcounts)


## MISC vs CONTROL
cf_tmp_rds <- misc_control

IDS_cf <- colnames(cf_tmp_rds[['dds']])

sig_genes <- data.frame(cf_tmp_rds[['res']]) %>% filter(padj < PVAL_THRESH & abs(log2FoldChange) > LOGFC_THRESH) %>% rownames()
misc_control_dendL = get_dend(IDS_cf,sig_genes,cf_meta,cf_ftcounts)


## get groups
misc_covid_groups <- misc_covid_dendL %>% cutree(k=3)
misc_control_groups <- misc_control_dendL %>% cutree(k=3)

## add clustering to metdata
cf_meta_clust <- merge(cf_meta,data.frame(misc_covid_groups),by.x= "cfrna_sample_id", by.y=0,all.x=T)
cf_meta_clust <- merge(cf_meta_clust,data.frame(misc_control_groups),by.x= "cfrna_sample_id", by.y=0,all.x=T)

misc_covid_seed_group <- misc_covid_groups[['prevail_cu_cfrna_301']]
misc_control_seed_group <- misc_control_groups[['prevail_cu_cfrna_301']]

cf_meta_clust <- cf_meta_clust %>% filter(!is.na(misc_covid_groups)) %>% filter(!is.na(misc_control_groups))

cf_meta_clust <- cf_meta_clust %>% mutate(in_group = ifelse( (misc_covid_groups == misc_covid_seed_group) & (misc_control_groups == misc_control_seed_group), "IN", "OUT"))

##------------------------------------
## Read decon and plot
##------------------------------------

# decon = read.csv("../1_sample-data/cfrna_deconvolution.csv") %>% filter(X %in% all_of(cf_meta_clust$cfrna_sample_id))


# celltypes <- decon %>% melt(id.vars= "X") %>% group_by(variable) %>% summarize(mean_frac = mean(value)) %>% filter(mean_frac > 0.01) %>% pull(variable) %>% as.character()

# decon <- decon[,c("X",celltypes)]


CELLTYPES <- c('Intrahepatic.cholangiocyte','NK.Cell','Monocyte','Macrophage','Secretory.cell','Respiratory.ciliated.cell',
               'Ionocyte.luminal.epithelial.cell.of.mammary.gland','Fibroblast.mesenchymal.stem.cell','Respiratory.secretory.cell',
               'Mast.cell','Basal.cell','Endothelial.cell','neutrophil','B.cell','T.Cell','Erythrocyte.erythroid.progenitor','Platelet',
               'Basophil','Stromal.cell','Mature.conventional.dendritic.cell','Adventitial.cell','Gland.cell','Salivary.bronchial.secretory.cell',
               'Pericyte.cell','Myeloid.progenitor','Pancreatic.Stellate.cell','Pancreatic.alpha.beta.cell','Basal.prostate.cell',
               'Prostate.epithelia','Salivary.gland.cell','Intestinal.enterocyte','Intestinal.secretory.cell','Intestinal.tuft.cell',
               'Type.II.Pneumocyte','Cell.of.skeletal.muscle','Schwann.cell','Tendon.cell','Mesothelial.cell','Plasmablast',
               'Kidney.epithelial.cell','Thymocyte')

decon <- meta_data_all[,CELLTYPES]
decon$X <- meta_data_all$cfrna_sample_id

cols <- colnames(cf_meta_clust)
celltypes <- cols[15:(length(cols)-3)]
non_celltypes <- cols[!(cols %in% celltypes)]

celltypes <- melt(cf_meta_clust, id.vars = non_celltypes) %>% 
    group_by(variable) %>% 
    summarize(mean_frac = mean(value)) %>% filter(mean_frac > 0.01) %>% pull(variable) %>% as.character()

cf_mcd = cf_meta_clust



# nrow(cf_meta_clust)
# nrow(cf_meta_clust_decon)

ct_output <- list()
for (ct in celltypes){
    
    cf_mcd$ct <- cf_mcd[[ct]]

#     cf_mcd$ct <- scale(cf_mcd[[ct]])
    one.way <-  aov( ct ~ as.character(misc_covid_groups), data= cf_mcd)    
    ct_output[[ct]] <- c(ct,summary(one.way)[[1]][["Pr(>F)"]][1])
}
cf_mcd$ct <- NULL

ct_df <- data.frame(do.call("rbind",ct_output))
colnames(ct_df) <- c("ct","pval")

ct_df$pval.adj <- p.adjust(ct_df$pval, method = "BH")


ct_df %>% arrange(pval.adj) %>% filter(pval.adj < 0.05)

## PREPARE ANNOTATION
anno <- cf_mcd %>% select(cfrna_sample_id,misc_control_groups) %>% arrange(misc_control_groups) 
colnames(anno)[2] <- "cluster"
rownames(anno) <- anno$cfrna_sample_id

# anno$pos <- c(1:nrow(anno))
# anno <- merge(anno,solid_df)
# anno <- anno %>% arrange(pos)
# anno$pos <- NULL


anno$cfrna_sample_id <- NULL


## PREPARE MATRIX
sig_cts <- ct_df %>% arrange(pval.adj) %>% filter(pval.adj < 0.05) %>% pull(ct)
mat <- decon[,c("X",sig_cts)]
mat <- mat[match(rownames(anno),mat$X),]

# mat$pos <- c(1:nrow(mat))
# mat <- merge(mat,solid_df,by.x="X",by.y="cfrna_sample_id")
# mat$pos <- NULL

rownames(mat) <- mat$X
mat$X <- NULL

mat <- t(scale(mat))



## PREPARE COLORS
mycolors <- list(
    cluster = c("1" = "red4","2" = "purple","3" = "blue") #     respiratory_failure_MC_1yes = c("1" = "red4","0" = "purple")
)

breaksList = seq(-2, 2, by = .1)





## MAKE HEATMAP
heatmap_plt <- pheatmap(mat,
                        
             # Colors
             color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList)),
             breaks=breaksList, 
             annotation_col=anno,
             annotation_colors=mycolors,
             na_col = "#FFFFFF",
                        
             # Fonts
             show_colnames=F,
             show_rownames=T,
             fontsize=12,
             fontsize_col=3,
             fontsize_row = 3,
             annotation_names_col=F,
             annotation_names_row=F,


                        
             treeheight_row=7,
             cluster_cols = FALSE,
             labels_col = "CLUSTER",
              cutree_rows = 4,
                        
            border_color="dark grey",
                        
              legend = FALSE,
              annotation_legend = FALSE
                       )

# print(heatmap_plt)
suppressMessages(require(ComplexHeatmap))

HEIGHT = 1.75
WIDTH = 1.5

pdf(file=paste0("plots/SupPanelE_DAAclust.pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

print(heatmap_plt)
# draw(heatmap_plt,show_heatmap_legend=TRUE,padding = unit(0.25,"mm"))

dev.off()



Unnamed: 0_level_0,ct,pval,pval.adj
Unnamed: 0_level_1,<chr>,<chr>,<dbl>
Platelet,Platelet,3.70588475212129e-10,3.705885e-09
Endothelial.cell,Endothelial.cell,7.1472989013168e-07,3.573649e-06
Myeloid.progenitor,Myeloid.progenitor,0.0009513504813581,0.003171168
Monocyte,Monocyte,0.0013353831473551,0.003338458
neutrophil,neutrophil,0.0034173501934329,0.0068347
NK.Cell,NK.Cell,0.0060955083607084,0.01015918
Respiratory.ciliated.cell,Respiratory.ciliated.cell,0.0207103930279848,0.02958628


---
## Panel H - Gene module scores

In [7]:
##------------------------------------
# FUNCTIONS

get_cum_cpm <- function(genes,counts_cpm){
    
    df <- counts_cpm[gsub("\\..*","",rownames(counts_cpm)) %in% genes,]
    
    ct_counts <- data.frame(colSums(df))
    
    colnames(ct_counts) <- "score"
    
    ct_counts <- merge(ct_counts,samples,by.x=0,by.y="cfrna_sample_id")
    
    return(ct_counts)
    
}

BoxMeanQuant <- function(x) {
    v <- c(min(x), quantile(x, 0.25), mean(x), quantile(x, 0.75), max(x))
    names(v) <- c("ymin", "lower", "middle", "upper", "ymax")
    v
  }


convert_SYM_ENSEM <- function(x,ref){

    return(ref[which(ref$gene_name %in% x),]$gene_id)
}



extract_sig <- function(x,sig){
    genes = unlist(strsplit(x,","))
    genes = genes[genes %in% sig]
    return(as.character(paste(genes,collapse=",")))
    
}

add_sig_col <- function(rds){
    
    up_genes1 <- data.frame(rds[['res']]) %>% filter(padj <= 0.05 & !is.na(padj)) %>%  filter(log2FoldChange > 0) %>% rownames()
    up_genes <- gsub("\\..*","",up_genes1)
    
    df <- rds$`TopGO+`
    df[['genes_sig']] <- lapply(df$genes,extract_sig,up_genes)
    rds$`TopGO+` <- df
    
    ##-------------------------
    
    down_genes1 <- data.frame(rds[['res']]) %>% filter(padj <= 0.05 & !is.na(padj)) %>%  filter(log2FoldChange < 0) %>% rownames()
    down_genes <- gsub("\\..*","",down_genes1)
    
    df <- rds$`TopGO-`
    df[['genes_sig']] <- lapply(df$genes,extract_sig,down_genes)
    rds$`TopGO-` <- df  
    
    return(rds)
}

get_plots <- function(ct_counts,expGroupPalette,DiagnosisPalette,YLIM){

    
    ct_counts$timepoint <- factor(ct_counts$timepoint,levels=c("acute","1m","3m+","Control"))

    common <- list(theme_prevail(),
        theme(legend.position = "none",
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
#         panel.border = element_blank(),
        panel.grid.minor = element_blank()))

    box_common <- list(scale_x_discrete(expand = c(0, 0)),
        scale_fill_manual(values=expGroupPalette))


    ct_counts_SUM <- Rmisc::summarySE(ct_counts[which((ct_counts$group == "discovery") & !(grepl("Control",ct_counts$Diagnosis) )),],
                                               measurevar="score", 
                                               groupvars=c("expGroup","timepoint","Diagnosis"))

    MEAN_CNTRL <- mean(ct_counts[which(grepl("Control",ct_counts$Diagnosis)),"score"])

    ##------------------------------------
    # Control Boxplot

    cntrl_boxplot <- ct_counts %>% 
    filter(grepl("Control",Diagnosis)) %>%
    ggplot(aes(x=expGroup,y=score,fill=expGroup))+
    geom_boxplot(width=0.5/4,size = 0.2,outlier.size = .01)+
    stat_summary(fun=mean, geom="point", shape=18, size=.2, color="white", fill="white") 

    ##------------------------------------
    # Acute Boxplot

    acute_boxplot <- ct_counts %>% 
    filter(grepl("Control|acute",expGroup)) %>%
    mutate(expGroup = factor(expGroup,levels=c("Control_Non-inflammatory_Not-hospitalized_discovery","MIS-C_acute_validation","MIS-C_acute_discovery","COVID-19_acute_discovery")))%>%
    ggplot(aes(x=expGroup,y=score))+
#     geom_hline(yintercept = MEAN_CNTRL,alpha = 0.75, size = .7, linetype = "dashed",color = "black" )+
    geom_hline(yintercept = MEAN_CNTRL,alpha = 0.75, size = .5, linetype = "dashed",color = DiagnosisPalette[which(names(DiagnosisPalette) == "Control")][1] )+
    geom_boxplot(aes(fill=expGroup), width=0.75,size = 0.2,outlier.size = .01, outlier.shape=NA)+
    stat_summary(fun=mean, geom="point", shape=18, size=.2, color="white", fill="white") 

    ##------------------------------------ 
    # LINE PLOT
    pd <- position_dodge(0.05)

    line_plot <- ct_counts_SUM %>%
    ggplot(aes(x=timepoint, y=score, colour=Diagnosis, group=Diagnosis)) + 
#         geom_hline(yintercept = MEAN_CNTRL,alpha = 0.75, size = .7, linetype = "dashed",color = "black" )+
        geom_hline(yintercept = MEAN_CNTRL,alpha = 0.75, size = .5, linetype = "dashed",color = DiagnosisPalette[which(names(DiagnosisPalette) == "Control")][1] )+
        geom_errorbar(aes(ymin=score-se, ymax=score+se), width=.1, position=pd) +
        geom_line(position=pd) +
        geom_point(position=pd, size = 0.5)+
    scale_x_discrete(expand = c(.1, 0))+
    scale_color_manual(values=DiagnosisPalette)
    
    

    ##------------------------------------
    # Customize

    # Calculate Y limits
    y_range_cntl <- layer_scales(cntrl_boxplot)$y$range$range
    y_range_acute <- layer_scales(acute_boxplot)$y$range$range
    y_range_ln <- layer_scales(line_plot)$y$range$range

    starts <- c(y_range_cntl[1],y_range_acute[1],y_range_ln[1],0)
    stops <- c(y_range_cntl[2],y_range_acute[2],y_range_ln[2])
    
    proper_scale <- list(coord_cartesian(ylim = c(min(starts), max(stops))), 
                    scale_y_continuous(labels = function(x) format(x, scientific = TRUE, digits = 2),
                                       breaks = seq(0,max(stops),length.out  = 4)))
    
    
    
    ##---------------------------------------
    # ADD SIG BARS
    
    stat.test <- ct_counts  %>% 
    filter(grepl("Control|acute",expGroup)) %>%
    mutate(expGroup = factor(expGroup,levels=c("Control_Non-inflammatory_Not-hospitalized_discovery","MIS-C_acute_validation","MIS-C_acute_discovery","COVID-19_acute_discovery")))%>%
    wilcox_test(score ~ expGroup) %>% 
    adjust_pvalue(method = "BH") %>% 
    add_significance("p.adj") %>% 
    add_xy_position(x = "expGroup")
    
    stat.test <- stat.test %>% arrange(desc(y.position))
    stat.test$rank <- c(1:nrow(stat.test))
    stat.test$y.position <- YLIM - ((0.05*stat.test$rank)*YLIM)
                                       
    sig_bars <- stat_pvalue_manual(stat.test, label = "p.adj.signif",tip.length=0) 
    
    acute_boxplot <- acute_boxplot + sig_bars
    
#     return(acute_boxplot)
#     print(stat.test)
    
    ##---------------------------------------
    # FINALIZE
            
    line_plot <- line_plot + common + proper_scale
                                       
    cntrl_boxplot <- cntrl_boxplot + common + box_common + proper_scale
                        
    acute_boxplot <- acute_boxplot + common + box_common + proper_scale
    


    
    return(list("line_plot" = line_plot,
                "cntrl_boxplot" = cntrl_boxplot,
                "acute_boxplot" = acute_boxplot))
    
    }

In [8]:
##------------------------------------

counts_cpm <- cpm_ftcount
samples <- meta_data_all %>% 
    filter(timepoint != "post-acute")

samples$expGroup <- paste0(samples$Diagnosis,"_",samples$timepoint,"_",samples$group)
GROUPS = c("Control_Non-inflammatory_Not-hospitalized_discovery","MIS-C_acute_validation",
           "COVID-19_acute_discovery","COVID-19_1m_discovery","COVID-19_3m+_discovery",
           "MIS-C_acute_discovery","MIS-C_1m_discovery","MIS-C_3m+_discovery"
          )
samples <- samples %>% filter(expGroup %in% GROUPS) %>% mutate(timepoint = ifelse(grepl("Control_Non",Diagnosis),"Control",timepoint))

counts_cpm <- counts_cpm[,samples$cfrna_sample_id]


expGroupPalette = c('#00FFFF','#007FFF','#FF007F','#7FFF00')
names(expGroupPalette) <- c("MIS-C_acute_validation","MIS-C_acute_discovery","COVID-19_acute_discovery","Control_Non-inflammatory_Not-hospitalized_discovery")

DiagnosisPalette = c('#5CB2EB','#F0484E','#FBE77C')
names(DiagnosisPalette) <- c("MIS-C","COVID-19","Control")


expGroupPalette = c('COVID-19_acute_discovery' = '#F0484E', 'MIS-C_acute_discovery' = '#5CB2EB', 'Control_Non-inflammatory_Not-hospitalized_discovery' = '#FBE77C',
                    "MIS-C_acute_validation" = '#2BC0B3')


##------------------------------------
# TOPGO
# MIS-C UP

covid_control <- add_sig_col(readRDS("./daa_output/covid-control.rds"))
misc_control <- add_sig_col(readRDS(file = "./daa_output/misc-control.rds"))
misc_covid <- add_sig_col(readRDS("./daa_output/misc-covid.rds"))


misc_up <- rbind(misc_control$`TopGO+`,misc_covid$`TopGO+`) %>% filter(Significant > 15) #%>% dplyr::rename(genes_sig = genes)
misc_up$genes_sig <- unlist(misc_up$genes_sig)
misc_up <- aggregate(misc_up$genes_sig, list(misc_up$Term), paste, collapse=",")
misc_list = list()
for (i in 1:nrow(misc_up)){
    misc_list[[misc_up[i,'Group.1']]] <- unlist(strsplit(misc_up[i,"x"],","))
}

# COVID-19 UP
covid_up <- rbind(covid_control$`TopGO+`,misc_covid$`TopGO-`) %>% filter(Significant > 15) #%>% dplyr::rename(genes_sig = genes)
covid_up$genes_sig <- unlist(covid_up$genes_sig)
covid_up <- aggregate(covid_up$genes_sig, list(covid_up$Term), paste, collapse=",")
covid_list = list()
for (i in 1:nrow(covid_up)){
    covid_list[[covid_up[i,'Group.1']]] <- unlist(strsplit(covid_up[i,"x"],","))
}


# CONTROL UP
control_up <- rbind(covid_control$`TopGO-`,misc_control$`TopGO-`) %>% filter(Significant > 15) #%>% dplyr::rename(genes_sig = genes)

control_up$genes_sig <- unlist(control_up$genes_sig)
control_up <- aggregate(control_up$genes_sig, list(control_up$Term), paste, collapse=",")
control_list = list()
for (i in 1:nrow(control_up)){
    control_list[[control_up[i,'Group.1']]] <- unlist(strsplit(control_up[i,"x"],","))
}



# CONTROL DOWN
control_dwn <- rbind(covid_control$`TopGO+`,misc_control$`TopGO+`) %>% filter(Significant > 15) #%>% dplyr::rename(genes_sig = genes)

control_dwn$genes_sig <- unlist(control_dwn$genes_sig)
control_dwn <- aggregate(control_dwn$genes_sig, list(control_dwn$Term), paste, collapse=",")
control_dwn_list = list()
for (i in 1:nrow(control_dwn)){
    control_dwn_list[[control_dwn[i,'Group.1']]] <- unlist(strsplit(control_dwn[i,"x"],","))
}


gene_key <- list()

gene_key[['endothelial cell migration']] <- misc_list[['endothelial cell migration']]

gene_key[['myeloid cell differentiation']] <- misc_list[['myeloid cell differentiation']]


output <- list()

# final_list <- names(gene_key)

for (f in names(gene_key)){
    
    ##------------------
    # Get Genes
    
    ct_genes <- gene_key[[f]]
    
    ##------------------
    # Extract counts
    ct_counts <- get_cum_cpm(ct_genes,counts_cpm)
        
    ##------------------
    # Save
    output[[f]] <- ct_counts
    
    }

In [9]:
WIDTH = 2.15
HEIGHT = 1.5

NOT_LEFT = list(theme(axis.text.y = element_blank(),axis.ticks.y=element_blank()))

## ACUTE - LINE
lay <- rbind(c(1,1,3,3,2))
ACUTE_MARGIN = theme(plot.margin=grid::unit(c(0,-.2,0,0), "in"))
CNTRL_MARGIN = theme(plot.margin=grid::unit(c(0,-.5,0,-.3), "in"))
LINE_MARGIN = theme(plot.margin=grid::unit(c(0,-0.2,0,-0.05), "in"))
GRID_fun <- function(acute_boxplot, cntrl_boxplot, line_plot, lay){grid.arrange(acute_boxplot, cntrl_boxplot, line_plot, layout_matrix = lay)}

# ACUTE - LINE
lay <- rbind(c(1,1,2,2))
ACUTE_MARGIN = list(theme(plot.margin=grid::unit(c(0.02,-.2,0,0), "in")),scale_x_discrete(expand = c(.5, 1.5)))
# ACUTE_MARGIN = theme(plot.margin=grid::unit(c(0,-.2,0,0), "in"))
# CNTRL_MARGIN = theme(plot.margin=grid::unit(c(0,-.5,0,-.3), "in"))
LINE_MARGIN = theme(plot.margin=grid::unit(c(0.02,0.02,0,-0.05), "in"))
GRID_fun <- function(acute_boxplot, cntrl_boxplot, line_plot, lay){grid.arrange(acute_boxplot, line_plot, layout_matrix = lay)}

# # ACUTE - LINE
lay <- rbind(c(2,2,1,1))
ACUTE_MARGIN = list(theme(plot.margin=grid::unit(c(0.02,-.025,0,0), "in")),scale_x_discrete(expand = expansion(add = 0.5)))
# ACUTE_MARGIN = theme(plot.margin=grid::unit(c(0,-.2,0,0), "in"))
# CNTRL_MARGIN = theme(plot.margin=grid::unit(c(0,-.5,0,-.3), "in"))
LINE_MARGIN = theme(plot.margin=grid::unit(c(0.02,0.02,0,-0.025), "in"))
GRID_fun <- function(acute_boxplot, cntrl_boxplot, line_plot, lay){grid.arrange( line_plot, acute_boxplot, layout_matrix = lay)}


##------------------------------------
SET = 'endothelial cell migration'
print(SET)
print(length(gene_key[[SET]]))
LIM = 4500


all_plots <- get_plots(output[[SET]],
          expGroupPalette,DiagnosisPalette,LIM)

LIMITS =  list(scale_y_continuous(breaks = seq(0,LIM,length.out  = 4),labels = function(x) format(x, scientific = TRUE, digits = 2)),
               coord_cartesian(ylim = c(0,LIM))
              )

cntrl_boxplot <- all_plots[["cntrl_boxplot"]] + CNTRL_MARGIN + NOT_LEFT + LIMITS
acute_boxplot <- all_plots[["acute_boxplot"]] + ACUTE_MARGIN + LIMITS
line_plot <- all_plots[["line_plot"]] + LINE_MARGIN + NOT_LEFT + LIMITS
               
pdf(file=paste0("plots/panelH_endothelialCellMigration.pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

all_plt <- GRID_fun(acute_boxplot, cntrl_boxplot, line_plot, lay)

print(all_plt)
               
# grid.rect(width = 0.99, height = 0.99, gp = gpar(lwd = .5, col = "black", fill = NA))

dev.off()

##------------------------------------
SET = 'myeloid cell differentiation'
print(SET)
print(length(gene_key[[SET]]))
LIM = 40000
               
all_plots <- get_plots(output[[SET]],
          expGroupPalette,DiagnosisPalette,LIM)


LIMITS =  list(scale_y_continuous(breaks = seq(0,LIM,length.out  = 4),labels = function(x) format(x, scientific = TRUE, digits = 2)),
               coord_cartesian(ylim = c(0,LIM))
              )

cntrl_boxplot <- all_plots[["cntrl_boxplot"]] + CNTRL_MARGIN + LIMITS + NOT_LEFT
acute_boxplot <- all_plots[["acute_boxplot"]] + ACUTE_MARGIN + LIMITS
line_plot <- all_plots[["line_plot"]] + LINE_MARGIN + LIMITS + NOT_LEFT
    
               
pdf(file="plots/panelH_myeloidCellDifferentiation.pdf",
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

all_plt <- GRID_fun(acute_boxplot, cntrl_boxplot, line_plot, lay)

print(all_plt)

dev.off()

[1] "endothelial cell migration"
[1] 21


Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'x' is already present. Adding another scale for 'x', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.



TableGrob (1 x 4) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,3-4) arrange gtable[layout]
2 2 (1-1,1-2) arrange gtable[layout]


[1] "myeloid cell differentiation"
[1] 108


Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'x' is already present. Adding another scale for 'x', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.



TableGrob (1 x 4) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,3-4) arrange gtable[layout]
2 2 (1-1,1-2) arrange gtable[layout]


---
# TEXT

In [16]:
GENES = c("AKAP12","CNN3","FZD4","GAS7","FEZ1","VAT1","FSCN1","AFAP1L1","ITGA9","TGM2")
length(GENES)
misc_covid$res %>% data.frame() %>% filter(gene_name %in% GENES)


GENES = c("IFI6","IFIT1","IFI44L","IFI27","IFITM1","RSAD2","MX1", "CMPK2", "LY6E","CXCL5", "CXCL3","OR2B6", "ENKUR")
length(GENES)
misc_covid$res %>% data.frame() %>% filter(gene_name %in% GENES) 

Unnamed: 0_level_0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_name,gene_type
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
ENSG00000117519.16,22.538775,2.317066,0.5465785,4.239219,2.24299e-05,0.0012646384,CNN3,protein_coding
ENSG00000144668.12,2.896342,2.1406801,0.442356,4.83927,1.30317e-06,0.0002126911,ITGA9,protein_coding
ENSG00000157510.14,10.579467,2.4592161,0.5569188,4.415754,1.006587e-05,0.0007431969,AFAP1L1,protein_coding
ENSG00000131016.17,84.145949,2.4403501,0.5195859,4.696721,2.64371e-06,0.0003323573,AKAP12,protein_coding
ENSG00000075618.18,7.368068,2.311874,0.5595895,4.131375,3.606005e-05,0.0018331509,FSCN1,protein_coding
ENSG00000174804.4,3.322412,2.2822851,0.5571817,4.096124,4.201256e-05,0.0020250924,FZD4,protein_coding
ENSG00000149557.14,6.449785,1.8116713,0.5011939,3.614712,0.000300682,0.0078137433,FEZ1,protein_coding
ENSG00000007237.19,25.87357,0.9252772,0.2263312,4.088156,4.348156e-05,0.0020744049,GAS7,protein_coding
ENSG00000108828.16,20.189719,1.2559348,0.3173517,3.957548,7.572298e-05,0.0029995397,VAT1,protein_coding
ENSG00000198959.12,26.017137,1.8557066,0.4333083,4.282647,1.846829e-05,0.0010832803,TGM2,protein_coding


Unnamed: 0_level_0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_name,gene_type
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
ENSG00000126709.16,8.925773,-2.830069,0.4984063,-5.678237,1.360902e-08,6.226225e-06,IFI6,protein_coding
ENSG00000137959.17,26.259651,-2.308825,0.3672807,-6.286267,3.251904e-10,3.025247e-07,IFI44L,protein_coding
ENSG00000134326.12,10.396875,-2.226702,0.346645,-6.423579,1.331068e-10,1.446604e-07,CMPK2,protein_coding
ENSG00000134321.13,12.833584,-2.472075,0.3244062,-7.620308,2.530714e-14,1.177162e-10,RSAD2,protein_coding
ENSG00000163735.7,53.654238,-2.080878,0.4747247,-4.383337,1.168749e-05,0.000817509,CXCL5,protein_coding
ENSG00000163734.4,2.333162,-2.023051,0.4663381,-4.338163,1.436787e-05,0.0009412977,CXCL3,protein_coding
ENSG00000124657.1,2.632981,-3.126866,0.6545266,-4.777295,1.776694e-06,0.0002542859,OR2B6,protein_coding
ENSG00000160932.11,12.863741,-1.776512,0.2875371,-6.178374,6.476503e-10,5.477355e-07,LY6E,protein_coding
ENSG00000151023.17,35.459566,-2.253271,0.4436443,-5.079004,3.794196e-07,9.050617e-05,ENKUR,protein_coding
ENSG00000185745.10,10.983196,-2.493865,0.4095804,-6.088828,1.137399e-09,7.799598e-07,IFIT1,protein_coding
