In [1]:
### Packages
# library() is used throughout (instead of require()) so that a missing
# package stops the notebook here rather than returning FALSE and failing
# later with an unrelated "could not find function" error.
suppressMessages(library(tidyverse))
suppressMessages(library(DESeq2))

# NOTE(review): ComplexHeatmap is attached after pheatmap, so the pheatmap()
# calls below presumably resolve to ComplexHeatmap's version (the cells use
# draw() on the result) -- keep this attach order.
suppressMessages(library(pheatmap))
suppressMessages(library(RColorBrewer))
suppressMessages(library(dendextend))
suppressMessages(library(ComplexHeatmap))
suppressMessages(library(circlize))

suppressMessages(library(data.table))


suppressMessages(library(rstatix))
suppressMessages(library(ggpubr))

suppressMessages(library(grid))
suppressMessages(library(gridExtra))

### Project ggplot themes
source("../0_support-files/theme_CRP-MISC.R")

### Gene annotation; gene_key is "<gene_id>_<gene_name>"
annotation <- fread(file = "../0_support-files/gencode.biotype.name.key.tsv") %>%
    mutate(gene_key = paste0(gene_id, "_", gene_name))



In [2]:
### Read the three DESeq result tables; gene_id is a copy of GeneID so the
### downstream code can use a single column name
covid_control <- read.delim("./tables/wb_covid-cntrl_all_DESeq.tsv") %>% mutate(gene_id = GeneID)
misc_control <- read.delim("./tables/wb_misc-control_all_DESeq.tsv") %>% mutate(gene_id = GeneID)
misc_covid <- read.delim("./tables/wb_misc-covid_all_DESeq.tsv") %>% mutate(gene_id = GeneID)


### Read in metadata
meta_data_all <- read.delim("../1_sample-data/STable7_wbrna-samples.tsv")
# Samples without an IVIG annotation get the explicit level "noivig"
meta_data_all[is.na(meta_data_all$ivig_rel_samp), "ivig_rel_samp"] <- "noivig"

### Read ftcount matrix
raw_ftcount <- read.delim("../1_sample-data/wbrna_ftcounts.txt", row.names = 1)
nrow(raw_ftcount)

### Read gene list to remove
# Fixed: the first column name used to be "type," (stray comma inside the string).
gene.list <- read.delim("../0_support-files/genelist.remove.tsv",
                        col.names = c("type", "ENSMBL", "gene_symbol"))
# Strip everything from the first "." onwards (version suffix and gene name)
# so the row names can be compared with the IDs in the removal list
gene.ids <- gsub("\\..*", "", rownames(raw_ftcount))

### Subset count matrix
exclude.idx <- gene.ids %in% gene.list$ENSMBL
raw_ftcount <- raw_ftcount[!exclude.idx, ]

nrow(raw_ftcount)

# Drop samples (columns) with zero total counts
raw_ftcount <- raw_ftcount[, colSums(raw_ftcount) > 0]

cpm_ftcount <- edgeR::cpm(raw_ftcount)
head(cpm_ftcount)

Unnamed: 0,PV351,PV352,PV353,PV358,PV359,PV364,PV354,PV357,PV360,PV365,⋯,PV41,PV31,PV33,PV9,PV42,PV338,PV335,PV334,PV340,PV336
ENSG00000000419.14_DPM1,35.498982,33.203382,32.165358,31.897127,37.551375,32.018252,22.085542,35.625051,24.989185,34.601058,⋯,25.0876874,45.16244,23.187441,33.255753,28.5699,50.79765,15.908241,27.1536041,43.316733,66.013473
ENSG00000000457.14_SCYL3,22.088255,14.884275,6.992469,7.974282,11.179417,6.800337,7.794897,6.785724,1.298139,3.615036,⋯,12.7429524,0.0,13.639671,12.959963,10.25586,16.51703,0.0,6.8961534,16.614637,16.637542
ENSG00000000460.17_C1orf112,5.522064,2.289888,4.545105,9.113465,6.879641,1.983432,1.732199,10.178586,15.253139,12.394409,⋯,8.7607797,0.0,2.727934,10.759214,10.98842,10.59583,7.954121,4.7411055,6.527179,4.293559
ENSG00000000938.13_FGR,531.695857,776.272176,483.878862,436.307133,474.408595,820.290618,372.422866,116.205525,221.33278,356.339249,⋯,359.9884039,935.23894,546.950817,423.766322,408.03679,593.98967,454.9757,558.5884264,780.887946,585.53414
ENSG00000000971.16_CFH,0.0,0.0,0.0,0.0,1.146607,1.983432,0.0,0.0,0.0,0.0,⋯,0.7964345,0.0,0.0,1.956221,0.0,4.67463,0.0,0.4310096,0.0,2.683475
ENSG00000001036.14_FUCA2,5.522064,4.579777,14.684185,5.695916,5.446383,7.650379,12.558445,15.267879,5.517093,20.140914,⋯,10.3536488,16.93592,4.091901,22.741066,10.25586,13.08896,3.181648,7.327163,4.747039,31.128304


---
## Heatmaps

#### _Functions_

In [3]:
#' Clustered heatmap of the significant genes of one DESeq contrast.
#'
#' @param res        Result table with columns `gene_id`, `padj` and
#'                   `log2FoldChange`.
#' @param groups     `Diagnosis` values whose samples are plotted.
#' @param cpm        CPM matrix, genes in rows (row names are matched against
#'                   `res$gene_id`) and samples in columns.
#' @param meta_data  Sample sheet with columns `wbrna_sample_id`, `Diagnosis`,
#'                   `timepoint`, `severity`, `group` and `ivig_rel_samp`.
#' @param sig_thresh Upper bound on `padj`; defaults to the global `SIG_THRESH`.
#' @param fc_thresh  Lower bound on `abs(log2FoldChange)`; defaults to the
#'                   global `FC_THRESH`.
#' @return The object returned by `pheatmap()`. As a side effect the number of
#'         selected genes is printed.
create_heatmap <- function(res, groups, cpm, meta_data,
                           sig_thresh = SIG_THRESH, fc_thresh = FC_THRESH) {

    annotation_cols <- c("Diagnosis", "severity", "group", "ivig_rel_samp")

    ###----------------------------------
    ## Extract significant genes
    sig_genes <- data.frame(res) %>%
        filter(padj < sig_thresh & abs(log2FoldChange) > fc_thresh) %>%
        pull(gene_id)
    print(length(sig_genes))

    ###----------------------------------
    ## Subset metadata
    # all_of() is only meaningful inside selecting verbs, so the filter uses
    # plain %in%. Any severity value containing "ontrol" is blanked.
    metadata <- meta_data %>%
        filter(Diagnosis %in% groups & timepoint %in% c("Not-hospitalized", "acute")) %>%
        column_to_rownames(var = "wbrna_sample_id") %>%
        mutate(severity = ifelse(grepl("ontrol", severity), NA, severity)) %>%
        select(all_of(annotation_cols))

    colnames(metadata) <- c("Diagnosis", "Severity", "group", "IVIG")

    ###----------------------------------
    ## Subset count matrix
    # Index the matrix directly: significant genes in rows, the retained
    # samples (in metadata order) in columns.
    mat <- as.matrix(cpm[rownames(cpm) %in% sig_genes, rownames(metadata), drop = FALSE])

    # Row-wise z-score (scale() works on columns, hence the double transpose)
    mat <- t(scale(t(mat)))

    ###----------------------------------
    ## Colors
    color <- colorRampPalette(c("blue", "yellow"))(50)
    breaksList <- seq(-2, +2, length = 51)

    my_colour <- list(
        Diagnosis = c('COVID-19' = '#F0484E', 'MIS-C' = '#5CB2EB', 'Control_Non-inflammatory' = '#FBE77C'),
        Severity = c("-1" = "white", "0" = "white", "2" = "#efe5d7", "3" = "#bc8e52"),
        group = c("validation" = "orange", "discovery" = "light blue", "UCSF" = "light blue"),
        IVIG = c("after" = "maroon", "before" = "darkseagreen2", "concurrent with" = "dark blue", "noivig" = "white")
    )

    ###----------------------------------
    ## Plot
    heatmap_plt <- pheatmap(mat,

             # Colors
             color = color,
             breaks = breaksList,
             annotation_col = metadata,
             annotation_colors = my_colour,
             na_col = "#FFFFFF",

             # Fonts
             show_colnames = FALSE,
             show_rownames = FALSE,
             fontsize = 12,
             fontsize_col = 3,
             annotation_names_col = FALSE,
             annotation_names_row = FALSE,

             # Clustering
             clustering_distance_cols = "correlation",
             clustering_distance_rows = "correlation",
             treeheight_row = 0,
             treeheight_col = 15,

             # Misc.
             border_color = NA,
             legend = FALSE,
             annotation_legend = FALSE
             )

    heatmap_plt
}

In [4]:
# Cut-offs applied to the DESeq tables when selecting heatmap genes:
# padj must be below SIG_THRESH and |log2FoldChange| above FC_THRESH.
SIG_THRESH <- 0.01
FC_THRESH <- 1.5

# Alternative setting, kept for reference:
# SIG_THRESH <- 0.05
# FC_THRESH <- 0

ANNOTATIONS <- c("Diagnosis", "severity")

In [5]:

# One heatmap per pairwise contrast; each call prints how many genes it selected.
covid_control_plt <- create_heatmap(
    res = covid_control,
    groups = c("COVID-19", "Control_Non-inflammatory"),
    cpm = cpm_ftcount,
    meta_data = meta_data_all
)

misc_control_plt <- create_heatmap(
    res = misc_control,
    groups = c("MIS-C", "Control_Non-inflammatory"),
    cpm = cpm_ftcount,
    meta_data = meta_data_all
)

misc_covid_plt <- create_heatmap(
    res = misc_covid,
    groups = c("MIS-C", "COVID-19"),
    cpm = cpm_ftcount,
    meta_data = meta_data_all
)

[1] 1097
[1] 2024


`use_raster` is automatically set to TRUE for a matrix with more than
2000 rows. You can control `use_raster` argument by explicitly setting
TRUE/FALSE to it.

Set `ht_opt$message = FALSE` to turn off this message.

'magick' package is suggested to install to give better rasterization.

Set `ht_opt$message = FALSE` to turn off this message.



[1] 84


In [7]:
# Device geometry shared by the three panel-B PNGs
# (units = "in" below, RES is passed as the png() resolution).
WIDTH <- 1.6
HEIGHT <- 2.6

RES <- 300

# `prefix` is only referenced by an earlier PDF export that is no longer
# active; the assignments are kept so the global keeps the same value.

##------------------------------------
# SAVE HEATMAP OBJECT: COVID-19 vs control

prefix <- "COVID19"

png(
    file = "plots/panelB_covid-control.png",
    width = WIDTH, height = HEIGHT, units = "in",
    bg = "white", res = RES,
    fonts = "Helvetica", pointsize = 6
)

draw(covid_control_plt, show_heatmap_legend = FALSE, padding = unit(0.25, "mm"))

dev.off()

##------------------------------------
# SAVE HEATMAP OBJECT: MIS-C vs control

prefix <- "MISC"

png(
    file = "plots/panelB_misc-control.png",
    width = WIDTH, height = HEIGHT, units = "in",
    bg = "white", res = RES,
    fonts = "Helvetica", pointsize = 6
)

draw(misc_control_plt, show_heatmap_legend = FALSE, padding = unit(0.25, "mm"))

dev.off()

##------------------------------------
# SAVE HEATMAP OBJECT: MIS-C vs COVID-19

prefix <- "MISC_COVID19"

png(
    file = "plots/panelB_misc-covid.png",
    width = WIDTH, height = HEIGHT, units = "in",
    bg = "white", res = RES,
    fonts = "Helvetica", pointsize = 6
)

draw(misc_covid_plt, show_heatmap_legend = FALSE, padding = unit(0.25, "mm"))

dev.off()

In [32]:
### Combined heatmap: union of the top-N significant genes of the three
### contrasts, plotted over all acute samples plus the controls.
res1 <- covid_control
res2 <- misc_control
res3 <- misc_covid
cpm <- cpm_ftcount
meta_data <- meta_data_all
# Only referenced by the sample_group filter that is currently disabled below
groups <- c(1, 3, 57, 58, 33, 35, 2, 4, 34, 36, 31)

ANNOTATIONS <- c("Diagnosis", "severity", "group")


###----------------------------------
## Extract significant genes

N <- 300

# Genes passing SIG_THRESH / FC_THRESH, ranked by |log2FoldChange|, top n kept
top_sig_genes <- function(res, n) {
    data.frame(res) %>%
        filter(padj < SIG_THRESH & abs(log2FoldChange) > FC_THRESH) %>%
        arrange(desc(abs(log2FoldChange))) %>%
        head(n) %>%
        pull(gene_id)
}

sig_genes1 <- top_sig_genes(res1, N)
sig_genes2 <- top_sig_genes(res2, N)
sig_genes3 <- top_sig_genes(res3, N)

sig_genes <- union(sig_genes1, union(sig_genes2, sig_genes3))

print(length(sig_genes))


###----------------------------------
## Subset metadata
metadata <- meta_data %>%
    # filter(sample_group %in% groups) %>%
    filter(timepoint == "acute" | Diagnosis == "Control_Non-inflammatory") %>%
    # mutate(sample_group = factor(sample_group, levels = groups)) %>%
    # arrange(sample_group) %>%
    column_to_rownames(var = "wbrna_sample_id") %>%
    mutate(severity = ifelse(grepl("ontrol", severity), NA, severity)) %>%
    select(all_of(ANNOTATIONS))

colnames(metadata) <- c("Diagnosis", "Severity", "group")

###----------------------------------
## Subset count matrix
# all_of() is only meaningful inside selecting verbs, so the row filter
# uses plain %in%.
mat <- data.frame(cpm) %>%
        filter(row.names(cpm) %in% sig_genes) %>%
        select(all_of(rownames(metadata))) %>%
        as.matrix()

# Row-wise z-score (scale() works on columns, hence the double transpose)
mat <- t(scale(t(mat)))

mat <- mat[, rownames(metadata), drop = FALSE]

###----------------------------------
## Colors
color <- colorRampPalette(c("blue", "yellow"))(50)
breaksList <- seq(-2, +2, length = 51)

my_colour <- list(
    Diagnosis = c('COVID-19' = '#F0484E', 'MIS-C' = '#5CB2EB', 'Control_Non-inflammatory' = '#FBE77C'),
    Severity = c("-1" = "white", "0" = "white", "2" = "#efe5d7", "3" = "#bc8e52"),
    group = c("validation" = "orange", "discovery" = "light blue", "UCSF" = "light blue")
)

###----------------------------------
## Plot

heatmap_plt_clust <- pheatmap(mat,

         # Colors
         color = color,
         breaks = breaksList,
         annotation_col = metadata,
         annotation_colors = my_colour,
         na_col = "#FFFFFF",

         # Fonts
         show_colnames = FALSE,
         show_rownames = FALSE,
         fontsize = 12,
         fontsize_col = 3,
         annotation_names_col = FALSE,
         annotation_names_row = FALSE,

         # Clustering
         clustering_distance_cols = "correlation",
         clustering_distance_rows = "correlation",
         treeheight_row = 0,
         treeheight_col = 15,

         # Misc.
         border_color = NA,
         legend = FALSE,
         annotation_legend = FALSE
         )

# Fixed: this used to assign `IDTH`, so png() below silently picked up
# whatever WIDTH a previously executed cell had left behind.
WIDTH <- 1.6
HEIGHT <- 2.6

RES <- 300


prefix <- "ALL_CLUSTER"

png(file = "plots/panelC_all.png",
        width = WIDTH, height = HEIGHT, units = "in", bg = "white", res = RES,
        fonts = "Helvetica", pointsize = 6)

draw(heatmap_plt_clust, show_heatmap_legend = FALSE, padding = unit(0.25, "mm"))

dev.off()



[1] 487


In [33]:
# Vector (PDF) copy of the combined heatmap; uses the WIDTH / HEIGHT
# currently defined in the session.
pdf(
    file = "plots/panelC_all.pdf",
    width = WIDTH, height = HEIGHT,
    paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb",
    pointsize = 6, useDingbats = FALSE
)

draw(heatmap_plt_clust, show_heatmap_legend = FALSE, padding = unit(0.25, "mm"))

dev.off()



---
## Biomarkers

In [3]:
#' Exploratory box + jitter plot of one gene's CPM per diagnosis/cohort group.
#'
#' @param GENE        Gene symbol, matched against `gene_lookup$gene_name`.
#' @param meta_data   Sample sheet with columns `cfrna_sample_id`, `Diagnosis`,
#'                    `severity` and `group`.
#' @param count_cpm   CPM matrix with samples in rows (row names are joined to
#'                    `cfrna_sample_id`) and genes in columns.
#' @param gene_lookup Annotation table with `gene_name`, `gene_id` and, when
#'                    available, `gene_key`.
#' @return A ggplot object.
get_exp_plot <- function(GENE, meta_data, count_cpm, gene_lookup = annotation) {

    samp_df <- data.frame("cfrna_sample_id" = meta_data$cfrna_sample_id,
                          "Diagnosis" = meta_data$Diagnosis,
                          "severity" = meta_data$severity,
                          "group" = meta_data$group)

    gene_label <- GENE

    # Resolve the symbol to a column of count_cpm. The original code only tried
    # gene_id, which does not exist as a column when the matrix is keyed by
    # gene_key ("<gene_id>_<gene_name>"); gene_id is still tried first so
    # matrices keyed by plain IDs behave as before.
    gene_row <- gene_lookup[which(gene_lookup$gene_name == GENE), ]
    candidates <- c(gene_row$gene_id, gene_row$gene_key)
    candidates <- candidates[candidates %in% colnames(count_cpm)]
    if (length(candidates) == 0) {
        stop("Gene '", gene_label, "' was not found in the columns of count_cpm", call. = FALSE)
    }
    GENE <- candidates[1]

    # Merging with a named vector (by.y = 0 -> its names) adds the values as
    # a column called `y`, which the aesthetics below refer to.
    samp_df <- merge(samp_df, count_cpm[, GENE], by.x = "cfrna_sample_id", by.y = 0)

    samp_df$plotting_name <- paste(samp_df$Diagnosis, samp_df$group, sep = "\n")

    samp_df$plotting_name <- factor(samp_df$plotting_name, levels = c("Control_Non-inflammatory\ndiscovery","COVID-19\ndiscovery","MIS-C\ndiscovery","MIS-C\nvalidation"))

    # NOTE(review): color_groups and theme_alex() are not defined in this
    # function; confirm that the global palette is keyed by the
    # "<Diagnosis>\n<group>" labels used here and that theme_alex() exists.
    samp_df %>%
        ggplot(aes(x = plotting_name, y = y, fill = plotting_name)) +
        geom_boxplot(outlier.shape = NA) +
        geom_jitter(height = 0, width = 0.2, size = 1) +
        annotate(geom = "text", label = gene_label, x = -Inf, y = Inf, hjust = -.1, vjust = 1.25) +
        scale_fill_manual(values = color_groups) +
        theme_alex() +
        theme(legend.position = "none",
              axis.title.x = element_blank(),
              panel.grid.minor = element_blank(),
              plot.title = element_text(hjust = 0.5)) +
        labs(y = "CPM") +
        scale_x_discrete(guide = guide_axis(n.dodge = 2))
}


#' Publication-style jitter plot of one gene's CPM per group, with arrows and
#' value labels for points above the visible range and pairwise Wilcoxon
#' significance bars.
#'
#' @param GENE            Gene symbol, matched against `gene_lookup$gene_name`.
#' @param meta_data       Sample sheet with columns `cfrna_sample_id`,
#'                        `Diagnosis`, `severity` and `group`.
#' @param count_cpm       CPM matrix with samples in rows (row names are joined
#'                        to `cfrna_sample_id`) and `gene_key` columns.
#' @param YLIM            Upper y bound; larger values are treated as outliers
#'                        and reported with an arrow and a label at the top.
#' @param gene_lookup     Annotation table with `gene_name` and `gene_key`.
#' @param expGroupPalette Currently unused. The default used to be the
#'                        self-referential `expGroupPalette = expGroupPalette`,
#'                        which errors as soon as the argument is evaluated.
#' @return A ggplot object. The number of outliers and the test table are
#'         printed as a side effect.
get_fig_plot <- function(GENE, meta_data, count_cpm, YLIM, gene_lookup = annotation, expGroupPalette = NULL) {

    ##---------------------------------------
    # PREPARE DATA
    samp_df <- data.frame("cfrna_sample_id" = meta_data$cfrna_sample_id,
                          "Diagnosis" = meta_data$Diagnosis,
                          "severity" = meta_data$severity,
                          "group" = meta_data$group)

    gene_symbol <- GENE
    GENE <- gene_lookup[which(gene_lookup$gene_name == gene_symbol), ]$gene_key
    GENE <- intersect(GENE, colnames(count_cpm))
    if (length(GENE) != 1) {
        stop("Expected exactly one column of count_cpm for gene '", gene_symbol,
             "', found ", length(GENE), call. = FALSE)
    }

    # Merging with a named vector (by.y = 0 -> its names) adds the values as
    # a column called `y`, which everything below refers to.
    samp_df <- merge(samp_df, count_cpm[, GENE], by.x = "cfrna_sample_id", by.y = 0)

    samp_df$plotting_name <- samp_df$Diagnosis

    samp_df$plotting_name <- ifelse(samp_df$Diagnosis == "MIS-C" & samp_df$group == "validation", "MIS-C_validation", samp_df$plotting_name)

    samp_df$plotting_name <- factor(samp_df$plotting_name, levels = c("Control_Non-inflammatory","COVID-19","MIS-C","MIS-C_validation"))

    ##---------------------------------------
    # MAKE PLOT
    # Fixed jitter seed so repeated runs place the points identically.
    PLOT <- samp_df %>%
        ggplot(aes(x = plotting_name, y = y, color = plotting_name)) +
        geom_point(size = .75, position = position_jitter(seed = 42, height = 0, width = 0.2)) +
        scale_color_manual(values = color_groups) +
        theme_prevail() +
        theme(legend.position = "none",
              axis.title.x = element_blank(),
              axis.title.y = element_blank(),
              axis.text.x = element_blank(),
              panel.grid.minor = element_blank(),
              plot.title = element_text(hjust = 0.5),
              plot.margin = grid::unit(c(0.02, 0.02, 0, 0), "in")) +
        labs(y = "CPM") +
        scale_x_discrete(guide = guide_axis(n.dodge = 2))

    ##---------------------------------------
    # ADD ARROWS
    outliers <- samp_df %>%
            filter(y > YLIM)

    print("num outliers:")
    print(nrow(outliers))

    # x position of every group. The fourth entry used to be a second 'MIS-C'
    # key, so validation outliers were never annotated and MIS-C outliers were
    # annotated twice.
    XORDER <- c('Control_Non-inflammatory' = 1,
                'COVID-19' = 2,
                'MIS-C' = 3,
                'MIS-C_validation' = 4)

    text_size <- 1.5
    text_lineheight <- .75

    arrow_size <- 0.5

    for (group_name in names(XORDER)) {

        # Sort the rows themselves (largest first) so that label i and
        # severity i always belong to the same sample.
        group_outliers <- outliers %>%
            filter(plotting_name == group_name) %>%
            arrange(desc(y))

        if (nrow(group_outliers) == 0) {
            next
        }

        XVAL <- XORDER[[group_name]]

        ## ADD ARROW
        PLOT <- PLOT + annotate("segment", x = XVAL, xend = XVAL,
                                y = YLIM - 0.007, yend = YLIM,
                                size = arrow_size, lineend = "butt", linejoin = "mitre",
                                arrow = arrow(length = unit(.06, "npc")))

        outlier_values <- as.character(round(group_outliers$y, 2))

        for (ii in seq_along(outlier_values)) {

            # isTRUE() keeps a missing severity from aborting the plot
            text_color <- if (isTRUE(group_outliers$severity[ii] == 2)) "red" else "black"

            # First label at YLIM, each further one slightly lower
            YVAL <- YLIM - ((ii - 1) * 0.006)

            PLOT <- PLOT + annotate("text", x = XVAL, y = YVAL, hjust = -0.2, vjust = 0.5,
                                    label = outlier_values[ii],
                                    size = text_size, family = "Helvetica",
                                    lineheight = text_lineheight, color = text_color)
        }
    }

    ##---------------------------------------
    # ADD SIG BARS
    stat.test <- samp_df %>%
        wilcox_test(y ~ plotting_name) %>%
        adjust_pvalue(method = "BH") %>%
        add_significance("p.adj") %>%
        add_xy_position(x = "plotting_name")

    # Re-space the brackets: keep their computed top-to-bottom order, then put
    # them at 5% steps of YLIM below the top of the panel.
    stat.test <- stat.test %>% arrange(desc(y.position))
    stat.test$rank <- seq_len(nrow(stat.test))
    stat.test$y.position <- YLIM - ((0.05 * stat.test$rank) * YLIM)

    print(stat.test)
    PLOT <- PLOT + stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0)

    PLOT
}


In [4]:
### Shared settings for the biomarker panels
keeper_columns <- c("gene_name", "gene_type", "baseMean", "padj", "log2FoldChange")

# Colour per plotted group (names match the plotting_name levels in get_fig_plot)
color_groups <- c('COVID-19' = '#F0484E', 'MIS-C' = '#5CB2EB', 'Control_Non-inflammatory' = '#FBE77C', "MIS-C_validation" = "#2BC0B3")


PVAL_CUTOFF <- 0.05
GROUPS <- c("COVID-19", "Control_Non-inflammatory", "MIS-C")


# Keep the three diagnoses at the acute / not-hospitalized time points.
# all_of() is only meaningful inside selecting verbs, so the filter uses
# plain %in%.
meta_data <- meta_data_all %>%
    filter(Diagnosis %in% GROUPS & timepoint %in% c("Not-hospitalized", "acute")) %>%
    mutate(cfrna_sample_id = wbrna_sample_id)


# Samples in rows, genes in columns
counts_cpm <- t(cpm_ftcount)

In [10]:
# Panel D: ADAMTS2
GENE <- "ADAMTS2"

# PDF page size and upper bound of the visible y-range
WIDTH <- 1.0
HEIGHT <- 1.5
YLIM <- 410

pdf(
    file = paste0("plots/panelD_", GENE, ".pdf"),
    width = WIDTH, height = HEIGHT, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE
)

# Left as a top-level expression so the plot is auto-printed to the open device
get_fig_plot(GENE, meta_data, counts_cpm, YLIM) +
    scale_y_continuous(breaks = c(0, 137, 274, 411)) +
    coord_cartesian(ylim = c(0, YLIM))

dev.off()

[1] "num outliers:"
[1] 0
[90m# A tibble: 6 × 14[39m
  .y.   group1       group2    n1    n2 statistic        p    p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m        [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m y     MIS-C        MIS-C…    69    19     [4m1[24m154  4.4 [90me[39m[31m- 7[39m 8.8 [90me[39m[31m- 7[39m ****        
[90m2[39m y     COVID-19     MIS-C…    36    19      497  6   [90me[39m[31m- 3[39m 7.2 [90me[39m[31m- 3[39m **          
[90m3[39m y     COVID-19     MIS-C     36    69     [4m1[24m012. 1.2 [90me[39m[31m- 1[39m 1.2 [90me[39m[31m- 1[39m ns          
[90m4[39m y     Control_Non… MIS-C…    23    19       34  8.54[90me[39m[31m- 7[39m 1.28[90me[39m[31m- 6[39m ****        
[90m5[39m y     Control_Non… MIS-C     23    69       47  1.32[90me

In [11]:
# Panel D: TRBV11-2
GENE <- "TRBV11-2"

# PDF page size and upper bound of the visible y-range
WIDTH <- 0.948
HEIGHT <- 1.5
YLIM <- 21

pdf(
    file = paste0("plots/panelD_", GENE, ".pdf"),
    width = WIDTH, height = HEIGHT, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE
)

# Left as a top-level expression so the plot is auto-printed to the open device
get_fig_plot(GENE, meta_data, counts_cpm, YLIM) +
    scale_y_continuous(breaks = c(0, 7, 14, 21)) +
    coord_cartesian(ylim = c(0, YLIM))

dev.off()

[1] "num outliers:"
[1] 0
[90m# A tibble: 6 × 14[39m
  .y.   group1  group2    n1    n2 statistic     p p.adj p.adj.signif y.position
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m   [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             [3m[90m<dbl>[39m[23m
[90m1[39m y     MIS-C   MIS-C…    69    19       731 0.445 0.534 ns                 20.0
[90m2[39m y     COVID-… MIS-C…    36    19       238 0.062 0.124 ns                 18.9
[90m3[39m y     COVID-… MIS-C     36    69       768 0.001 0.006 **                 17.8
[90m4[39m y     Contro… MIS-C…    23    19       208 0.8   0.8   ns                 16.8
[90m5[39m y     Contro… MIS-C     23    69       663 0.24  0.36  ns                 15.8
[90m6[39m y     Contro… COVID…    23    36       559 0.023 0.069 ns                 14.7
[90m# … with 4 more variables: groups <named l

In [12]:
# Panel D: SIGLEC1
GENE <- "SIGLEC1"

# PDF page size and upper bound of the visible y-range
WIDTH <- 1.0
HEIGHT <- 1.5
YLIM <- 660

pdf(
    file = paste0("plots/panelD_", GENE, ".pdf"),
    width = WIDTH, height = HEIGHT, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE
)

# Left as a top-level expression so the plot is auto-printed to the open device
get_fig_plot(GENE, meta_data, counts_cpm, YLIM) +
    scale_y_continuous(breaks = c(0, 220, 440, 660)) +
    coord_cartesian(ylim = c(0, YLIM))

dev.off()

[1] "num outliers:"
[1] 1
[90m# A tibble: 6 × 14[39m
  .y.   group1         group2    n1    n2 statistic       p   p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m          [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m y     MIS-C          MIS-C…    69    19      978  1   [90me[39m[31m-3[39m 1.2 [90me[39m[31m-3[39m **          
[90m2[39m y     COVID-19       MIS-C…    36    19      644  9.3 [90me[39m[31m-8[39m 2.79[90me[39m[31m-7[39m ****        
[90m3[39m y     COVID-19       MIS-C     36    69     [4m2[24m041  6.99[90me[39m[31m-8[39m 2.79[90me[39m[31m-7[39m ****        
[90m4[39m y     Control_Non-i… MIS-C…    23    19      364  2.4 [90me[39m[31m-4[39m 3.6 [90me[39m[31m-4[39m ***         
[90m5[39m y     Control_Non-i… MIS-C     23    69      908. 3.02[90me[39m[

In [13]:
# Panel D: KLRB1
GENE <- "KLRB1"

# PDF page size and upper bound of the visible y-range
WIDTH <- 0.948
HEIGHT <- 1.5
YLIM <- 100

pdf(
    file = paste0("plots/panelD_", GENE, ".pdf"),
    width = WIDTH, height = HEIGHT, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE
)

# Left as a top-level expression so the plot is auto-printed to the open device
get_fig_plot(GENE, meta_data, counts_cpm, YLIM) +
    scale_y_continuous(breaks = c(0, 33, 66, 99)) +
    coord_cartesian(ylim = c(0, YLIM))

dev.off()

[1] "num outliers:"
[1] 1
[90m# A tibble: 6 × 14[39m
  .y.   group1       group2    n1    n2 statistic        p    p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m        [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m y     MIS-C        MIS-C…    69    19       737 4.11[90me[39m[31m- 1[39m 4.11[90me[39m[31m- 1[39m ns          
[90m2[39m y     COVID-19     MIS-C…    36    19       429 1.26[90me[39m[31m- 1[39m 1.89[90me[39m[31m- 1[39m ns          
[90m3[39m y     COVID-19     MIS-C     36    69      [4m1[24m388 3.26[90me[39m[31m- 1[39m 3.91[90me[39m[31m- 1[39m ns          
[90m4[39m y     Control_Non… MIS-C…    23    19       428 4.34[90me[39m[31m-10[39m 1.30[90me[39m[31m- 9[39m ****        
[90m5[39m y     Control_Non… MIS-C     23    69      [4m1[24m504 1.52[90me

In [14]:
# Supplementary panel B: KLRF1
GENE <- "KLRF1"

# PDF page size and upper bound of the visible y-range
WIDTH <- 0.948
HEIGHT <- 1.5
YLIM <- 75

pdf(
    file = paste0("plots/SupPanelB_", GENE, ".pdf"),
    width = WIDTH, height = HEIGHT, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE
)

# Left as a top-level expression so the plot is auto-printed to the open device
get_fig_plot(GENE, meta_data, counts_cpm, YLIM) +
    scale_y_continuous(breaks = c(0, 25, 50, 75)) +
    coord_cartesian(ylim = c(0, YLIM))

dev.off()

[1] "num outliers:"
[1] 1
[90m# A tibble: 6 × 14[39m
  .y.   group1         group2    n1    n2 statistic       p   p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m          [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m y     MIS-C          MIS-C…    69    19      965  2   [90me[39m[31m-3[39m 2.4 [90me[39m[31m-3[39m **          
[90m2[39m y     COVID-19       MIS-C…    36    19      578  2.95[90me[39m[31m-5[39m 5.9 [90me[39m[31m-5[39m ****        
[90m3[39m y     COVID-19       MIS-C     36    69     [4m1[24m675  3   [90me[39m[31m-3[39m 3   [90me[39m[31m-3[39m **          
[90m4[39m y     Control_Non-i… MIS-C…    23    19      408. 1.55[90me[39m[31m-6[39m 4.65[90me[39m[31m-6[39m ****        
[90m5[39m y     Control_Non-i… MIS-C     23    69     [4m1[24m339  8.72[90

In [15]:
# Supplementary panel B: CD177
GENE <- "CD177"

# PDF page size and upper bound of the visible y-range
WIDTH <- 1.037
HEIGHT <- 1.5
YLIM <- 1800

pdf(
    file = paste0("plots/SupPanelB_", GENE, ".pdf"),
    width = WIDTH, height = HEIGHT, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE
)

# Left as a top-level expression so the plot is auto-printed to the open device
get_fig_plot(GENE, meta_data, counts_cpm, YLIM) +
    scale_y_continuous(breaks = c(0, 600, 1200, 1800)) +
    coord_cartesian(ylim = c(0, YLIM))

dev.off()

[1] "num outliers:"
[1] 0
[90m# A tibble: 6 × 14[39m
  .y.   group1       group2    n1    n2 statistic        p    p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m        [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m y     MIS-C        MIS-C…    69    19      437  2.7 [90me[39m[31m- 2[39m 2.7 [90me[39m[31m- 2[39m *           
[90m2[39m y     COVID-19     MIS-C…    36    19      105  9.37[90me[39m[31m- 6[39m 1.41[90me[39m[31m- 5[39m ****        
[90m3[39m y     COVID-19     MIS-C     36    69      750. 9.17[90me[39m[31m- 4[39m 1.10[90me[39m[31m- 3[39m **          
[90m4[39m y     Control_Non… MIS-C…    23    19        0  3.24[90me[39m[31m- 8[39m 6.48[90me[39m[31m- 8[39m ****        
[90m5[39m y     Control_Non… MIS-C     23    69       27  4.84[90me[39m[31m-12[39m

In [16]:
# Supplementary panel B: ISG15
GENE <- "ISG15"

# PDF page size and upper bound of the visible y-range
WIDTH <- 1.037
HEIGHT <- 1.5
YLIM <- 1200

pdf(
    file = paste0("plots/SupPanelB_", GENE, ".pdf"),
    width = WIDTH, height = HEIGHT, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE
)

# Left as a top-level expression so the plot is auto-printed to the open device
get_fig_plot(GENE, meta_data, counts_cpm, YLIM) +
    scale_y_continuous(breaks = c(0, 400, 800, 1200)) +
    coord_cartesian(ylim = c(0, YLIM))

dev.off()

[1] "num outliers:"
[1] 2
[90m# A tibble: 6 × 14[39m
  .y.   group1         group2    n1    n2 statistic       p   p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m          [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m y     MIS-C          MIS-C…    69    19       720 5.16[90me[39m[31m-1[39m 6.19[90me[39m[31m-1[39m ns          
[90m2[39m y     COVID-19       MIS-C…    36    19       558 6.8 [90me[39m[31m-5[39m 1.36[90me[39m[31m-4[39m ***         
[90m3[39m y     COVID-19       MIS-C     36    69      [4m1[24m930 3.46[90me[39m[31m-6[39m 1.04[90me[39m[31m-5[39m ****        
[90m4[39m y     Control_Non-i… MIS-C…    23    19       217 9.8 [90me[39m[31m-1[39m 9.8 [90me[39m[31m-1[39m ns          
[90m5[39m y     Control_Non-i… MIS-C     23    69       713 4.71[90me[39m[

In [17]:
# Supplementary panel B: CREB3L1
GENE <- "CREB3L1"

# PDF page size and upper bound of the visible y-range
WIDTH <- 0.948
HEIGHT <- 1.5
YLIM <- 21

pdf(
    file = paste0("plots/SupPanelB_", GENE, ".pdf"),
    width = WIDTH, height = HEIGHT, paper = "special", bg = "white",
    fonts = "Helvetica", colormodel = "srgb", pointsize = 6, useDingbats = FALSE
)

# Left as a top-level expression so the plot is auto-printed to the open device
get_fig_plot(GENE, meta_data, counts_cpm, YLIM) +
    scale_y_continuous(breaks = c(0, 7, 14, 21)) +
    coord_cartesian(ylim = c(0, YLIM))

dev.off()

[1] "num outliers:"
[1] 0
[90m# A tibble: 6 × 14[39m
  .y.   group1         group2    n1    n2 statistic       p   p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m          [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m y     MIS-C          MIS-C…    69    19       448 2.6 [90me[39m[31m-2[39m 3.12[90me[39m[31m-2[39m *           
[90m2[39m y     COVID-19       MIS-C…    36    19       153 1.35[90me[39m[31m-4[39m 4.53[90me[39m[31m-4[39m ***         
[90m3[39m y     COVID-19       MIS-C     36    69       860 3   [90me[39m[31m-3[39m 6   [90me[39m[31m-3[39m **          
[90m4[39m y     Control_Non-i… MIS-C…    23    19        87 1.51[90me[39m[31m-4[39m 4.53[90me[39m[31m-4[39m ***         
[90m5[39m y     Control_Non-i… MIS-C     23    69       514 4   [90me[39m[31m-3[39

---
## gene module scores

In [23]:
get_plots <- function(ct_counts,expGroupPalette,DiagnosisPalette,YLIM){
    # Build the three panels drawn for one long-format expression table:
    # a control-only boxplot, an acute-phase boxplot with pairwise
    # significance brackets, and a mean +/- SE line plot over timepoints.
    #
    # Args:
    #   ct_counts:        data frame with the columns `timepoint`, `Diagnosis`,
    #                     `expGroup` and a numeric `value`.
    #   expGroupPalette:  not used inside this function; kept so existing
    #                     calls keep working.
    #   DiagnosisPalette: named colour vector; must contain an entry named
    #                     "Control_Non-inflammatory". Used for fill and colour.
    #   YLIM:             number; significance brackets are stacked downwards
    #                     from this value in steps of 5% of YLIM.
    #
    # Returns:
    #   Named list of ggplot objects: `line_plot`, `cntrl_boxplot`,
    #   `acute_boxplot`.
    #
    # Side effect: prints the pairwise Wilcoxon result table.

    control_label <- "Control_Non-inflammatory"
    timepoint_levels <- c("acute","post-acute","1m","3m+","Not-hospitalized")

    ct_counts$timepoint <- factor(ct_counts$timepoint,levels=timepoint_levels)

    # Theme elements shared by all three panels.
    common <- list(theme_prevail(),
        theme(legend.position = "none",
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
#         panel.border = element_blank(),
        panel.grid.minor = element_blank()))

    # Scales shared by the two boxplots.
    box_common <- list(scale_x_discrete(expand = c(0, 0)),
        scale_fill_manual(values=DiagnosisPalette))

    # Per timepoint x Diagnosis summary (mean, se, ...); groups with a single
    # sample and the control group are not drawn in the line plot.
    ct_counts_SUM <- Rmisc::summarySE(ct_counts,
                 measurevar="value",
                 groupvars=c("timepoint","Diagnosis")) %>%
        filter(N>1) %>%
        filter(!grepl(control_label,Diagnosis))

    # Mean of the control samples, drawn as a dashed reference line.
    # `[[` is used so the extraction yields a plain vector for data.frame,
    # tibble and data.table input alike.
    is_control <- grepl(control_label,ct_counts$Diagnosis)
    MEAN_CNTRL <- mean(ct_counts[["value"]][is_control])
    control_colour <- DiagnosisPalette[["Control_Non-inflammatory"]][1]

    # Samples shown in the acute boxplot and used for the pairwise tests.
    # NOTE(review): grepl("acute", ...) also matches "post-acute" - confirm
    # that post-acute samples are meant to be included here.
    acute_data <- ct_counts %>%
        filter(grepl("acute",timepoint) | grepl(control_label,Diagnosis))

    ##------------------------------------
    # Control Boxplot

    cntrl_boxplot <- ct_counts %>%
        filter(grepl("Control",Diagnosis)) %>%
        ggplot(aes(x=Diagnosis,y=value,fill=expGroup))+
        geom_boxplot(width=0.5/4,size = 0.2,outlier.size = .01)+
        stat_summary(fun=mean, geom="point", shape=18, size=.2, color="white", fill="white")

    ##------------------------------------
    # Acute Boxplot

    acute_boxplot <- acute_data %>%
        mutate(expGroup = factor(expGroup,levels=c("Control_Non-inflammatory","COVID-19","MIS-C","MIS-C_validation")))%>%
        ggplot(aes(x=expGroup,y=value))+
        geom_hline(yintercept = MEAN_CNTRL,alpha = 0.75, size = .5, linetype = "dashed",color = control_colour)+
        geom_boxplot(aes(fill=expGroup), width=0.75,size = 0.2,outlier.size = .01, outlier.shape=NA)+
        stat_summary(fun=mean, geom="point", shape=18, size=.2, color="white", fill="white")

    ##------------------------------------
    # LINE PLOT
    pd <- position_dodge(0.05)

    line_plot <- ct_counts_SUM %>%
        mutate(timepoint = factor(timepoint,levels=timepoint_levels)) %>%
        ggplot(aes(x=timepoint, y=value, colour=Diagnosis, group=Diagnosis)) +
        geom_hline(yintercept = MEAN_CNTRL,alpha = 0.75, size = .5, linetype = "dashed",color = control_colour) +
        geom_errorbar(aes(ymin=value-se, ymax=value+se), width=.1, position=pd) +
        geom_line(position=pd) +
        geom_point(position=pd, size = 0.5)+
        scale_x_discrete(expand = c(.1, 0))+
        scale_color_manual(values=DiagnosisPalette)

    ##------------------------------------
    # Customize

    # Common y range over all three panels (always reaching down to 0).
    y_range_cntl <- layer_scales(cntrl_boxplot)$y$range$range
    y_range_acute <- layer_scales(acute_boxplot)$y$range$range
    y_range_ln <- layer_scales(line_plot)$y$range$range

    starts <- c(y_range_cntl[1],y_range_acute[1],y_range_ln[1],0)
    stops <- c(y_range_cntl[2],y_range_acute[2],y_range_ln[2])

    proper_scale <- list(coord_cartesian(ylim = c(min(starts), max(stops))),
                    scale_y_continuous(labels = function(x) format(x, scientific = TRUE, digits = 2),
                                       breaks = seq(0,max(stops),length.out  = 4)))

    ##---------------------------------------
    # ADD SIG BARS

    # wilcox_test() already returns `p.adj`/`p.adj.signif` computed with its
    # default adjustment method. After re-adjusting with BH the significance
    # symbols must be recomputed from `p.adj`; otherwise the plotted
    # `p.adj.signif` labels would not correspond to the BH-adjusted values.
    stat.test <- acute_data %>%
        wilcox_test(value ~ expGroup, paired = FALSE) %>%
        adjust_pvalue(method = "BH") %>%
        add_significance("p.adj") %>%
        add_xy_position(x = "expGroup")

    # Re-stack the brackets from YLIM downwards, keeping their relative order.
    stat.test <- stat.test %>% arrange(desc(y.position))
    stat.test$rank <- seq_len(nrow(stat.test))
    stat.test$y.position <- YLIM - ((0.05*stat.test$rank)*YLIM)

    sig_bars <- stat_pvalue_manual(stat.test, label = "p.adj.signif",tip.length=0)

    acute_boxplot <- acute_boxplot + sig_bars

    print(stat.test)

    ##---------------------------------------
    # FINALIZE

    line_plot <- line_plot + common + proper_scale

    cntrl_boxplot <- cntrl_boxplot + common + box_common + proper_scale

    acute_boxplot <- acute_boxplot + common + box_common + proper_scale

    list("line_plot" = line_plot,
         "cntrl_boxplot" = cntrl_boxplot,
         "acute_boxplot" = acute_boxplot)
}

# Named colour vectors, keyed by diagnosis label and by experimental-group label.
DiagnosisPalette <- c(
    "MIS-C"                    = "#5CB2EB",
    "COVID-19"                 = "#F0484E",
    "Control_Non-inflammatory" = "#FBE77C",
    "MIS-C_validation"         = "#2BC0B3"
)

expGroupPalette <- c(
    "MIS-C_validation"                   = "#2BC0B3",
    "MIS-C_discovery"                    = "#5CB2EB",
    "COVID-19_discovery"                 = "#F0484E",
    "Control_Non-inflammatory_discovery" = "#FBE77C"
)


In [24]:
# Diagnosis labels of interest.
GROUPS <- c("COVID-19", "Control_Non-inflammatory", "MIS-C")

#------------------------
## LOAD

# Sample sheet and raw feature counts (first column of the count file is
# used as row names).
meta_data <- read.csv("../1_sample-data/STable7_wbrna-samples.csv")
wb_ftcounts <- read.delim(
    "../1_sample-data/wbrna_ftcounts.txt",
    row.names = 1
)

# Keep only the columns whose total count is greater than 1.
wb_ftcounts <- wb_ftcounts[, colSums(wb_ftcounts) > 1]

#------------------------
## FILTER

# Genes to drop; the second column of the list is matched against the row
# names with everything from the first "." onwards stripped.
gene.list <- read.delim(
    "../0_support-files/genelist.remove.tsv",
    col.names = c("type,", "ENSMBL", "gene_symbol")
)
gene.ids <- gsub("\\..*", "", rownames(wb_ftcounts))
exclude.idx <- gene.ids %in% gene.list[, 2]
wb_ftcounts <- wb_ftcounts[!exclude.idx, ]

#------------------------
## NORMALIZE

# Counts per million; from here on `wb_ftcounts` holds the edgeR::cpm() result.
wb_ftcounts <- edgeR::cpm(wb_ftcounts)

In [25]:
# Theme fragment for panels that are not the left-most one: hide y axis
# text and ticks.
NOT_LEFT <- list(theme(axis.text.y = element_blank(),axis.ticks.y=element_blank()))

# # ACUTE - LINE
# One row, four equal columns: grob 2 spans columns 1-2, grob 1 columns 3-4.
lay <- rbind(c(2,2,1,1))

# Per-panel margins (inches) used to butt the panels against each other.
ACUTE_MARGIN <- list(theme(plot.margin=grid::unit(c(0.02,-.025,0,0), "in")),scale_x_discrete(expand = expansion(add = 0.5)))
# ACUTE_MARGIN = theme(plot.margin=grid::unit(c(0,-.2,0,0), "in"))
CNTRL_MARGIN <- theme(plot.margin=grid::unit(c(0,-.5,0,-.3), "in"))
LINE_MARGIN <- theme(plot.margin=grid::unit(c(0.02,0.02,0,-0.025), "in"))

# Arrange the line plot (grob 1) and the acute boxplot (grob 2) with layout
# `lay`. `cntrl_boxplot` is accepted but not drawn.
GRID_fun <- function(acute_boxplot, cntrl_boxplot, line_plot, lay){
    grid.arrange(line_plot, acute_boxplot, layout_matrix = lay)
}


## get data frame with counts
# Subsetting a single row of the matrix yields a named vector, which
# data.frame() turns into one column with one row per sample. That only works
# when exactly one row matches, so fail loudly otherwise instead of silently
# building a malformed table.
gene_rows <- grepl("TRBV11-2",rownames(wb_ftcounts))
if (sum(gene_rows) != 1) {
    stop("Expected exactly one row matching 'TRBV11-2' in wb_ftcounts, found ",
         sum(gene_rows), call. = FALSE)
}
tcr_ftcounts <- wb_ftcounts[gene_rows,] %>% data.frame()
# Column label (becomes the `variable` value after melting); previously
# misspelled "TRVB11.2".
colnames(tcr_ftcounts) <- "TRBV11.2"


# Attach sample annotation (matched on sample id vs. row names) and reshape
# to long format; validation-cohort samples get their own expGroup label.
ID_KEYS <- c("wbrna_sample_id","Diagnosis","group","timepoint")
df <- merge(meta_data[,ID_KEYS], tcr_ftcounts, by.x= "wbrna_sample_id", by.y=0)
output_melt <- melt(df,id.vars = ID_KEYS) %>%
    mutate(Diagnosis_group = paste0(Diagnosis,"_",group)) %>%
    mutate(expGroup = Diagnosis) %>%
    mutate(expGroup = ifelse(group == "validation","MIS-C_validation",expGroup))

output_melt <- output_melt %>% filter(group %in% c("discovery","validation"))





In [26]:
# Output size of the PDF, in inches.
WIDTH <- 2.75
HEIGHT <- 1.2

##------------------------------------
GENE <- 'TRBV11-2'

# Upper y-axis limit; also passed to get_plots() for bracket placement.
LIM <- 12

all_plots <- get_plots(output_melt, expGroupPalette, DiagnosisPalette, LIM)

# Fixed 0..LIM axis with five evenly spaced breaks; added last so it replaces
# the scale and coordinate system already set on the plots.
LIMITS <- list(
    scale_y_continuous(breaks = seq(0, LIM, length.out = 5)), #labels = function(x) format(x, scientific = TRUE, digits = 2)),
    coord_cartesian(ylim = c(0, LIM))
)

cntrl_boxplot <- all_plots[["cntrl_boxplot"]] +
    CNTRL_MARGIN +
    NOT_LEFT +
    LIMITS
acute_boxplot <- all_plots[["acute_boxplot"]] +
    ACUTE_MARGIN +
    LIMITS
line_plot <- all_plots[["line_plot"]] +
    LINE_MARGIN +
    NOT_LEFT +
    LIMITS

pdf(
    file = paste0("plots/SupPanelC_", GENE, ".pdf"),
    width = WIDTH,
    height = HEIGHT,
    paper = "special",
    bg = "white",
    fonts = "Helvetica",
    colormodel = "srgb",
    pointsize = 6,
    useDingbats = FALSE
)

# GRID_fun() draws onto the open PDF device and returns the arranged table.
all_plt <- GRID_fun(acute_boxplot, cntrl_boxplot, line_plot, lay)

print(all_plt)

dev.off()



“NaNs produced”


[90m# A tibble: 6 × 15[39m
  .y.   group1         group2    n1    n2 statistic       p   p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m          [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m value MIS-C          MIS-C…    81    19       857 4.43[90me[39m[31m-1[39m 5.32[90me[39m[31m-1[39m ns          
[90m2[39m value COVID-19       MIS-C…    48    19       303 3.1 [90me[39m[31m-2[39m 6.2 [90me[39m[31m-2[39m ns          
[90m3[39m value COVID-19       MIS-C     48    81      [4m1[24m155 1.07[90me[39m[31m-4[39m 6.42[90me[39m[31m-4[39m ***         
[90m4[39m value Control_Non-i… MIS-C…    23    19       208 8   [90me[39m[31m-1[39m 8   [90me[39m[31m-1[39m ns          
[90m5[39m value Control_Non-i… MIS-C     23    81       783 2.45[90me[39m[31m-1[39m 3.68[90me[39m

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'x' is already present. Adding another scale for 'x', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.



TableGrob (1 x 4) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,3-4) arrange gtable[layout]
2 2 (1-1,1-2) arrange gtable[layout]


In [27]:
# Gene to plot; used as the row-name pattern, the column label and in the
# output file name.
GENE_NAME <- "KLRB1"


## get data frame with counts
# Subsetting a single row of the matrix yields a named vector, which
# data.frame() turns into one column with one row per sample. The pattern is
# not anchored, so make sure it selects exactly one row instead of silently
# building a malformed table.
gene_rows <- grepl(GENE_NAME,rownames(wb_ftcounts))
if (sum(gene_rows) != 1) {
    stop("Expected exactly one row matching '", GENE_NAME,
         "' in wb_ftcounts, found ", sum(gene_rows), call. = FALSE)
}
tcr_ftcounts <- wb_ftcounts[gene_rows,] %>% data.frame()
colnames(tcr_ftcounts) <- GENE_NAME


# Attach sample annotation (matched on sample id vs. row names) and reshape
# to long format; validation-cohort samples get their own expGroup label.
ID_KEYS <- c("wbrna_sample_id","Diagnosis","group","timepoint")
df <- merge(meta_data[,ID_KEYS], tcr_ftcounts, by.x= "wbrna_sample_id", by.y=0)
output_melt <- melt(df,id.vars = ID_KEYS) %>%
    mutate(Diagnosis_group = paste0(Diagnosis,"_",group)) %>%
    mutate(expGroup = Diagnosis) %>%
    mutate(expGroup = ifelse(group == "validation","MIS-C_validation",expGroup))
# output_melt <- output_melt %>% filter(group %in% c("discovery","validation"))


##-----------------------------------
# Upper y-axis limit (also used for bracket placement) and PDF size in inches.
LIM <- 100
WIDTH <- 2.75
HEIGHT <- 1.2

all_plots <- get_plots(output_melt,expGroupPalette,DiagnosisPalette,LIM)

# Fixed 0..LIM axis with five evenly spaced breaks; added last so it replaces
# the scale and coordinate system already set on the plots.
LIMITS <- list(scale_y_continuous(breaks = seq(0,LIM,length.out  = 5)), #labels = function(x) format(x, scientific = TRUE, digits = 2)),
               coord_cartesian(ylim = c(0,LIM))
              )

cntrl_boxplot <- all_plots[["cntrl_boxplot"]] + CNTRL_MARGIN + NOT_LEFT + LIMITS
acute_boxplot <- all_plots[["acute_boxplot"]] + ACUTE_MARGIN + LIMITS
line_plot <- all_plots[["line_plot"]] + LINE_MARGIN + NOT_LEFT + LIMITS

pdf(file=paste0("plots/SupPanelC_",GENE_NAME,".pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

# GRID_fun() draws onto the open PDF device and returns the arranged table.
all_plt <- GRID_fun(acute_boxplot, cntrl_boxplot, line_plot, lay)

print(all_plt)

dev.off()

“NaNs produced”


[90m# A tibble: 6 × 15[39m
  .y.   group1       group2    n1    n2 statistic        p    p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m        [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m value MIS-C        MIS-C…    81    19      846. 5.01[90me[39m[31m- 1[39m 5.01[90me[39m[31m- 1[39m ns          
[90m2[39m value COVID-19     MIS-C…    48    19      574. 1.04[90me[39m[31m- 1[39m 1.56[90me[39m[31m- 1[39m ns          
[90m3[39m value COVID-19     MIS-C     48    81     [4m2[24m242. 1.48[90me[39m[31m- 1[39m 1.78[90me[39m[31m- 1[39m ns          
[90m4[39m value Control_Non… MIS-C…    23    19      428  4.34[90me[39m[31m-10[39m 1.30[90me[39m[31m- 9[39m ****        
[90m5[39m value Control_Non… MIS-C     23    81     [4m1[24m771  4.95[90me[39m[31m-11[39m 2.97[9

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'x' is already present. Adding another scale for 'x', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.



TableGrob (1 x 4) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,3-4) arrange gtable[layout]
2 2 (1-1,1-2) arrange gtable[layout]


In [28]:
# Gene to plot. The display name and the row-name pattern are kept apart so
# that the regex anchor does not end up in the column label or in the output
# file name (previously "plots/SupPanelC_ADAMTS2$.pdf").
GENE_NAME <- "ADAMTS2"
# Anchored at the end of the row name so that longer names merely starting
# with the same symbol are not selected.
GENE_PATTERN <- paste0(GENE_NAME, "$")


## get data frame with counts
# Subsetting a single row of the matrix yields a named vector, which
# data.frame() turns into one column with one row per sample. That only works
# when exactly one row matches, so fail loudly otherwise instead of silently
# building a malformed table.
gene_rows <- grepl(GENE_PATTERN,rownames(wb_ftcounts))
if (sum(gene_rows) != 1) {
    stop("Expected exactly one row matching '", GENE_PATTERN,
         "' in wb_ftcounts, found ", sum(gene_rows), call. = FALSE)
}
tcr_ftcounts <- wb_ftcounts[gene_rows,] %>% data.frame()
colnames(tcr_ftcounts) <- GENE_NAME


# Attach sample annotation (matched on sample id vs. row names) and reshape
# to long format; validation-cohort samples get their own expGroup label.
ID_KEYS <- c("wbrna_sample_id","Diagnosis","group","timepoint")
df <- merge(meta_data[,ID_KEYS], tcr_ftcounts, by.x= "wbrna_sample_id", by.y=0)
output_melt <- melt(df,id.vars = ID_KEYS) %>%
    mutate(Diagnosis_group = paste0(Diagnosis,"_",group)) %>%
    mutate(expGroup = Diagnosis) %>%
    mutate(expGroup = ifelse(group == "validation","MIS-C_validation",expGroup))
# output_melt <- output_melt %>% filter(group %in% c("discovery","validation"))


##-----------------------------------
# Upper y-axis limit (also used for bracket placement) and PDF size in inches.
LIM <- 120
WIDTH <- 2.75
HEIGHT <- 1.2

all_plots <- get_plots(output_melt,expGroupPalette,DiagnosisPalette,LIM)

# Fixed 0..LIM axis with five evenly spaced breaks; added last so it replaces
# the scale and coordinate system already set on the plots.
LIMITS <- list(scale_y_continuous(breaks = seq(0,LIM,length.out  = 5)), #labels = function(x) format(x, scientific = TRUE, digits = 2)),
               coord_cartesian(ylim = c(0,LIM))
              )

cntrl_boxplot <- all_plots[["cntrl_boxplot"]] + CNTRL_MARGIN + NOT_LEFT + LIMITS
acute_boxplot <- all_plots[["acute_boxplot"]] + ACUTE_MARGIN + LIMITS
line_plot <- all_plots[["line_plot"]] + LINE_MARGIN + NOT_LEFT + LIMITS

pdf(file=paste0("plots/SupPanelC_",GENE_NAME,".pdf"),
        width=WIDTH,height=HEIGHT, paper="special", bg="white",
        fonts="Helvetica", colormodel = "srgb", pointsize=6, useDingbats = FALSE)

# GRID_fun() draws onto the open PDF device and returns the arranged table.
all_plt <- GRID_fun(acute_boxplot, cntrl_boxplot, line_plot, lay)

print(all_plt)

dev.off()

“NaNs produced”


[90m# A tibble: 6 × 15[39m
  .y.   group1       group2    n1    n2 statistic        p    p.adj p.adj.signif
  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m        [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m     [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m1[39m value MIS-C        MIS-C…    81    19      [4m1[24m354 2.87[90me[39m[31m- 7[39m 5.74[90me[39m[31m- 7[39m ****        
[90m2[39m value COVID-19     MIS-C…    48    19       676 2   [90me[39m[31m- 3[39m 2.4 [90me[39m[31m- 3[39m **          
[90m3[39m value COVID-19     MIS-C     48    81      [4m1[24m547 5.3 [90me[39m[31m- 2[39m 5.3 [90me[39m[31m- 2[39m ns          
[90m4[39m value Control_Non… MIS-C…    23    19        34 8.54[90me[39m[31m- 7[39m 1.28[90me[39m[31m- 6[39m ****        
[90m5[39m value Control_Non… MIS-C     23    81        61 7.57[90me[39m[31m-12[39m 4.54[9

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'x' is already present. Adding another scale for 'x', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Coordinate system already present. Adding new coordinate system, which will replace the existing one.



TableGrob (1 x 4) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,3-4) arrange gtable[layout]
2 2 (1-1,1-2) arrange gtable[layout]


---
# PAPER

In [8]:
# Number of genes with padj < 0.01 and |log2FoldChange| > 1.5 in each contrast.
covid_control %>%
    filter(padj < 0.01, abs(log2FoldChange) > 1.5) %>%
    nrow()
misc_covid %>%
    filter(padj < 0.01, abs(log2FoldChange) > 1.5) %>%
    nrow()
misc_control %>%
    filter(padj < 0.01, abs(log2FoldChange) > 1.5) %>%
    nrow()

In [10]:
# Six genes with the largest log2FoldChange among those passing
# padj < 0.01 and |log2FoldChange| > 1.5, first for MIS-C vs control ...
misc_control %>%
    filter(padj < 0.01, abs(log2FoldChange) > 1.5) %>%
    arrange(desc(log2FoldChange)) %>%
    head()

# ... then for COVID-19 vs control.
covid_control %>%
    filter(padj < 0.01, abs(log2FoldChange) > 1.5) %>%
    arrange(desc(log2FoldChange)) %>%
    head()

Unnamed: 0_level_0,GeneID,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_name,gene_type,gene_id
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<chr>
1,ENSG00000087116.16_ADAMTS2,89.49833,8.74551,0.5187532,16.858711,9.054248999999999e-64,2.327666e-59,,,ENSG00000087116.16_ADAMTS2
2,ENSG00000152463.15_OLAH,46.05071,8.35103,0.6401892,13.044628,6.818868e-39,1.593631e-35,,,ENSG00000152463.15_OLAH
3,ENSG00000204936.10_CD177,348.46319,8.117924,0.5151844,15.757316,6.117692e-56,3.931841e-52,,,ENSG00000204936.10_CD177
4,ENSG00000135424.18_ITGA7,85.53971,6.67233,0.489574,13.62885,2.6979729999999998e-42,7.706611000000001e-39,,,ENSG00000135424.18_ITGA7
5,ENSG00000169174.11_PCSK9,10.61449,6.341073,0.7053181,8.990374,2.4639029999999995e-19,3.0600010000000005e-17,,,ENSG00000169174.11_PCSK9
6,ENSG00000088340.17_FER1L4,11.22655,6.124927,0.6094961,10.049164,9.264964e-24,2.507197e-21,,,ENSG00000088340.17_FER1L4


Unnamed: 0_level_0,GeneID,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_name,gene_type,gene_id
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<chr>
1,ENSG00000087116.16_ADAMTS2,84.088364,8.802788,0.7471866,11.781245,4.876733e-32,6.518079000000001e-28,,,ENSG00000087116.16_ADAMTS2
2,ENSG00000152463.15_OLAH,10.971858,6.372328,0.8855958,7.195526,6.22204e-13,2.169436e-10,,,ENSG00000152463.15_OLAH
3,ENSG00000115155.19_OTOF,57.669326,6.286742,0.6953879,9.040626,1.5577699999999998e-19,4.164128e-16,,,ENSG00000115155.19_OTOF
4,ENSG00000204936.10_CD177,86.39521,6.27738,0.5714206,10.98557,4.483995e-28,2.996579e-24,,,ENSG00000204936.10_CD177
5,ENSG00000165949.12_IFI27,794.929388,5.965423,0.5144332,11.596108,4.312476e-31,4.322934e-27,,,ENSG00000165949.12_IFI27
6,ENSG00000108821.14_COL1A1,5.215902,5.71844,1.2027668,4.754405,1.990321e-06,7.451531e-05,,,ENSG00000108821.14_COL1A1


In [18]:
# First gene set: look it up among genes with negative log2FoldChange in the
# misc_covid table (padj < 0.01, |log2FoldChange| > 1.5).
GENES <- c("IFIT2", "SIGLEC1", "IFI27", "IFI44L", "ISG15", "IFIT3")

length(GENES)

misc_covid %>%
    filter(
        padj < 0.01,
        abs(log2FoldChange) > 1.5,
        log2FoldChange < 0
    ) %>%
    # gene symbol = everything after the last "_" of the gene id
    mutate(gene_name = gsub(".*_", "", gene_id)) %>%
    filter(gene_name %in% GENES)


# Second gene set: same thresholds, positive log2FoldChange.
GENES <- c("ITGA7", "CDHR1", "CD177", "PGF", "ERFE", "MMP8")

length(GENES)

misc_covid %>%
    filter(
        padj < 0.01,
        abs(log2FoldChange) > 1.5,
        log2FoldChange > 0
    ) %>%
    mutate(gene_name = gsub(".*_", "", gene_id)) %>%
    filter(gene_name %in% GENES)

# t <- misc_covid%>%filter(padj < 0.01) %>%
#     filter(abs(log2FoldChange) > 1.5) %>%
#     mutate(gene_name = gsub(".*_","",gene_id)) %>% pull(gene_name)

# GENES[!(GENES %in% t)]

# Unfiltered look-up of every row whose gene id contains "PGF".
misc_covid %>%
    filter(grepl("PGF", gene_id))

GeneID,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_name,gene_type,gene_id
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<lgl>,<chr>
ENSG00000088827.13_SIGLEC1,140.0858,-3.282118,0.3890308,-8.436653,3.265552e-17,8.605273e-14,SIGLEC1,,ENSG00000088827.13_SIGLEC1
ENSG00000119917.15_IFIT3,400.0178,-2.665187,0.3658884,-7.284153,3.236972e-13,3.411985e-10,IFIT3,,ENSG00000119917.15_IFIT3
ENSG00000119922.11_IFIT2,432.0327,-2.480976,0.350472,-7.078955,1.452451e-12,1.208668e-09,IFIT2,,ENSG00000119922.11_IFIT2
ENSG00000137959.17_IFI44L,332.7485,-3.013164,0.3804576,-7.919843,2.378113e-15,4.177816e-12,IFI44L,,ENSG00000137959.17_IFI44L
ENSG00000165949.12_IFI27,454.5962,-3.143293,0.418359,-7.513386,5.761723e-14,7.59155e-11,IFI27,,ENSG00000165949.12_IFI27
ENSG00000187608.10_ISG15,240.0384,-3.005481,0.3425449,-8.773977,1.724641e-18,9.089435e-15,ISG15,,ENSG00000187608.10_ISG15


GeneID,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_name,gene_type,gene_id
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<lgl>,<chr>
ENSG00000118113.12_MMP8,85.2022,1.969649,0.4106022,4.796976,1.610789e-06,0.0002234052,MMP8,,ENSG00000118113.12_MMP8
ENSG00000135424.18_ITGA7,74.65281,2.185655,0.3659543,5.972483,2.3367e-09,8.39672e-07,ITGA7,,ENSG00000135424.18_ITGA7
ENSG00000148600.15_CDHR1,15.70823,1.794664,0.3251531,5.519444,3.400731e-08,8.814582e-06,CDHR1,,ENSG00000148600.15_CDHR1
ENSG00000178752.16_ERFE,13.2976,1.785071,0.4795986,3.722009,0.0001976439,0.007022442,ERFE,,ENSG00000178752.16_ERFE
ENSG00000204936.10_CD177,313.08038,1.896199,0.4100149,4.624706,3.751293e-06,0.0004062444,CD177,,ENSG00000204936.10_CD177


GeneID,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_name,gene_type,gene_id
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<chr>
ENSG00000119630.14_PGF,2.586193,2.10993,0.6350881,3.322264,0.0008929015,0.01928021,,,ENSG00000119630.14_PGF
