In [None]:
using MKtest, JLD2, DataFrames, RCall, CSV

In [None]:
# Root directories used by the notebook. Both end with a trailing slash because
# later cells build file names by plain string concatenation.
labstorage = "/labstorage/jmurgamoreno/Immune_Adaptation_Atlas_2023/";
path = "/home/jmurgamoreno/Immune_Adaptation_Atlas_2023/";
# Copy both strings into the embedded R session under the same names.
@rput labstorage;
@rput path;

In [None]:
R"""
library(magrittr)
library(paletteer)
library(data.table)
library(dplyr)
library(tidyr)
library(ggplot2)

# Empirical p-values for an estimate and its `_strong` / `_weak` components,
# computed separately within each group of rows.
#
# Within a group, a row's p-value is the descending rank of its value (largest
# value -> rank 1, ties share the average rank) divided by the number of
# non-missing values in that group. Rows with a missing value get NA.
#
# Args:
#   df:     data.frame / data.table holding the columns `symbol`,
#           paste0(symbol, "_strong") and paste0(symbol, "_weak").
#   symbol: name of the main estimate column.
#   column: name of the grouping column. Defaults to "cell", which reproduces
#           the original two-argument behaviour.
#
# Returns:
#   The groups stacked with rbindlist() in split() order, with three added (or
#   overwritten) columns: pvalue, pvalue_strong, pvalue_weak.
empirical_p <- function(df, symbol, column = "cell") {
    # Descending rank scaled by the count of non-missing observations.
    upper_tail <- function(v) {
        rank(v * -1, na.last = "keep") / sum(!is.na(v))
    }

    strong_col <- paste0(symbol, "_strong")
    weak_col <- paste0(symbol, "_weak")

    ranked <- lapply(split(df, df[[column]]), function(g) {
        g$pvalue <- upper_tail(g[[symbol]])
        g$pvalue_strong <- upper_tail(g[[strong_col]])
        g$pvalue_weak <- upper_tail(g[[weak_col]])
        g
    })
    rbindlist(ranked)
}
"""

# Cell lines

In [None]:

# Load the "cell_lines" table from the JLD2 results archive.
h = jldopen(path * "abmkc/results_immune_abc.jld2")
results_abc = try
    h["cell_lines"]
finally
    # Release the file handle once the table has been read; the original cell
    # left it open.
    close(h)
end

# Strip the file-name suffixes from the cell labels.
results_abc.cell = replace.(results_abc.cell,"_case.txt"=>"")
results_abc.cell = replace.(results_abc.cell,"_control.txt"=>"")

# Palette table; it is joined on the (cell, line) pair, so it must carry both columns.
pal = CSV.read(path * "/annotations/line_cell_palette.txt",DataFrame)

# Keep only the rows that have a palette entry and attach the palette columns.
results_abc = innerjoin(results_abc,pal,on=[:cell,:line])

# Hand both tables to the embedded R session.
@rput results_abc
@rput pal

## $\alpha$

In [None]:
R"""
# Prepare the cell-line table and rank the α estimates.
df <- results_abc

# Strip file-name residue from the cell labels (literal matches, not regex).
for (suffix in c("_case.txt", "_control.txt", "_FULL_ENSEMBL_TOP")) {
    df$cell <- gsub(suffix, "", df$cell, fixed = TRUE)
}

df <- df %>% select(-c('rank')) %>% as.data.table

# Drop the rows whose cell label equals the lineage label.
df <- df[cell != line]

# These labels get the lineage appended, so the same label under two different
# lineages becomes two distinct cells.
shared_labels <- c("all",
                   "CMP","DNearly_T","DNP_T","EARLY_MK","GMP","MEMP","MEP","PRE_PRO_B","PROMONOCYTE","PROMYELOCYTE",
                   "Progenitor")
df[cell %in% shared_labels, cell := paste0(cell, "_", line)]

# Rank only after the labels are disambiguated. Ranking before the rename
# pooled rows from different lineages that shared a label into one group,
# unlike the ωₐ ranking later in the notebook, which runs on the renamed labels.
df <- empirical_p(df, "α")


"""

In [None]:
R"""

# Cell-line α figure: reshape the estimates and empirical p-values to long
# format, tidy the cell labels, order the cells, draw the violins and save.

# Long table of the estimates. NOTE(review): columns are picked by position
# (1:6 and 16:19) - re-check the indices if the layout of df changes.
d = melt(df[,c(1:6,16:19)],id.vars=c("line","cell","subline","type","n","pvalue","color"))

# Rewrite the cell labels for display on the x axis.
d$cell = gsub("_","-",d$cell)
d$cell = gsub("DNearly","DN(early)",d$cell)
d$cell = gsub("DNP","DN(P)",d$cell)
d$cell = gsub("DNQ","DN(Q)",d$cell)
d$cell = gsub("DPP","DP(P)",d$cell)

d$cell = gsub("-HSC-progenitors"," (HSC-progenitors)",d$cell)
d$cell = gsub("-Myeloid-ALL"," (Myeloid-ALL)",d$cell)

d = d[order(variable,decreasing=T)]

# Long table of the three p-value columns, with the same label rewriting as d.
d_p= melt(df[,c(1:3,16:21)],id.vars=c("line","cell","subline","type","n","color"))

d_p$cell = gsub("_","-",d_p$cell)
d_p$cell = gsub("DNearly","DN(early)",d_p$cell)
d_p$cell = gsub("DNP","DN(P)",d_p$cell)
d_p$cell = gsub("DNQ","DN(Q)",d_p$cell)
d_p$cell = gsub("DPP","DP(P)",d_p$cell)
d_p$cell = gsub("-HSC-progenitors"," (HSC-progenitors)",d_p$cell)
d_p$cell = gsub("-Myeloid-ALL"," (Myeloid-ALL)",d_p$cell)

# Significance stars for the case rows only; every other row keeps "".
# NOTE(review): values at or below 0.00001 fall through to the empty label.
d_p$label=""
d_p[type=='case'] = d_p[type=='case'] %<>% mutate(
        label = case_when(
            value > 0.05 ~ "",
            value > 0.01 ~ "*",
            value > 0.001 ~ "**",
            value > 0.0001 ~ "***",
            value > 0.00001 ~ "****",
            !is.na(value) ~ "",
            TRUE ~ NA_character_
        )
    )

# x-axis order: case rows of α, grouped by subline in the fixed order below and
# sorted by decreasing estimate within each group.
tmp = d[type == "case" & variable == "α"]
tmp = split(tmp,tmp$subline)

subline_order <- c("HSC_progenitors", "Tcells", "Bcells", "Innate", "MACs","Granulocytes","DCs","Stromal","MegaKaryocytes","Erythrocytes")

tmp = tmp[subline_order]

for(i in names(tmp)){
	tmp[[i]] = tmp[[i]][order(value,decreasing=T)]
}

tmp = rbindlist(tmp)
x = tmp$cell

d$variable = factor(d$variable,levels=c("α","α_strong","α_weak"))
d$cell = factor(d$cell,levels=x)
d_p$cell = factor(d_p$cell,levels=x)
# Copied by row position, not by key: this assumes d and d_p list the same rows
# in the same order - TODO confirm (d was re-sorted above, d_p was not).
d$pvalue = d_p$value
d$label = d_p$label

# Drop every cell whose label contains "all", recompute n as the number of rows
# per (line, cell) pair, and keep the pairs with more than three rows.
d_flt = d[!grepl("all",d$cell)]
cols = c("line","cell")
d_flt = d_flt %>% group_by(across(all_of(cols))) %>% mutate("n"=n()) %>% as.data.table
d_flt = d_flt[n>3]

# Hard-coded x positions of the dotted separators drawn by geom_vline below.
x_pos = c(12.5,22.5,31.5,36.5,47.5,51.5,58.5,62.5,67.5)
# One fill colour per cell, in factor-level order.
clrs = d_flt[type=='case',c('cell','color')] %>% unique
clrs = clrs[order(cell)]
# Violins show the control rows; ymin == ymax makes each case estimate a
# horizontal tick; the star labels are written at y = -0.05.
p = ggplot(d_flt[type=="control"],aes(x=cell,y=value,fill=cell)) +
   geom_violin(width=0.8) + 
   geom_errorbar(data=d_flt[type=="case"],aes(x = cell, ymin = value,ymax = value),size=1,color="black") + 
   geom_text(data=d_flt[type=="case"],aes(x=cell,y=-0.05,label = label,color=line),size=10,color="black",angle = 90, vjust = 0.75, hjust=1) +
   geom_vline(xintercept = x_pos, linetype = "dotted",size=1) + 
   facet_wrap(~variable,ncol=1,scales='free_y') + 
   theme_bw() +
   theme(
       strip.text = element_text(size = 20),
       axis.title=element_text(size=20),
       axis.text.x = element_text(size = 16,angle = 45, vjust = 1, hjust=1),
       axis.text.y = element_text(size = 16), 
       axis.title.x = element_text(size = 16),  
       axis.title.y = element_text(size = 16),
       legend.text=element_text(size=16),
       legend.title=element_text(size=18), 
   ) +
   ylab(expression(alpha))+ ylim(-0.1,0.7) + scale_fill_manual(values=clrs$color,labels=clrs$cell) + guides(fill="none")

# Write each group name (taken from unique(tmp$subline)) at y = 0.7, centred
# between consecutive separators; 71.5 is the hard-coded right edge.
for(i in 1:10){
    if(i == 1){
        p = p + annotate("text", x = x_pos[1]/2, y =0.7,label=unique(tmp$subline)[i])
    }else if(i==10){
        p = p + annotate("text", x = 71.5 - (71.5 - x_pos[i-1])/2, y =0.7,label=unique(tmp$subline)[i])
    }else{
        p = p + annotate("text", x = x_pos[i] - (x_pos[i] - x_pos[i-1])/2, y =0.7,label=unique(tmp$subline)[i])
    }
}

# Save the figure as SVG under the Developmental directory.
ggsave(p,filename=paste0(path,"Developmental/cell_lines_abcmk_top_500_orthologs.svg"),height=20,width=30) 

"""

In [None]:
R"""
# Export the case rows of the α analysis without the ω columns.
df_case_alpha <- df[type=='case'] %>% select(-c("ωₙₐ","ωₐ","ωₐ_strong","ωₐ_weak"))
# Sort by the sixth column, largest first. `[[6]]` passes order() a plain
# vector; `[,6]` passed it a one-column table.
df_case_alpha <- df_case_alpha[order(df_case_alpha[[6]], decreasing = TRUE)]
fwrite(df_case_alpha,paste0(path,"Developmental/abcmk_inference_alpha_top_500_orthologs.txt"),sep='\t')
"""

## $\omega_a$

In [None]:
R"""
# Cell-line ωₐ figure. Re-ranks df on ωₐ (overwriting the three p-value columns
# left by the α pass) and then repeats the α figure pipeline on the ω columns.
df = empirical_p(df,"ωₐ")

# Long table of the estimates. NOTE(review): columns are picked by position
# (1:3, 7:9 and 16:19) - re-check the indices if the layout of df changes.
d = melt(df[,c(1:3,7:9,16:19)],id.vars=c("line","cell","subline","type","n","pvalue","color"))

# Rewrite the cell labels for display on the x axis.
d$cell = gsub("_","-",d$cell)
d$cell = gsub("DNearly","DN(early)",d$cell)
d$cell = gsub("DNP","DN(P)",d$cell)
d$cell = gsub("DNQ","DN(Q)",d$cell)
d$cell = gsub("DPP","DP(P)",d$cell)


d$cell = gsub("-HSC-progenitors"," (HSC-progenitors)",d$cell)
d$cell = gsub("-Myeloid-ALL"," (Myeloid-ALL)",d$cell)


d = d[order(variable,decreasing=T)]

# Long table of the three p-value columns, with the same label rewriting as d.
d_p= melt(df[,c(1:3,16:21)],id.vars=c("line","cell","subline","type","n","color"))

d_p$cell = gsub("_","-",d_p$cell)
d_p$cell = gsub("DNearly","DN(early)",d_p$cell)
d_p$cell = gsub("DNP","DN(P)",d_p$cell)
d_p$cell = gsub("DNQ","DN(Q)",d_p$cell)
d_p$cell = gsub("DPP","DP(P)",d_p$cell)
d_p$cell = gsub("-HSC-progenitors"," (HSC-progenitors)",d_p$cell)
d_p$cell = gsub("-Myeloid-ALL"," (Myeloid-ALL)",d_p$cell)

# Significance stars for the case rows only; every other row keeps "".
# NOTE(review): values at or below 0.00001 fall through to the empty label.
d_p$label=""
d_p[type=='case'] = d_p[type=='case'] %<>% mutate(
        label = case_when(
            value > 0.05 ~ "",
            value > 0.01 ~ "*",
            value > 0.001 ~ "**",
            value > 0.0001 ~ "***",
            value > 0.00001 ~ "****",
            !is.na(value) ~ "",
            TRUE ~ NA_character_
        )
    )

# x-axis order: case rows of ωₐ, grouped by subline in the fixed order below
# and sorted by decreasing estimate within each group.
tmp = d[type == "case" & variable == "ωₐ"]
tmp = split(tmp,tmp$subline)

subline_order <- c("HSC_progenitors", "Tcells", "Bcells", "Innate", "MACs","Granulocytes","DCs","Stromal","MegaKaryocytes","Erythrocytes")

tmp = tmp[subline_order]

for(i in names(tmp)){
    tmp[[i]] = tmp[[i]][order(value,decreasing=T)]
}

tmp = rbindlist(tmp)
x = tmp$cell

d$variable = factor(d$variable,levels=c("ωₐ","ωₐ_strong","ωₐ_weak"))
d$cell = factor(d$cell,levels=x)
d_p$cell = factor(d_p$cell,levels=x)
# Copied by row position, not by key: this assumes d and d_p list the same rows
# in the same order - TODO confirm (d was re-sorted above, d_p was not).
d$pvalue = d_p$value
d$label = d_p$label

# Drop every cell whose label contains "all", recompute n as the number of rows
# per (line, cell) pair, and keep the pairs with more than three rows.
d_flt = d[!grepl("all",d$cell)]
cols = c("line","cell")
d_flt = d_flt %>% group_by(across(all_of(cols))) %>% mutate("n"=n()) %>% as.data.table
d_flt = d_flt[n>3]

# Hard-coded x positions of the dotted separators drawn by geom_vline below.
x_pos = c(12.5,22.5,31.5,36.5,47.5,51.5,58.5,62.5,67.5)
# One fill colour per cell, in factor-level order.
clrs = d_flt[type=='case',c('cell','color')] %>% unique
clrs = clrs[order(cell)]

# Violins show the control rows; ymin == ymax makes each case estimate a
# horizontal tick; the star labels are written at y = -0.025.
p = ggplot(d_flt[type=="control"],aes(x=cell,y=value,fill=cell)) +
   geom_violin(width=0.8) + 
   geom_errorbar(data=d_flt[type=="case"],aes(x = cell, ymin = value,ymax = value),size=1,color="black") + 
   geom_text(data=d_flt[type=="case"],aes(x=cell,y=-0.025,label = label,color=line),size=10,color="black",angle = 90, vjust = 0.75, hjust=1) +
   geom_vline(xintercept = x_pos, linetype = "dotted",size=1) + 
   facet_wrap(~variable,ncol=1,scales='free_y') + 
   theme_bw() +
   theme(
       strip.text = element_text(size = 20),
       axis.title=element_text(size=20),
       axis.text.x = element_text(size = 16,angle = 45, vjust = 1, hjust=1),
       axis.text.y = element_text(size = 16), 
       axis.title.x = element_text(size = 16),  
       axis.title.y = element_text(size = 16),
       legend.text=element_text(size=16),
       legend.title=element_text(size=18), 
   ) +
   ylab(expression(omega[a]))+ ylim(-0.06,0.3) + scale_fill_manual(values=clrs$color,labels=clrs$cell) + guides(fill="none")

# Write each group name (taken from unique(tmp$subline)) at y = 0.3, centred
# between consecutive separators; 71.5 is the hard-coded right edge.
for(i in 1:10){
    if(i == 1){
        p = p + annotate("text", x = x_pos[1]/2, y =0.3,label=unique(tmp$subline)[i])
    }else if(i==10){
        p = p + annotate("text", x = 71.5 - (71.5 - x_pos[i-1])/2, y =0.3,label=unique(tmp$subline)[i])
    }else{
        p = p + annotate("text", x = x_pos[i] - (x_pos[i] - x_pos[i-1])/2, y =0.3,label=unique(tmp$subline)[i])
    }
}

# Save the figure as SVG under the Developmental directory.
ggsave(p,filename=paste0(path,"/Developmental/cell_lines_abcmk_omega_a_top_500_orthologs.svg"),height=20,width=30) 

"""

In [None]:
R"""
# Export the case rows of the ωₐ analysis without the α and nuisance columns.
df_case_omega <- df[type=='case'] %>% select(-c("α","α_strong","α_weak","n","color","subline","γ₋","γ₊","γ₊₊","β","B"))
# Sort by the sixth column, largest first. `[[6]]` passes order() a plain
# vector; `[,6]` passed it a one-column table.
df_case_omega <- df_case_omega[order(df_case_omega[[6]], decreasing = TRUE)]
fwrite(df_case_omega,paste0(path,"Developmental/abcmk_inference_omega_top_500_orthologs.txt"),sep='\t')
"""

## Tables

In [None]:
# Read the abcmk_ci table of the Developmental analysis and copy it into the
# R session as `ci`.
ci = CSV.read(path * "Developmental/abcmk_ci.txt",DataFrame)
@rput ci

R"""
# Publication table for the cell lines: the abcmk_ci table joined with the
# empirical p-values of the α and ωₐ analyses.
df_ci <- as.data.table(ci)

# Strip file-name residue from the cell labels (literal matches, not regex).
for (suffix in c("_case.txt", "_control.txt", "_FULL_ENSEMBL_TOP")) {
    df_ci$cell <- gsub(suffix, "", df_ci$cell, fixed = TRUE)
}

# Drop the rows whose cell label equals the lineage label.
df_ci <- df_ci[cell != line]

# Append the lineage to the same labels that receive it in the analysis table,
# so the merge keys agree. "Progenitor" was missing from this list although the
# analysis table renames it, which left those rows without a merge partner.
shared_labels <- c("all",
                   "CMP","DNearly_T","DNP_T","EARLY_MK","GMP","MEMP","MEP","PRE_PRO_B","PROMONOCYTE","PROMYELOCYTE",
                   "Progenitor")
df_ci[cell %in% shared_labels, cell := paste0(cell, "_", line)]

# Both merges rely on merge()'s default choice of join columns.
df_publication <- merge(df_ci,df_case_alpha[,c("cell","line","pvalue_weak","pvalue_strong","pvalue")])
df_publication <- merge(df_publication,df_case_omega[,c("cell","line","pvalue_weak","pvalue_strong","pvalue")])

# NOTE(review): the p-value columns are renamed by position (15:20); confirm
# the indices if the layout of abcmk_ci.txt changes.
names(df_publication)[15:20] <- c("pvalue_weak_alpha","pvalue_strong_alpha","pvalue_alpha","pvalue_weak_omega_a","pvalue_strong_omega_a","pvalue_omega_a")
fwrite(df_publication,paste0(path,"Developmental/cell_lines/cell_lines_abcmk_ci_pvalues.txt"),sep='\t')
"""

# Adult tissues

In [None]:
# Load the "adult_tissues" table from the JLD2 results archive.
h = jldopen(path * "abmkc/results_immune_abc.jld2")
results_abc = try
    h["adult_tissues"]
finally
    # Release the file handle once the table has been read; the original cell
    # left it open.
    close(h)
end

# Palette table read from the annotations directory.
pal = CSV.read(path * "annotations/line_cell_palette.txt",DataFrame)

# Hand both tables to the embedded R session.
@rput results_abc
@rput pal

## $\alpha$

In [None]:
R"""
# Adult-tissue α figure: clean the labels, build a per-lineage palette, rank
# the estimates, reshape to long format, order the cells, plot and save.
df = as.data.table(results_abc) 

# Strip file-name residue from the cell labels.
df$cell = gsub("_case.txt","",df$cell)
df$cell = gsub("_control.txt","",df$cell)
df$cell = gsub("_FULL_ENSEMBL_TOP","",df$cell)

# One continuous paletteer palette per lineage (Blue / Green / Purple), with as
# many colours as the lineage has distinct cells. This replaces the `pal`
# object received from Julia.
tcells = data.table(cell=df[line=='Tcells']$cell %>%unique,color=paletteer_c("ggthemes::Blue",df[line=='Tcells']$cell %>% unique %>% length))
bcells = data.table(cell=df[line=='Bcells']$cell %>% unique,color=paletteer_c("ggthemes::Green",df[line=='Bcells']$cell %>% unique %>% length))
macs = data.table(cell=df[line=='Myeloid']$cell %>% unique,color=paletteer_c("ggthemes::Purple",df[line=='Myeloid']$cell %>% unique %>% length))
pal = rbindlist(list(tcells,bcells,macs))

df = empirical_p(df,"α")

# Attach the colours; merge() is left to pick the join columns by default.
df = merge(df,pal)


# Lineage-level rows (cell label equals lineage label) are set aside in df_l
# and removed from df. df_l is not used again in the code visible here.
df_l = df[cell==line]
df = df[cell!=line]
# df = df[cell!='T_CD4_CD8']

# Long table of the estimates. NOTE(review): columns are picked by position -
# re-check the indices if the layout of df changes.
d = melt(df[,c(1:6,16:17,20)],id.vars=c("line","cell","type","n","pvalue","color"))

d = d[order(variable,decreasing=T)]

# Long table of the p-value columns.
d_p = melt(df[,c(1:3,16:20)],id.vars=c("line","cell","type","n","color"))

# Every row is relabelled 'pvalue', discarding which p-value column it came from.
d_p$variable='pvalue'

# Significance stars for the case rows only; every other row keeps "".
# NOTE(review): values at or below 0.00001 fall through to the empty label.
d_p$label=""
d_p[type=='case'] = d_p[type=='case'] %<>% mutate(
        label = case_when(
            value > 0.05 ~ "",
            value > 0.01 ~ "*",
            value > 0.001 ~ "**",
            value > 0.0001 ~ "***",
            value > 0.00001 ~ "****",
            !is.na(value) ~ "",
            TRUE ~ NA_character_
        )
    )


# x-axis order: case rows of α, grouped by lineage in the fixed order below and
# sorted by decreasing estimate within each lineage.
tmp = d[type == "case" & variable == "α"]
tmp = split(tmp,tmp$line)

line_order <- c("Tcells", "Bcells","Myeloid")

tmp = tmp[line_order]

for(i in names(tmp)){
    tmp[[i]] = tmp[[i]][order(value,decreasing=T)]
}

tmp = rbindlist(tmp)
x = tmp$cell

d$variable = factor(d$variable,levels=c("α","α_strong","α_weak"))
d$cell = factor(d$cell,levels=x)
d_p$cell = factor(d_p$cell,levels=x)
# Copied by row position, not by key: this assumes d and d_p list the same rows
# in the same order - TODO confirm (d was re-sorted above, d_p was not).
d$pvalue = d_p$value
d$label = d_p$label

# Drop every cell whose label contains "all", recompute n as the number of rows
# per (line, cell) pair, and keep the pairs with more than three rows.
d_flt = d[!grepl("all",d$cell)]
cols = c("line","cell")
d_flt = d_flt %>% group_by(across(all_of(cols))) %>% mutate("n"=n()) %>% as.data.table
d_flt = d_flt[n>3]

# Hard-coded x positions of the dotted separators drawn by geom_vline below.
x_pos = c(16.5,24.5)
# One fill colour per cell, in factor-level order.
clrs = d_flt[type=='case',c('cell','color')] %>% unique
clrs = clrs[order(cell)]
# Violins show the control rows; ymin == ymax makes each case estimate a
# horizontal tick; the star labels are written at y = -0.05.
p = ggplot(d_flt[type=="control"],aes(x=cell,y=value,fill=cell)) +
   geom_violin(width=0.8) + 
   geom_errorbar(data=d_flt[type=="case"],aes(x = cell, ymin = value,ymax = value),size=1,color="black") + 
   geom_text(data=d_flt[type=="case"],aes(x=cell,y=-0.05,label = label,color=line),size=10,color="black",angle = 90, vjust = 0.75, hjust=1) +
   geom_vline(xintercept = x_pos, linetype = "dotted",size=1) + 
   facet_wrap(~variable,ncol=1,scales='free_y') + 
   theme_bw() +
   theme(
       strip.text = element_text(size = 20),
       axis.title=element_text(size=20),
       axis.text.x = element_text(size = 16,angle = 45, vjust = 1, hjust=1),
       axis.text.y = element_text(size = 16), 
       axis.title.x = element_text(size = 16),  
       axis.title.y = element_text(size = 16),
       legend.text=element_text(size=16),
       legend.title=element_text(size=18), 
   ) +
   ylab(expression(alpha))+ ylim(-0.1,0.7) + scale_fill_manual(values=clrs$color,labels=clrs$cell) + guides(fill="none")

# Write each lineage name (taken from unique(tmp$line)) at y = 0.7, centred
# between consecutive separators; 33.5 is the hard-coded right edge.
for(i in 1:3){
    if(i == 1){
        p = p + annotate("text", x = x_pos[1]/2, y =0.7,label=unique(tmp$line)[i])
    }else if(i==3){
        p = p + annotate("text", x = 33.5 - (33.5 - x_pos[i-1])/2, y =0.7,label=unique(tmp$line)[i])
    }else{
        p = p + annotate("text", x = x_pos[i] - (x_pos[i] - x_pos[i-1])/2, y =0.7,label=unique(tmp$line)[i])
    }
}

# Save the figure as SVG under the Adult directory.
ggsave(p,filename=paste0(path,"Adult/abcmk_adult_tissues.svg"),height=15,width=15)

""";

In [None]:
R"""
# Export the case rows of the adult-tissue α analysis without the ω columns.
df_case_alpha <- df[type=='case'] %>% select(-c("ωₙₐ","ωₐ","ωₐ_strong","ωₐ_weak"))
# Sort by the sixth column, largest first. `[[6]]` passes order() a plain
# vector; `[,6]` passed it a one-column table.
df_case_alpha <- df_case_alpha[order(df_case_alpha[[6]], decreasing = TRUE)]
fwrite(df_case_alpha,paste0(path,"Adult/abcmk_inference_alpha_top_500_orthologs.txt"),sep='\t')
"""

## $\omega_a$

In [None]:
R"""

# Adult-tissue ωₐ figure. Re-ranks df on ωₐ (overwriting the three p-value
# columns left by the α pass) and repeats the α figure pipeline on the ω columns.
df = empirical_p(df,"ωₐ")

# Long table of the estimates. NOTE(review): columns are picked by position -
# re-check the indices if the layout of df changes.
d = melt(df[,c(1:3,7:9,16:17,20)],id.vars=c("line","cell","type","n","pvalue","color"))

d = d[order(variable,decreasing=T)]

# Long table of the p-value columns.
d_p= melt(df[,c(1:3,16:20)],id.vars=c("line","cell","type","n","color"))

# Every row is relabelled 'pvalue', discarding which p-value column it came from.
d_p$variable='pvalue'

# Significance stars for the case rows only; every other row keeps "".
# NOTE(review): values at or below 0.00001 fall through to the empty label.
d_p$label=""
d_p[type=='case'] = d_p[type=='case'] %<>% mutate(
        label = case_when(
            value > 0.05 ~ "",
            value > 0.01 ~ "*",
            value > 0.001 ~ "**",
            value > 0.0001 ~ "***",
            value > 0.00001 ~ "****",
            !is.na(value) ~ "",
            TRUE ~ NA_character_
        )
    )


# x-axis order: case rows of ωₐ, grouped by lineage in the fixed order below
# and sorted by decreasing estimate within each lineage.
tmp = d[type == "case" & variable == "ωₐ"]
tmp = split(tmp,tmp$line)

line_order <- c("Tcells", "Bcells","Myeloid")

tmp = tmp[line_order]

for(i in names(tmp)){
    tmp[[i]] = tmp[[i]][order(value,decreasing=T)]
}

tmp = rbindlist(tmp)
x = tmp$cell

d$variable = factor(d$variable,levels=c("ωₐ","ωₐ_strong","ωₐ_weak"))
d$cell = factor(d$cell,levels=x)
d_p$cell = factor(d_p$cell,levels=x)
# Copied by row position, not by key: this assumes d and d_p list the same rows
# in the same order - TODO confirm (d was re-sorted above, d_p was not).
d$pvalue = d_p$value
d$label = d_p$label

# Drop every cell whose label contains "all", recompute n as the number of rows
# per (line, cell) pair, and keep the pairs with more than three rows.
d_flt = d[!grepl("all",d$cell)]
cols = c("line","cell")
d_flt = d_flt %>% group_by(across(all_of(cols))) %>% mutate("n"=n()) %>% as.data.table
d_flt = d_flt[n>3]

# Hard-coded x positions of the dotted separators drawn by geom_vline below.
x_pos = c(16.5,24.5)
# One fill colour per cell, in factor-level order.
clrs = d_flt[type=='case',c('cell','color')] %>% unique
clrs = clrs[order(cell)]
# Violins show the control rows; ymin == ymax makes each case estimate a
# horizontal tick; the star labels are written at y = -0.025.
p = ggplot(d_flt[type=="control"],aes(x=cell,y=value,fill=cell)) +
   geom_violin(width=0.8) + 
   geom_errorbar(data=d_flt[type=="case"],aes(x = cell, ymin = value,ymax = value),size=1,color="black") + 
   geom_text(data=d_flt[type=="case"],aes(x=cell,y=-0.025,label = label,color=line),size=10,color="black",angle = 90, vjust = 0.75, hjust=1) +
   geom_vline(xintercept = x_pos, linetype = "dotted",size=1) + 
   facet_wrap(~variable,ncol=1,scales='free_y') + 
   theme_bw() +
   theme(
       strip.text = element_text(size = 20),
       axis.title=element_text(size=20),
       axis.text.x = element_text(size = 16,angle = 45, vjust = 1, hjust=1),
       axis.text.y = element_text(size = 16), 
       axis.title.x = element_text(size = 16),  
       axis.title.y = element_text(size = 16),
       legend.text=element_text(size=16),
       legend.title=element_text(size=18), 
   ) +
   ylab(expression(omega[a]))+ ylim(-0.1,0.3) + scale_fill_manual(values=clrs$color,labels=clrs$cell) + guides(fill="none")

# Write each lineage name (taken from unique(tmp$line)) at y = 0.3, centred
# between consecutive separators; 33.5 is the hard-coded right edge.
for(i in 1:3){
    if(i == 1){
        p = p + annotate("text", x = x_pos[1]/2, y =0.3,label=unique(tmp$line)[i])
    }else if(i==3){
        p = p + annotate("text", x = 33.5 - (33.5 - x_pos[i-1])/2, y =0.3,label=unique(tmp$line)[i])
    }else{
        p = p + annotate("text", x = x_pos[i] - (x_pos[i] - x_pos[i-1])/2, y =0.3,label=unique(tmp$line)[i])
    }
}

# Save the figure as SVG under the Adult directory.
ggsave(p,filename=paste0(path,"Adult/abcmk_omega_a_adult_tissues.svg"),height=15,width=15)

""";

In [None]:
R"""
# Export the case rows of the adult-tissue ωₐ analysis without the α and
# nuisance columns.
df_case_omega <- df[type=='case'] %>% select(-c("α","α_strong","α_weak","n","color","γ₋","γ₊","γ₊₊","β","B"))
# Sort by the sixth column, largest first. `[[6]]` passes order() a plain
# vector; `[,6]` passed it a one-column table.
df_case_omega <- df_case_omega[order(df_case_omega[[6]], decreasing = TRUE)]
fwrite(df_case_omega,paste0(path,"Adult/abcmk_inference_omega_a_top_500_orthologs.txt"),sep='\t')
"""

## Tables

In [None]:
# Read the abcmk_ci table of the Adult analysis and copy it into the R session
# as `ci`.
ci = CSV.read(path * "Adult/abcmk_ci.txt",DataFrame)
@rput ci

R"""
# Publication table for the adult tissues: the abcmk_ci table joined with the
# empirical p-values of the α and ωₐ analyses.
df_ci = as.data.table(ci)
# Strip file-name residue from the cell labels.
df_ci$cell = gsub("_case.txt","",df_ci$cell)
df_ci$cell = gsub("_control.txt","",df_ci$cell)
df_ci$cell = gsub("_FULL_ENSEMBL_TOP","",df_ci$cell)


# Drop the rows whose cell label equals the lineage label.
df_ci = df_ci[cell!=line]

# Append the lineage to 'all' and to the labels listed in the loop.
# NOTE(review): the adult-tissue analysis table is not given these suffixes in
# the cells above, so a row renamed here would find no partner in the merges
# below - confirm whether these renames are needed for this data set.
df_ci[cell=='all']$cell = paste0(df_ci[cell=='all']$cell,"_",df_ci[cell=='all']$line)

for(i in c("CMP","DNearly_T","DNP_T","EARLY_MK","GMP","MEMP","MEP","PRE_PRO_B","PROMONOCYTE","PROMYELOCYTE")){
    df_ci[cell==i]$cell = paste0(df_ci[cell==i]$cell,"_",df_ci[cell==i]$line)
}

# Both merges rely on merge()'s default choice of join columns.
df_publication = merge(df_ci,df_case_alpha[,c("cell","line","pvalue_weak","pvalue_strong","pvalue")])
df_publication = merge(df_publication,df_case_omega[,c("cell","line","pvalue_weak","pvalue_strong","pvalue")])

# NOTE(review): the p-value columns are renamed by position (15:20); confirm
# the indices if the layout of abcmk_ci.txt changes.
names(df_publication)[15:20] = c("pvalue_weak_alpha","pvalue_strong_alpha","pvalue_alpha","pvalue_weak_omega_a","pvalue_strong_omega_a","pvalue_omega_a")
fwrite(df_publication,paste0(path,"Adult/adult_tissues_abcmk_ci_pvalues.txt"),sep='\t')
"""

# Macrophages activation

In [None]:
# Load the "macrophages_activation" table from the JLD2 results archive.
h = jldopen(path * "abmkc/results_immune_abc.jld2")
results_abc = try
    h["macrophages_activation"]
finally
    # Release the file handle once the table has been read; the original cell
    # left it open.
    close(h)
end
# Hand the table to the embedded R session.
@rput results_abc

In [None]:
R"""
# Empirical p-values within groups of rows.
#
# Rows are grouped by the values of `column`. Inside each group the columns
# `symbol`, `<symbol>_strong` and `<symbol>_weak` are ranked in descending
# order (missing values stay NA) and the rank is divided by the number of
# non-missing values, giving pvalue, pvalue_strong and pvalue_weak.
#
# Args:
#   df:     table holding the three estimate columns and the grouping column.
#   symbol: name of the main estimate column.
#   column: name of the grouping column.
#
# Returns:
#   The groups stacked with rbindlist(), carrying the three p-value columns.
empirical_p <- function(df, symbol, column) {
    scaled_rank <- function(v) {
        rank(v * -1, na.last = "keep") / sum(!is.na(v))
    }
    strong_col <- paste0(symbol, "_strong")
    weak_col <- paste0(symbol, "_weak")

    groups <- split(df, df[[column]])
    ranked <- list()
    for (key in names(groups)) {
        g <- groups[[key]]
        g$pvalue <- scaled_rank(g[[symbol]])
        g$pvalue_strong <- scaled_rank(g[[strong_col]])
        g$pvalue_weak <- scaled_rank(g[[weak_col]])
        ranked[[key]] <- g
    }
    return(rbindlist(ranked))
}
"""

## $\alpha$

### Time

In [None]:
R"""

# Macrophage activation, α by time point: split the labels into stimulus and
# time, rank the estimates within each stimulus/time pair, reshape to long
# format, plot one panel per variable and time, and save.
df = results_abc %>% as.data.table
# Stimulus (cell) -> stimulus class (diff) and fill colour (clrs).
pal = data.table(diff=c("all","Pro/anti-inflammatory","Pro/anti-inflammatory","Pro/anti-inflammatory","Viral mimics","Viral mimics","Bacterial mimics","Bacterial mimics","Bacterial mimics","Bacterial mimics","Neurodegeneration"),cell=c("all","IFNB","IFNG","IL4","R484","PIC","sLPS","P3C","CIL","LIL10","MBP"),clrs=c("#58b9fd","#faae98","#f98d83","#e22618","#206fb5","#134474","#fdfb60","#f7de35","#f2b21b","#f88a19","#be2b70"))

# NOTE(review): ".txt" is a regular expression here, so "." matches any character.
df$cell = gsub(".txt","",df$cell)

# Labels containing "all" get "_case" appended before the split below.
df[grepl("all",df$cell)]$cell = paste0(df[grepl("all",df$cell)]$cell,"_case")

# Split the label at "_" into cell and time, and drop the line column.
df = df %>% separate(cell, sep = "_", into = c("cell", "time")) %>% select(-c(line)) %>% as.data.table

# Attach class and colour; merge() is left to pick the join columns by default.
df = merge(df,pal)
# Where the time piece is 'genes', use the cell piece as the time instead.
df[time=='genes']$time = df[time=='genes']$cell

# Same statement as above, repeated.
df[time=='genes']$time = df[time=='genes']$cell
df$time = tolower(df$time) 

df[cell=='Early' & time=='early']$cell = 'all'
df[cell=='Inter' & time=='inter']$cell = 'all'
df[cell=='Late' & time=='late']$cell = 'all'

# Rank within each stimulus/time pair; the helper column is dropped afterwards.
df$cell_time = paste0(df$cell,"_",df$time)
df = empirical_p(df,"α","cell_time")

df$cell = gsub("_case.txt","",df$cell)
df$cell = gsub("_control.txt","",df$cell)
df$cell = gsub("_FULL_ENSEMBL_TOP","",df$cell)
df = df %>% select(-c(cell_time))

# Long table of the estimates. NOTE(review): columns are picked by position -
# re-check the indices if the layout of df changes.
d = melt(df[,c(1:6,16:19)],id.vars=c("cell","time","type","n","pvalue","clrs","diff"))
d = d[order(variable,decreasing=T)]

# Long table of the p-value columns.
d_p = melt(df[,c(1:3,16:21)],id.vars=c("cell","time","type","n","clrs","diff"))

d = d[order(variable,decreasing=T)]
# Every row is relabelled 'pvalue', discarding which p-value column it came from.
d_p$variable='pvalue'

# Significance stars for the case rows only; every other row keeps "".
# NOTE(review): values at or below 0.00001 fall through to the empty label.
d_p$label=""
d_p[type=='case'] = d_p[type=='case'] %<>%
    mutate(
        label = case_when(
            value > 0.05 ~ "",
            value > 0.01 ~ "*",
            value > 0.001 ~ "**",
            value > 0.0001 ~ "***",
            value > 0.00001 ~ "****",
            !is.na(value) ~ "",
            TRUE ~ NA_character_
        )
    )


# x-axis order: case rows of α at the early time point, grouped by stimulus
# class in the fixed order below and sorted by decreasing estimate.
tmp = d[type == "case" & variable == "α" & time == 'early']
tmp = split(tmp,tmp$diff)

line_order <- c("Pro/anti-inflammatory","Viral mimics","Bacterial mimics","Neurodegeneration","all")

tmp = tmp[line_order]

for(i in names(tmp)){
    tmp[[i]] = tmp[[i]][order(value,decreasing=T)]
}

tmp = rbindlist(tmp)
x = tmp$cell


d$variable = factor(d$variable,levels=c("α","α_strong","α_weak"))
d$cell = factor(d$cell,levels=unique(x))
d_p$cell = factor(d_p$cell,levels=unique(x))
# Copied by row position, not by key: this assumes d and d_p list the same rows
# in the same order - TODO confirm.
d$pvalue = d_p$value
d$label = d_p$label

# Hard-coded x positions of the dotted separators drawn by geom_vline below.
x_pos = c(3.5,5.5,9.5,10.5)
# Colours in x-axis order, then stacked three times.
clrs = d[type=='case',c('cell','clrs')] %>% unique
clrs = clrs[match(x,cell)]
clrs = rbind(clrs,rbind(clrs,clrs))

# Title-case the 'all' entries in the plot data, the group labels and the palette.
d[cell == 'all']$cell = stringr::str_to_title(d[cell == 'all']$cell)
tmp[cell == 'all']$diff = stringr::str_to_title(tmp[cell == 'all']$diff)

clrs[cell == 'all']$cell = stringr::str_to_title(clrs[cell == 'all']$cell)

# Violins show the control rows; ymin == ymax makes each case estimate a
# horizontal tick; star labels are written at y = -0.05 for every cell but 'all'.
p1 = ggplot(d[type=="control"],aes(x=cell,y=value,fill=cell)) +
   geom_violin() +  
   geom_errorbar(data=d[type=="case"],aes(x = cell, ymin = value,ymax = value),size=1,color="black") + 
  geom_vline(xintercept = x_pos, linetype = "dotted",size=1) + 
   geom_text(data=d[type=="case" & cell != 'all'],aes(x=cell,y=-0.05,label = label,color=line),size=8,color="black",angle = 90, vjust = 0.75, hjust=1) +
   facet_wrap(~variable+time,ncol=3) + 
   theme_bw() +
   theme(
       strip.text = element_text(size = 20),
       axis.title=element_text(size=20),
       axis.text.x = element_text(size = 16,angle = 90, vjust = 0.5, hjust=1),
       axis.text.y = element_text(size = 16), 
       axis.title.x = element_text(size = 16),  
       axis.title.y = element_text(size = 16),
       legend.text=element_text(size=16),
       legend.title=element_text(size=18) 
   ) +
   ylab(expression(alpha))  + ylim(-0.15,1) +   scale_fill_manual(values=clrs$clrs,labels=clrs$cell) + guides(fill="none")


# Write each stimulus-class name centred between consecutive separators; the
# fourth group is written as the fixed text "Cell damage", rotated, at y = 0.8,
# and 11.5 is the hard-coded right edge.
for(i in 1:5){
    if(i == 1){
        p1 = p1 + annotate("text", x = x_pos[1]/2, y =0.7,label=unique(tmp$diff)[i])
    }else if(i==4){
              p1 = p1 + annotate("text", x = x_pos[i] - (x_pos[i] - x_pos[i-1])/2, y =0.8,label="Cell damage",angle = 90)
    }
    else if(i==5){
        p1 = p1 + annotate("text", x = 11.5 - (11.5 - x_pos[i-1])/2, y =0.7,label=unique(tmp$diff)[i])
    }else{
        p1 = p1 + annotate("text", x = x_pos[i] - (x_pos[i] - x_pos[i-1])/2, y =0.7,label=unique(tmp$diff)[i])
    }
}

# Save the figure as SVG under the Macrophages directory.
ggsave(paste0(path,"Macrophages/abc_macrophages_time_top_500_orthologs_palette.svg"),p1,width=20,height=10)
"""


### Stimuli

In [None]:

R"""
# Macrophage activation, α by stimulus: reuses the long table d from the
# previous cell, with time on the x axis and one panel per variable and cell.
# The second column of d is renamed in place, so this cell changes d for any
# cell that runs after it.
names(d)[2] = "Time"
d$Time = stringr::str_to_title(d$Time)
# Three fill colours, one per Time value.
clrs_stml= c("#ff9494","#a973c9","#a9a9ff")
# Violins show the control rows; ymin == ymax makes each case estimate a
# horizontal tick; the star labels are written at y = -0.05.
p2 = ggplot(d[type=="control" ],aes(x=Time,y=value,fill=Time)) +
   geom_violin() + 
   geom_errorbar(data=d[type=="case"],aes(x = Time, ymin = value,ymax = value),size=1,color="black") + 
   geom_text(data=d[type=="case"],aes(x=Time,y=-0.05,label = label,color=line),size=8,color="black",angle = 90, vjust = 0.75, hjust=1) +
   facet_wrap(~variable+cell,nrow=3) + 
   theme_bw() +
   theme(
       strip.text = element_text(size = 20),
       axis.title=element_text(size=20),
       axis.text.x = element_text(size = 16,angle = 90, vjust = 1, hjust=1),
       axis.text.y = element_text(size = 16), 
       axis.title.x = element_text(size = 16),  
       axis.title.y = element_text(size = 16),
       legend.text=element_text(size=16),
       legend.title=element_text(size=18) 
   ) +
   ylab(expression(alpha)) + ylim(-0.25,1) + scale_fill_manual(values=clrs_stml)


# NOTE(review): this figure goes to Macrophages/macrophages_activation/, while
# the other macrophage outputs go directly to Macrophages/ - confirm the
# directory is intended.
ggsave(paste0(path,"Macrophages/macrophages_activation/abc_macrophages_cell_top_500_orthologs_palette.svg"),p2,width=20,height=10)

"""

In [None]:
R"""
# Export the case rows of the macrophage α analysis without the ω columns.
df_case_alpha <- df[type=='case'] %>% select(-c("ωₙₐ","ωₐ","ωₐ_strong","ωₐ_weak"))
# Sort by the sixth column, largest first. `[[6]]` passes order() a plain
# vector; `[,6]` passed it a one-column table.
df_case_alpha <- df_case_alpha[order(df_case_alpha[[6]], decreasing = TRUE)]
fwrite(df_case_alpha,paste0(path,"Macrophages/abcmk_inference_alpha_top_500_orthologs.txt"),sep='\t')
"""

## $\omega_a$

### Time

In [None]:
R"""

# Macrophage activation, ωₐ by time point: split the labels into stimulus and
# time, rank the ωₐ estimates within each stimulus/time pair, reshape to long
# format, plot one panel per variable and time, and save.
df = results_abc %>% as.data.table
# Stimulus (cell) -> stimulus class (diff) and fill colour (clrs).
pal = data.table(diff=c("all","Pro/anti-inflammatory","Pro/anti-inflammatory","Pro/anti-inflammatory","Viral mimics","Viral mimics","Bacterial mimics","Bacterial mimics","Bacterial mimics","Bacterial mimics","Neurodegeneration"),cell=c("all","IFNB","IFNG","IL4","R484","PIC","sLPS","P3C","CIL","LIL10","MBP"),clrs=c("#58b9fd","#faae98","#f98d83","#e22618","#206fb5","#134474","#fdfb60","#f7de35","#f2b21b","#f88a19","#be2b70"))

# Literal match, so "." cannot act as a regex wildcard.
df$cell = gsub(".txt","",df$cell,fixed=TRUE)

# Labels containing "all" get "_case" appended before the split below.
df[grepl("all",df$cell)]$cell = paste0(df[grepl("all",df$cell)]$cell,"_case")

# Split the label at "_" into cell and time, and drop the line column.
df = df %>% separate(cell, sep = "_", into = c("cell", "time")) %>% select(-c(line)) %>% as.data.table

# Attach class and colour; merge() is left to pick the join columns by default.
df = merge(df,pal)
# Where the time piece is 'genes', use the cell piece as the time instead.
df[time=='genes']$time = df[time=='genes']$cell
df$time = tolower(df$time) 

df[cell=='Early' & time=='early']$cell = 'all'
df[cell=='Inter' & time=='inter']$cell = 'all'
df[cell=='Late' & time=='late']$cell = 'all'

# Rank ωₐ within each stimulus/time pair. The original ranked "α" here, so the
# stars on this figure and the p-values exported from df for ωₐ were α's.
df$cell_time = paste0(df$cell,"_",df$time)
df = empirical_p(df,"ωₐ","cell_time")

df$cell = gsub("_case.txt","",df$cell)
df$cell = gsub("_control.txt","",df$cell)
df$cell = gsub("_FULL_ENSEMBL_TOP","",df$cell)
df = df %>% select(-c(cell_time))

# Long table of the estimates. NOTE(review): columns are picked by position -
# re-check the indices if the layout of df changes.
d = melt(df[,c(1:3,7:9,16:19)],id.vars=c("cell","time","type","n","pvalue","clrs","diff"))
d = d[order(variable,decreasing=T)]

# Long table of the p-value columns.
d_p = melt(df[,c(1:3,16:21)],id.vars=c("cell","time","type","n","clrs","diff"))

# Every row is relabelled 'pvalue', discarding which p-value column it came from.
d_p$variable='pvalue'

# Significance stars for the case rows only; every other row keeps "".
# NOTE(review): values at or below 0.00001 fall through to the empty label.
d_p$label=""
d_p[type=='case'] = d_p[type=='case'] %<>%
    mutate(
        label = case_when(
            value > 0.05 ~ "",
            value > 0.01 ~ "*",
            value > 0.001 ~ "**",
            value > 0.0001 ~ "***",
            value > 0.00001 ~ "****",
            !is.na(value) ~ "",
            TRUE ~ NA_character_
        )
    )


# x-axis order: case rows of ωₐ at the early time point, grouped by stimulus
# class in the fixed order below and sorted by decreasing estimate.
tmp = d[type == "case" & variable == "ωₐ" & time == 'early']
tmp = split(tmp,tmp$diff)

line_order <- c("Pro/anti-inflammatory","Viral mimics","Bacterial mimics","Neurodegeneration","all")

tmp = tmp[line_order]

for(i in names(tmp)){
    tmp[[i]] = tmp[[i]][order(value,decreasing=T)]
}

tmp = rbindlist(tmp)
x = tmp$cell


d$variable = factor(d$variable,levels=c("ωₐ","ωₐ_strong","ωₐ_weak"))
d$cell = factor(d$cell,levels=unique(x))
d_p$cell = factor(d_p$cell,levels=unique(x))
# Copied by row position, not by key: this assumes d and d_p list the same rows
# in the same order - TODO confirm.
d$pvalue = d_p$value
d$label = d_p$label

# Hard-coded x positions of the dotted separators drawn by geom_vline below.
x_pos = c(3.5,5.5,9.5,10.5)
# Colours in x-axis order, then stacked three times.
clrs = d[type=='case',c('cell','clrs')] %>% unique
clrs = clrs[match(x,cell)]
clrs = rbind(clrs,rbind(clrs,clrs))

# Title-case the 'all' entries in the plot data, the group labels and the palette.
d[cell == 'all']$cell = stringr::str_to_title(d[cell == 'all']$cell)
tmp[cell == 'all']$diff = stringr::str_to_title(tmp[cell == 'all']$diff)

clrs[cell == 'all']$cell = stringr::str_to_title(clrs[cell == 'all']$cell)

# Violins show the control rows; ymin == ymax makes each case estimate a
# horizontal tick; star labels are written at y = -0.05 for every cell but 'all'.
# The y axis is labelled omega[a]; the original label was copied from the α figure.
# NOTE(review): the upper limit is 3 here but 0.3 in the other ωₐ figures -
# confirm which one is intended.
p1 = ggplot(d[type=="control"],aes(x=cell,y=value,fill=cell)) +
   geom_violin() +  
   geom_errorbar(data=d[type=="case"],aes(x = cell, ymin = value,ymax = value),size=1,color="black") + 
  geom_vline(xintercept = x_pos, linetype = "dotted",size=1) + 
   geom_text(data=d[type=="case" & cell != 'all'],aes(x=cell,y=-0.05,label = label,color=line),size=8,color="black",angle = 90, vjust = 0.75, hjust=1) +
   facet_wrap(~variable+time,ncol=3) + 
   theme_bw() +
   theme(
       strip.text = element_text(size = 20),
       axis.title=element_text(size=20),
       axis.text.x = element_text(size = 16,angle = 90, vjust = 0.5, hjust=1),
       axis.text.y = element_text(size = 16), 
       axis.title.x = element_text(size = 16),  
       axis.title.y = element_text(size = 16),
       legend.text=element_text(size=16),
       legend.title=element_text(size=18) 
   ) +
   ylab(expression(omega[a]))  + ylim(-0.15,3) +   scale_fill_manual(values=clrs$clrs,labels=clrs$cell) + guides(fill="none")


# Write each stimulus-class name at y = 0.3, centred between consecutive
# separators. The first label used to sit at 0.7, out of line with the others.
# The fourth group is written as the fixed text "Cell damage", rotated, and
# 11.5 is the hard-coded right edge.
for(i in 1:5){
    if(i == 1){
        p1 = p1 + annotate("text", x = x_pos[1]/2, y =0.3,label=unique(tmp$diff)[i])
    }else if(i==4){
        p1 = p1 + annotate("text", x = x_pos[i] - (x_pos[i] - x_pos[i-1])/2, y =0.3,label="Cell damage",angle = 90)
    }
    else if(i==5){
        p1 = p1 + annotate("text", x = 11.5 - (11.5 - x_pos[i-1])/2, y =0.3,label=unique(tmp$diff)[i])
    }else{
        p1 = p1 + annotate("text", x = x_pos[i] - (x_pos[i] - x_pos[i-1])/2, y =0.3,label=unique(tmp$diff)[i])
    }
}

# Save the figure as SVG under the Macrophages directory.
ggsave(paste0(path,"Macrophages/abc_macrophages_omega_a_time_top_500_orthologs_palette.svg"),p1,width=20,height=10)
"""


### Stimuli

In [None]:
R"""
# Macrophage activation, ωₐ by stimulus: reuses the long table d from the
# previous cell, with time on the x axis and one panel per variable and cell.
# The second column of d is renamed in place, so this cell changes d for any
# cell that runs after it.
names(d)[2] = "Time"
d$Time = stringr::str_to_title(d$Time)
# Three fill colours, one per Time value.
clrs_stml= c("#ff9494","#a973c9","#a9a9ff")
# Violins show the control rows; ymin == ymax makes each case estimate a
# horizontal tick; the star labels are written at y = -0.05.
# The y axis is labelled omega[a]; the original label was copied from the α figure.
p2 = ggplot(d[type=="control"],aes(x=Time,y=value,fill=Time)) +
   geom_violin() + 
   geom_errorbar(data=d[type=="case"],aes(x = Time, ymin = value,ymax = value),size=1,color="black") + 
   geom_text(data=d[type=="case"],aes(x=Time,y=-0.05,label = label,color=line),size=8,color="black",angle = 90, vjust = 0.75, hjust=1) +
   facet_wrap(~variable+cell,nrow=3) + 
   theme_bw() +
   theme(
       strip.text = element_text(size = 20),
       axis.title=element_text(size=20),
       axis.text.x = element_text(size = 16,angle = 90, vjust = 1, hjust=1),
       axis.text.y = element_text(size = 16), 
       axis.title.x = element_text(size = 16),  
       axis.title.y = element_text(size = 16),
       legend.text=element_text(size=16),
       legend.title=element_text(size=18) 
   ) +
   ylab(expression(omega[a])) + ylim(-0.15,0.3) + scale_fill_manual(values=clrs_stml)


# Save the figure as SVG under the Macrophages directory.
ggsave(paste0(path,"Macrophages/abc_macrophages_cell_omega_a_top_500_orthologs_palette.svg"),p2,width=20,height=10)

"""

In [None]:
R"""
# Export the case rows of the macrophage ωₐ analysis without the α and
# nuisance columns.
df_case_omega <- df[type=='case'] %>% select(-c("α","α_strong","α_weak","n","γ₋","γ₊","γ₊₊","β","B"))
# Sort by the sixth column, largest first. `[[6]]` passes order() a plain
# vector; `[,6]` passed it a one-column table.
df_case_omega <- df_case_omega[order(df_case_omega[[6]], decreasing = TRUE)]
fwrite(df_case_omega,paste0(path,"Macrophages/abcmk_inference_omega_a_top_500_orthologs.txt"),sep='\t')
"""

## Tables

In [None]:
# Read the abcmk_ci table of the Macrophages analysis and copy it into the
# R session as `ci`.
ci = CSV.read(path * "Macrophages/abcmk_ci.txt",DataFrame)
@rput ci

R"""
# Publication table for the macrophage activation analysis: the abcmk_ci table
# joined with the empirical p-values of the α and ωₐ analyses.
df_ci = as.data.table(ci)
# Strip file-name residue from the cell labels.
df_ci$cell = gsub("_case.txt","",df_ci$cell)
df_ci$cell = gsub("_control.txt","",df_ci$cell)
df_ci$cell = gsub("_FULL_ENSEMBL_TOP","",df_ci$cell)

# Both merges rely on merge()'s default choice of join columns.
# NOTE(review): the analysis tables carry cell and time as separate columns
# (the label was split in the figure cells), but df_ci$cell is not split here -
# confirm that abcmk_ci.txt already has matching cell/time columns.
df_publication = merge(df_ci,df_case_alpha[,c("cell","time","pvalue_weak","pvalue_strong","pvalue")])
df_publication = merge(df_publication,df_case_omega[,c("cell","time","pvalue_weak","pvalue_strong","pvalue")])

# NOTE(review): the p-value columns are renamed by position (15:20); confirm
# the indices if the layout of abcmk_ci.txt changes.
names(df_publication)[15:20] = c("pvalue_weak_alpha","pvalue_strong_alpha","pvalue_alpha","pvalue_weak_omega_a","pvalue_strong_omega_a","pvalue_omega_a")
fwrite(df_publication,paste0(path,"Macrophages/macrophages_activation_abcmk_ci_pvalues.txt"),sep='\t')
"""