# Compare Tissues

In [None]:
# Directory that holds one sub-directory of differential expression (DE)
# results per tissue. It is spelled out once so that relocating the analysis
# only requires editing this line.
de_results_dir <- "/home/nbarkas/disk2/ebov_bulk_rna_seq/proc_20210329/08-simple-de-by-tissue"

# DE result table of each tissue, relative to `de_results_dir`. The names are
# the tissue labels used for list elements and matrix columns further down.
de_results_files <- c(
    adrenal = "01-Adrenal/output/adrenal__infected_vs_uninfected.de.csv",
    spleen = "02-Spleen/output/spleen.de.csv",
    lymph_node_ax_r = "03-LymphNode_AX_R/output/LN_AX_R.de.csv",
    lymph_node_ing_l = "04-LymphNode_ING_L/output/LN_ING_L.de.csv",
    lymph_node_mes = "05-LymphNode_MES/output/LN_MES.de.csv",
    kidney = "06-Kidney/output/Kidney.de.csv",
    liver = "07-Liver/output/Liver.de.csv",
    brain_gray = "08-Brain_Brain-Gr/output/BrainGray.de.csv",
    sex_organ_ovary = "10-SexOrgan_Ovary/output/SexOrgan_Ovary.de.csv",
    sex_organ_sex_organ = "12-SexOrgan_SexOrgan/output/SexOrgan_SexOrgan.de.csv",
    skin_rash = "13-Skin_Rash/output/Skin_Rash.de.csv",
    lung = "14-Lung/output/Lung.de.csv",
    skin_non_rash = "15-Skin_NonRash/output/Skin_NonRash.de.csv",
    brain_white = "16-Brain_Brain-Wh/output/Brain_White.de.csv"
)

# Named character vector: tissue label -> full path of its DE table.
# setNames() is needed because file.path() does not carry names over.
results_paths <- setNames(
    file.path(de_results_dir, de_results_files),
    names(de_results_files)
)

In [None]:
# Read every tissue's DE table into a named list of data frames (one element
# per tissue, named like `results_paths`).
# Fail early and name the offending files instead of surfacing a low-level
# connection error from inside lapply().
missing.paths <- results_paths[!file.exists(results_paths)]
if (length(missing.paths) > 0) {
    stop(
        "DE result file(s) not found: ",
        paste(missing.paths, collapse = ", "),
        call. = FALSE
    )
}
# stringsAsFactors is set explicitly so gene IDs and symbols are read as
# character on every R version (the default was TRUE before R 4.0).
results.all.tissues <- lapply(results_paths, read.csv, stringsAsFactors = FALSE)

In [None]:
source('../../prj_helpers.R')

In [None]:
# For each tissue, collect the distinct gene IDs whose adjusted p-value is
# below 0.05. The comparison result is passed through NA2FALSE() (from
# prj_helpers.R) before being used as a row filter.
de.genes.all.tissues <- lapply(results.all.tissues, function(tissue.res) {
    is.signif <- NA2FALSE(tissue.res$padj < 0.05)
    unique(tissue.res$Geneid[is.signif])
})

In [None]:
# Number of DE genes per tissue (named integer vector), and the same counts
# as a data frame with one row per tissue for plotting.
n.de.per.tissue <- lengths(de.genes.all.tissues)
n.de.df.plot <- data.frame(
    tissue = names(n.de.per.tissue),
    n.genes = n.de.per.tissue
)

In [None]:
library(ggplot2)
library(forcats)
library(tidyverse)

In [None]:
options(repr.plot.width=6, repr.plot.height=6)
# Bar chart of the number of DE genes per tissue, with tissues ordered from
# most to fewest DE genes.
ggplot(n.de.df.plot, aes(x = fct_reorder(tissue, desc(n.genes)), y = n.genes)) +
    geom_bar(stat = 'identity') +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    # The x axis shows tissues and the y axis the gene counts; the count
    # label previously sat on the x axis.
    labs(x = 'Tissue', y = 'Number of differentially expressed genes')

In [None]:
# Save the most recently displayed ggplot (ggsave() defaults to last_plot()).
# Arguments are spelled out in full rather than relying on partial matching.
ggsave('output/n_de_genes.png', width = 10, height = 10)

In [None]:
# A gene counts as recurrent when it is DE in MORE than this many tissues
# (the comparison below is strict, so 4 means "at least 5 tissues").
tissues.expressed.cutoff <- 4
# Each tissue contributes a gene at most once (the per-tissue lists are
# de-duplicated), so the tally is the number of tissues in which the gene is
# DE. Sorting keeps the most recurrent genes first.
n.tissues.per.gene <- sort(table(unlist(de.genes.all.tissues)), decreasing = TRUE)
recurrent.genes <- names(which(n.tissues.per.gene > tissues.expressed.cutoff))

In [None]:
# Discard the empty-string gene ID, if present
recurrent.genes <- recurrent.genes[nzchar(recurrent.genes)]

In [None]:
# Genes x tissues matrix of log2 fold changes for the recurrent genes.
# Genes are looked up with match() instead of assigning Geneid as row names:
# row-name assignment fails on duplicated or missing gene IDs, whereas match()
# takes the first occurrence and returns NA for genes absent from a tissue.
tmp1 <- lapply(results.all.tissues, function(x) {
    x$log2FoldChange[match(recurrent.genes, x$Geneid)]
})
tmp2 <- do.call(cbind, tmp1)
rownames(tmp2) <- recurrent.genes

In [None]:
# Gene map (gene ID -> gene symbol), taken from the first tissue's table
gene_map <- results.all.tissues[[1]][c('Geneid', 'external_gene_name')]
head(gene_map)

In [None]:
# Convert Ensembl IDs to gene symbols where a symbol is available; keep the
# Ensembl ID otherwise. IDs missing from gene_map (NA after match()) and IDs
# mapped to an empty symbol both fall back to the ID, so no row ends up with
# an NA name.
ens.ids <- rownames(tmp2)
gene.symbols <- as.character(
    gene_map$external_gene_name[match(ens.ids, gene_map$Geneid)]
)
has.symbol <- !is.na(gene.symbols) & gene.symbols != ''
# Named by the original Ensembl ID
new.names <- setNames(ifelse(has.symbol, gene.symbols, ens.ids), ens.ids)
rownames(tmp2) <- new.names

### Heatmap of recurrent genes

In [None]:
library(pheatmap)
library(viridis)

In [None]:
# Heatmap of the recurrent-gene log2 fold changes (genes x tissues), written
# to a PNG file
pheatmap(
    mat = tmp2,
    color = inferno(10),
    show_rownames = TRUE,
    fontsize_row = 6,
    filename = 'output/recurrent_genes.png'
)

In [None]:
# Same heatmap drawn inline, on a tall canvas so the gene labels stay legible
options(repr.plot.width = 6, repr.plot.height = 15)
pheatmap(
    mat = tmp2,
    color = inferno(10),
    show_rownames = TRUE,
    fontsize_row = 6
)

# PCA of fold changes

In [None]:
# Compact overview of the list of per-tissue result tables
str(results.all.tissues,1)

In [None]:
# Preview the first five rows of the adrenal result table
head(results.all.tissues$adrenal,n=5)

In [None]:
# Pull the `X` column out of every tissue's table so the row identifiers can
# be compared between tissues
z <- lapply(results.all.tissues, function(tissue.res) {
    tissue.res$X
})

In [None]:
# TRUE only if EVERY tissue lists exactly the same identifiers in the same
# order as the first tissue (previously only the first two were compared).
# identical() also returns FALSE, rather than recycling, when lengths differ.
all(vapply(z[-1], function(ids) {
    identical(as.character(ids), as.character(z[[1]]))
}, logical(1)))

In [None]:
# Preview the identifiers of the first tissue
head(z[[1]])

In [None]:
# Sort the rows of every tissue's table by the `X` identifier so the tables
# can be lined up row by row
results.all.tissues.o_gene <- lapply(results.all.tissues, function(tissue.res) {
    row.order <- order(tissue.res$X)
    tissue.res[row.order, ]
})

In [None]:
# After sorting, confirm that EVERY tissue (not just the first two) lists
# exactly the same identifiers in the same order as the first tissue. The
# fold-change matrix built from these tables relies on that alignment.
all(vapply(results.all.tissues.o_gene[-1], function(x) {
    identical(
        as.character(x$X),
        as.character(results.all.tissues.o_gene[[1]]$X)
    )
}, logical(1)))

In [None]:
# Genes x tissues matrix of log2 fold changes, with rows in the order of the
# first tissue's sorted identifiers.
# Each tissue is aligned to those identifiers with match() rather than by
# row position: a tissue with a different gene set or row count yields NA for
# its missing genes instead of silently recycled or shifted values. When all
# tissues already share the same unique identifiers, the result is unchanged.
gene.ids <- results.all.tissues.o_gene[[1]]$X
flc.mat <- do.call(cbind, lapply(results.all.tissues.o_gene, function(x) {
    x$log2FoldChange[match(gene.ids, x$X)]
}))
rownames(flc.mat) <- gene.ids

In [None]:
# Replace missing log2 fold changes with 0 (i.e. treat them as "no change")
# before the matrix is passed to the PCA below
flc.mat[is.na(flc.mat)] <- 0

In [None]:
# PCA on all genes: t() puts tissues in rows, so tissues are the observations
# and genes the variables. prcomp() is called with its defaults.
pca.lfc <- prcomp(t(flc.mat))

In [None]:
# Inspect the structure of the PCA result
str(pca.lfc)

In [None]:
# Quick base-graphics scatter plot of the tissue scores on PC1 and PC2
options(repr.plot.width=7, repr.plot.height=7)
plot(pca.lfc$x[,1:2])

In [None]:
# Data frame for ggplot: tissue label plus the scores on the first three PCs
plot.df <- data.frame(tissue=rownames(pca.lfc$x),pca.lfc$x[,1:3])

In [None]:
# Preview the plotting data frame
head(plot.df)

In [None]:
library(ggrepel)

In [None]:
options(repr.plot.width = 9, repr.plot.height = 9)
# Tissues on the first two PCs of the all-gene PCA. Each point carries a
# repelled text label, so the colour legend is hidden.
ggplot(plot.df, aes(x = PC1, y = PC2, color = tissue)) +
    geom_point(size = 6) +
    geom_label_repel(aes(label = tissue)) +
    theme_bw() +
    theme(legend.position = 'none')
    

In [None]:
# Pick genes that are differentially expressed (padj < 0.05) in at least one
# tissue. which() skips rows whose padj is NA; indexing with the raw logical
# vector would add an NA gene ID for each such row.
de.genes <- unique(unlist(lapply(results.all.tissues, function(x) {
    x$X[which(x$padj < 0.05)]
})))

In [None]:
# Peek at the top-left corner of the fold-change matrix
flc.mat[1:3,1:3]

In [None]:
# Preview the selected gene identifiers
head(de.genes)

In [None]:
# Count how many selected genes are (TRUE) or are not (FALSE) rows of flc.mat
table(de.genes %in% rownames(flc.mat))

In [None]:
# Drop any NA entry from the selected gene identifiers
de.genes <- de.genes[!is.na(de.genes)]

In [None]:
# PCA restricted to the selected DE genes.
# scale() standardises each column of its input, i.e. each tissue's fold
# changes across the selected genes; t() then puts tissues in rows so that
# prcomp() treats tissues as observations and genes as variables.
# Subsetting by name fails with "subscript out of bounds" if any ID in
# de.genes is not a row name of flc.mat.
pca.lfc.sel <- prcomp(t(scale(flc.mat[de.genes,])))

In [None]:
# Data frame for ggplot: tissue label plus the scores on the first three PCs
# of the DE-gene PCA (overwrites the plot.df built from the all-gene PCA)
plot.df <- data.frame(tissue=rownames(pca.lfc.sel$x),pca.lfc.sel$x[,1:3])

In [None]:
# Tissues on the first two PCs of the DE-gene PCA, with repelled labels
ggplot(plot.df, aes(x = PC1, y = PC2, color = tissue)) +
    geom_point(size = 6) +
    geom_label_repel(aes(label = tissue)) +
    ggtitle('PCA of LogFoldChanges') +
    theme_bw()

In [None]:
# Save the most recently displayed ggplot (ggsave() defaults to last_plot()).
# Arguments are spelled out in full rather than relying on partial matching.
ggsave('output/pca_lfc.png', width = 8, height = 8)

In [None]:
# Show the genes with the largest absolute loadings on PC1 and PC2

In [None]:
# Number of top-loading genes to take from each principal component
n.top.genes.per.pc <- 20

# For PC1 and PC2 in turn, rank genes by absolute loading and keep the top
# `n.top.genes.per.pc`. Genes that are top-ranked on both PCs are kept once.
loaded.genes <- unique(unlist(lapply(c('PC1', 'PC2'), function(pc) {
    abs.loadings <- abs(pca.lfc.sel$rotation[, pc])
    head(names(sort(abs.loadings, decreasing = TRUE)), n = n.top.genes.per.pc)
})))

In [None]:
# Multiplier applied to the loadings when they are drawn on the score plot
loading_scale <- 100
# PC1 and PC2 loadings of the selected genes, one row per gene
loaded.genes.df <- as.data.frame(
    pca.lfc.sel$rotation[loaded.genes, c('PC1', 'PC2')]
)

In [None]:
# Attach the gene symbol of each top-loading gene by matching the row names
# (gene IDs) against gene_map; IDs absent from gene_map get NA
loaded.genes.df$symbol <- gene_map$external_gene_name[match(rownames(loaded.genes.df), gene_map$Geneid)]

In [None]:
# Tissue scores on PC1/PC2 with the top-loading genes overlaid as arrows from
# the origin. Loadings are multiplied by `loading_scale` before plotting.
# Layers are drawn in this order: arrows, gene labels, tissue points, tissue
# labels.
ggplot(plot.df, aes(x = PC1, y = PC2, color = tissue)) +
    geom_segment(
        data = loaded.genes.df,
        aes(x = 0, y = 0, xend = PC1 * loading_scale, yend = PC2 * loading_scale),
        arrow = arrow(),
        alpha = 0.5,
        inherit.aes = FALSE
    ) +
    geom_label_repel(
        data = loaded.genes.df,
        aes(x = PC1 * loading_scale, y = PC2 * loading_scale, label = symbol),
        inherit.aes = FALSE
    ) +
    geom_point(size = 8) +
    geom_label_repel(aes(label = tissue)) +
    ggtitle('PCA of LogFoldChanges') +
    theme_bw() +
    theme(legend.position = 'none')

In [None]:
# Save the most recently displayed ggplot (ggsave() defaults to last_plot()).
# Arguments are spelled out in full rather than relying on partial matching.
ggsave('output/pca_lfc_withloadings.png', width = 10, height = 10)

# Specific Genes Across Tissues

## Hits from Testes

In [None]:
# Reusable theme fragment that rotates the x-axis tick labels by 90 degrees;
# added to the per-gene bar charts below
ggplot_rotate_labels <- theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

In [None]:
# NEURL3 (ENSMMUG00000000006.4): log2 fold change in each tissue.
# The title and axis text size match the other per-gene bar charts in this
# section, so the figure identifies its gene without relying on this comment.
reshape2::melt(flc.mat['ENSMMUG00000000006.4',,drop=FALSE]) %>% 
    ggplot(aes(x=Var2,y=value)) + 
    geom_bar(stat='identity') + 
    ggplot_rotate_labels + theme(axis.text=element_text(size=16)) +
    ggtitle('NEURL3 (ENSMMUG00000000006.4)')

In [None]:
# ESR2: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000022839.4', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('ESR2 (ENSMMUG00000022839.4)')

In [None]:
# CDHR2: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000013283.4', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('CDHR2 (ENSMMUG00000013283.4)')

In [None]:
# INSL3: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000042375.2', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('INSL3 (ENSMMUG00000042375.2)')

In [None]:
# FOS: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000040100.2', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('FOS (ENSMMUG00000040100.2)')

In [None]:
# VDR: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000006727.3', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('VDR (ENSMMUG00000006727.3)')

In [None]:
# GPC1: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000056083.1', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('GPC1 (ENSMMUG00000056083.1)')

In [None]:
# GADD45B: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000003468.3', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('GADD45B (ENSMMUG00000003468.3)')

In [None]:
# ZFP36: log2 fold change in each tissue, as a bar chart.
# The title now carries the same gene ID that is plotted; it previously read
# "EENSMMUG..." with a doubled leading "E".
reshape2::melt(flc.mat['ENSMMUG00000038702.2',,drop=FALSE]) %>% 
    ggplot(aes(x=Var2,y=value)) + 
    geom_bar(stat='identity') + 
    ggplot_rotate_labels + theme(axis.text=element_text(size=16)) +
    ggtitle('ZFP36 (ENSMMUG00000038702.2)')

In [None]:
# LRG1: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000023701.4', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('LRG1 (ENSMMUG00000023701.4)')

In [None]:
# Log2 fold change in each tissue for ENSMMUG00000063131.1, titled MOB3C.
# NOTE(review): the MOB3C symbol is also used in the title of the
# ENSMMUG00000032106.3 plot further down - confirm against gene_map which
# gene ID the symbol belongs to.
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000063131.1', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('MOB3C (ENSMMUG00000063131.1)')

In [None]:
# NRCAM: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000021293.4', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('NRCAM (ENSMMUG00000021293.4)')

In [None]:
# SOCS3: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000058428.1', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('SOCS3 (ENSMMUG00000058428.1)')

In [None]:
# SDC4: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000048424.2', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('SDC4 (ENSMMUG00000048424.2)')

In [None]:
# SDC4: log2 fold change in each tissue, as a bar chart.
# NOTE(review): this cell plots the same gene ID with the same title as the
# cell directly before it - check whether a different gene was intended.
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000048424.2', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('SDC4 (ENSMMUG00000048424.2)')

In [None]:
# Log2 fold change in each tissue for ENSMMUG00000032106.3, titled MOB3C.
# NOTE(review): the MOB3C symbol is also used in the title of the
# ENSMMUG00000063131.1 plot earlier in this section - confirm against
# gene_map which gene ID the symbol belongs to.
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000032106.3', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('MOB3C (ENSMMUG00000032106.3)')

In [None]:
# SAT1: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000060580.1', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('SAT1 (ENSMMUG00000060580.1)')

In [None]:
# SARM1: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000061478.1', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('SARM1 (ENSMMUG00000061478.1)')

In [None]:
# SPATA12: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000058805.1', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('SPATA12 (ENSMMUG00000058805.1)')

In [None]:
# Preview the gene ID / gene symbol map
head(gene_map)

In [None]:
# Look up the gene_map row(s) whose symbol is COL27A1 to get its gene ID
gene_map[gene_map$external_gene_name == 'COL27A1',]

In [None]:
# COL27A1: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000031973.3', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    theme_bw() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('COL27A1 (ENSMMUG00000031973.3)')

In [None]:
# Look up the gene_map row(s) whose symbol is COL1A1 to get its gene ID
gene_map[gene_map$external_gene_name == 'COL1A1',]

In [None]:
# COL1A1: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000001467.4', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    theme_bw() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('COL1A1 (ENSMMUG00000001467.4)')

In [None]:
# Look up the gene_map row(s) whose symbol is COL18A1 to get its gene ID
gene_map[gene_map$external_gene_name == 'COL18A1',]

In [None]:
# COL18A1: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000004472.4', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    theme_bw() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('COL18A1 (ENSMMUG00000004472.4)')

In [None]:
# Look up the gene_map row(s) whose symbol is COL3A1 to get its gene ID
gene_map[gene_map$external_gene_name == 'COL3A1',]

In [None]:
# COL3A1: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000021286.4', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    theme_bw() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('COL3A1 (ENSMMUG00000021286.4)')

In [None]:
# Look up the gene_map row(s) whose symbol is COL6A3 to get its gene ID
gene_map[gene_map$external_gene_name == 'COL6A3',]

In [None]:
# COL6A3: log2 fold change in each tissue, as a bar chart
ggplot(
    reshape2::melt(flc.mat['ENSMMUG00000014049.4', , drop = FALSE]),
    aes(x = Var2, y = value)
) +
    geom_col() +
    theme_bw() +
    ggplot_rotate_labels +
    theme(axis.text = element_text(size = 16)) +
    ggtitle('COL6A3 (ENSMMUG00000014049.4)')