In [1]:
library(edgeR)
library(magrittr)
library(ggplot2)
library(DESeq2)
library(tidyverse)

Loading required package: limma

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics

Loading required package: parallel


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB


The following object is masked from ‘package:limma’:

    plotMA


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tap

## Automatise

In [2]:
# Directory holding the pseudobulk input tables. File names are appended to it
# with paste0(), so the trailing slash is part of the value.
path_csvs <- "/home/jovyan/projects/kk14_DCM-lymphoid/results/for_edgeR/210526/"
path_csvs

In [3]:
# Pseudobulk count table: the first CSV column becomes the row names, the
# remaining columns are kept with their original (unchecked) names.
x <- read.csv(
    paste0(path_csvs, "ALL_CELLSTATE_PSEUDOBULK_LVS.csv"),
    row.names = 1,
    check.names = FALSE
)

# Table used downstream for expression-based gene filtering.
genes_tofilter <- read.csv(
    paste0(path_csvs, "ALL_CELLSTATE_PSEUDOBULK_FILTERING_LVS.csv"),
    check.names = FALSE
)
# Harmonise the genotype label used in the column names, then name the first
# column "X" explicitly (with check.names = FALSE it is not auto-named "X").
filter_colnames <- gsub("mutation.negative", "PVneg", colnames(genes_tofilter))
filter_colnames[1] <- "X"
colnames(genes_tofilter) <- filter_colnames

# Cell state -> cell type translation table (only needed to annotate results).
CELLTYPE_STATE <- read.csv(paste0(path_csvs, "CELLSTATE_TRANSLATION_TABLE.csv"))
colnames(CELLTYPE_STATE) <- c("cell_states", "cell_type")

# Absolute cell-state numbers; used downstream to exclude patients that
# contribute only few nuclei to a cell state (as they might bias the analysis).
CELLTYPE_FILTER <- read.csv(paste0(path_csvs, "ABSOLUTE_CELLSTATES_NUMBER_LVS.csv"))

# Split every sample (column) name of `x` into its "__"-separated fields.
l <- strsplit(colnames(x), split = "__")

In [4]:
# Preview the first rows of the gene-filtering table.
head(genes_tofilter)

Unnamed: 0_level_0,X,control_SMC1.2,control_vCM2,control_vCM1.0,control_EC8.0,control_PC1,control_vCM3.0,control_PC3,control_NC2,control_vFB1.0,⋯,PVneg_Meso,PVneg_NC6,PVneg_NC1.3,PVneg_CD4T_reg,PVneg_NK_CD16hiIFNGhi,PVneg_CD8T_te_IFNGhi,PVneg_unclassified.2,PVneg_AD1.1,PVneg_unclassified.1,PVneg_AD3
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>
1,MIR1302-2HG,0.0,7.728137e-05,2.363061e-05,0.0,0.0,0.000208686,0.0,0.0,0.0001331824,⋯,0,0,0.0,0.0,0,0.0,0,0.0,0,
2,FAM138A,0.0,4.704111e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,⋯,0,0,0.0,0.0,0,0.0,0,0.0,0,
3,OR4F5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,⋯,0,0,0.0,0.0,0,0.0,0,0.0,0,
4,AL627309.1,0.006045433,0.03766473,0.02115259,0.01851852,0.005671691,0.026385827,0.006668741,0.05133929,0.0113499835,⋯,0,0,0.04761905,0.08333334,0,0.006666666,0,0.1111111,0,
5,AL627309.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,⋯,0,0,0.0,0.0,0,0.0,0,0.0,0,
6,AL627309.2,0.0,2.821294e-05,2.547252e-05,0.0,1.529613e-05,0.0,0.0,0.0,0.0001032631,⋯,0,0,0.0,0.0,0,0.0,0,0.0,0,


In [5]:
# List the cell states that the translation table assigns to the lymphoid cell type.
CELLTYPE_STATE %>% filter(cell_type=='Lymphoid')

cell_states,cell_type
<chr>,<chr>
CD4T_act,Lymphoid
Plasma,Lymphoid
CD8T_em,Lymphoid
CD8T_trans,Lymphoid
CD8T_te,Lymphoid
MAIT-like,Lymphoid
CD4T_naive,Lymphoid
NK_CD56hi,Lymphoid
NK_CD16hi,Lymphoid
ILC,Lymphoid


In [6]:
# Sample metadata: one row per column of `x`, with the five "__"-separated
# fields of the column name spread over five character columns.
sample_fields <- t(vapply(l, function(parts) parts, character(5)))
rownames(sample_fields) <- colnames(x)

meta.data <- as.data.frame(sample_fields)
colnames(meta.data) <- c("Genotype", "cell_state", "Patient", "X10X_version", "Gender")

head(meta.data)

Unnamed: 0_level_0,Genotype,cell_state,Patient,X10X_version,Gender
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>
LMNA__AD1.0__DL2__V3__m,LMNA,AD1.0,DL2,V3,m
LMNA__AD1.0__H05__V3__m,LMNA,AD1.0,H05,V3,m
LMNA__AD1.0__H26__V3__f,LMNA,AD1.0,H26,V3,f
LMNA__AD1.0__H28__V3__m,LMNA,AD1.0,H28,V3,m
LMNA__AD1.0__H31__V3__m,LMNA,AD1.0,H31,V3,m
LMNA__AD1.1__H05__V3__m,LMNA,AD1.1,H05,V3,m


In [7]:
# Genotypes to loop over: the distinct genotype labels in order of first
# appearance, with the 6th one removed.
# NOTE(review): the positional [-6] presumably removes "control" - confirm.
# It depends on the column order of `x`; selecting by name would be safer.
GENOTYPES <- unique(meta.data$Genotype)[-6]
GENOTYPES

In [8]:
# Constant labels copied into the `annotation_level` and `Region` columns of
# the result table.
CELL_LEVEL <- "CELLSTATE"
REGION <- "LV"

In [9]:
# Turn the per-patient numbers into keep/drop flags: TRUE where the value is
# greater than 5. The first column (assumed to hold the cell-state label) is
# left untouched.
# lapply() compares column by column, so the data frame is not coerced through
# a single matrix as apply() would do, and `[-1]` stays valid even when there
# are no patient columns (unlike 2:ncol(...)).
CELLTYPE_FILTER_SUB <- CELLTYPE_FILTER
CELLTYPE_FILTER_SUB[-1] <- lapply(
    CELLTYPE_FILTER[-1],
    function(n_nuclei) n_nuclei > 5
)

In [10]:
# Show the genotype labels as a character vector (the form iterated over below).
as.character(GENOTYPES)

In [11]:
# Show the distinct cell states present in the sample metadata.
as.character(unique(meta.data$cell_state))

In [12]:
# Differential expression of every genotype against control, run separately
# for each cell state with the edgeR quasi-likelihood pipeline.
#
# Reads (defined in earlier cells): x, meta.data, GENOTYPES, genes_tofilter,
#   CELLTYPE_STATE, CELLTYPE_FILTER_SUB, REGION, CELL_LEVEL.
# Writes: final_df - the per-comparison result tables stacked row-wise
#   (one row per gene and tested genotype / cell-state pair).
#
# Per-comparison tables are collected in `deg_results` and bound once after
# the loop instead of growing `final_df` with rbind() on every iteration.
# If the loop is interrupted, partial results are still in `deg_results`.
i <- 1
deg_results <- list()
cell_states_all <- as.character(unique(meta.data$cell_state))

for (GENOTYPE in as.character(GENOTYPES)) {
    message("\n###START: ", GENOTYPE, " #####\n")

    # Samples belonging to this comparison (control + current genotype).
    in_comparison <- meta.data$Genotype %in% c("control", GENOTYPE)

    for (CELL_STATE in cell_states_all) {

        CONTROL_COLUMN <- paste0("control_", CELL_STATE)
        GENOTYPE_COLUMN <- paste0(GENOTYPE, "_", CELL_STATE)
        CELLTYPE <- as.character(
            CELLTYPE_STATE[CELLTYPE_STATE$cell_states %in% CELL_STATE, "cell_type"]
        )

        # Patients flagged TRUE for this cell state in CELLTYPE_FILTER_SUB
        # (flags are derived upstream from the per-patient nuclei numbers).
        PATIENTS_TOKEEP <- CELLTYPE_FILTER_SUB[CELLTYPE_FILTER_SUB$cell_states %in% CELL_STATE, -1]
        PATIENTS_TOKEEP <- colnames(PATIENTS_TOKEEP)[which(as.logical(PATIENTS_TOKEEP))]

        # Restrict counts and metadata to: this comparison, this cell state,
        # retained patients. drop = FALSE keeps `x_sub` a data frame even when
        # a single sample is left.
        selected <- in_comparison &
            meta.data$cell_state %in% CELL_STATE &
            meta.data$Patient %in% PATIENTS_TOKEEP
        x_sub <- x[, selected, drop = FALSE]
        meta.data_sub <- meta.data[selected, ]

        # The test is only run if both conditions are present and each has
        # more than 2 samples (i.e. at least 3).
        if (length(unique(meta.data_sub$Genotype)) == 2 &&
            all(table(meta.data_sub$Genotype) > 2)) {

            # Prepare DGEList object. "control" is set explicitly as the
            # reference level so the sign of logFC does not depend on how the
            # locale sorts the genotype labels.
            meta.data_sub$Genotype <- factor(
                as.character(meta.data_sub$Genotype),
                levels = c("control", GENOTYPE)
            )
            dge <- DGEList(counts = x_sub, group = meta.data_sub$Genotype)

            # Expression filter: a gene counts as expressed if its value in
            # genes_tofilter exceeds 0.0125 in control OR in the genotype.
            # All genes are tested; the filter only restricts which genes
            # enter the FDR calculation. Missing values count as not expressed.
            keep <- genes_tofilter[, CONTROL_COLUMN] > 0.0125 |
                genes_tofilter[, GENOTYPE_COLUMN] > 0.0125
            keep[is.na(keep)] <- FALSE
            expressed_genes <- genes_tofilter[keep, "X"]

            # PP, model matrix, https://www.nature.com/articles/nmeth.4612 (edgeRQLFDetRate)
            # cdr: per-sample fraction of genes with a non-zero count, centred
            # and scaled, used as a covariate.
            dge <- calcNormFactors(dge)
            cdr <- scale(colMeans(x_sub > 0))
            design <- model.matrix(~ cdr + meta.data_sub$Genotype)

            # Dispersion estimation, QL fit, and F-test on the last design
            # coefficient (the genotype term).
            dge <- estimateDisp(dge, design = design)
            fit <- glmQLFit(dge, design = design)
            qlf <- glmQLFTest(fit)

            # For all genes
            tt <- topTags(qlf, n = Inf)

            # Only for "expressed/detected" genes. Genes are matched by name,
            # so the result does not rely on genes_tofilter and x sharing the
            # same row order.
            is_expressed <- rownames(qlf$table) %in% expressed_genes
            tt_filtered <- topTags(qlf[is_expressed, ], n = Inf)

            tt$table[, "Gene"] <- as.character(rownames(tt$table))
            tt_filtered$table[, "Gene"] <- as.character(rownames(tt_filtered$table))
            # "F" marks genes that passed the expression filter; genes that
            # did not pass end up with NA in this column after the merge.
            tt_filtered$table[, "low_expression"] <- "F"
            # For plotting volcanoes: -log10 of the FDR (not of the p-value).
            tt_filtered$table[, "FDR_plot"] <- -log10(tt_filtered$table$FDR)

            tt_merged <- merge(
                tt$table[, c("Gene", "logFC", "logCPM", "F", "PValue")],
                tt_filtered$table[, c("Gene", "FDR", "low_expression", "FDR_plot")],
                by = "Gene", all = TRUE
            )

            # Genes excluded from the FDR calculation get FDR = 1.
            tt_merged$FDR[is.na(tt_merged$FDR)] <- 1

            EXPRESSION_MEAN <- genes_tofilter[, c("X", CONTROL_COLUMN, GENOTYPE_COLUMN)]
            colnames(EXPRESSION_MEAN) <- c("Gene", "mean_exp_control", "mean_exp_genotype")
            tt_merged <- merge(tt_merged, EXPRESSION_MEAN, by = "Gene")

            # Annotate the comparison.
            tt_merged[, "Region"] <- REGION
            tt_merged[, "annotation_level"] <- CELL_LEVEL
            tt_merged[, "cell_state"] <- CELL_STATE
            tt_merged[, "cell_type"] <- CELLTYPE
            tt_merged[, "comparison"] <- paste0("control_", GENOTYPE)

            deg_results[[i]] <- tt_merged
            i <- i + 1
            message("\n###FINISHED: ", CELL_STATE, " #####\n")
        }
    }
}

# Stack all comparisons; final_df is left untouched when nothing was tested.
if (length(deg_results) > 0) {
    final_df <- do.call(rbind, deg_results)
}


###START: LMNA #####



###FINISHED: AD1.1 #####



###FINISHED: AD2 #####



###FINISHED: CD16+ Mo #####



###FINISHED: CD4T_act #####



###FINISHED: CD4T_naive #####



###FINISHED: CD8T_cytox #####



###FINISHED: CD8T_em #####



###FINISHED: CD8T_te #####



###FINISHED: CD8T_trans #####



###FINISHED: EC1.0 #####



###FINISHED: EC2.0 #####



###FINISHED: EC5.0 #####



###FINISHED: EC6.0 #####



###FINISHED: EC7.0 #####



###FINISHED: EC8.0 #####



###FINISHED: MAIT-like #####



###FINISHED: MY10 #####



###FINISHED: MY12 #####



###FINISHED: MY14 #####



###FINISHED: MY16 #####



###FINISHED: MY1 #####



###FINISHED: MY2 #####



###FINISHED: MY3 #####



###FINISHED: MY4 #####



###FINISHED: MY5 #####



###FINISHED: MY7 #####



###FINISHED: MY8 #####



###FINISHED: MY9 #####



###FINISHED: Mast #####



###FINISHED: NC1.0 #####



###FINISHED: NC1.1 #####



###FINISHED: NC1.4 #####



###FINISHED: NC2 #####



###FINISHED: NK_CD16hi #####



###FINISHED: NK

In [13]:
# Inspect the sample metadata subset left over from the last loop iteration.
meta.data_sub

Unnamed: 0_level_0,Genotype,cell_state,Patient,X10X_version,Gender
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>
control__AD3__BS_H26__V3__m,control,AD3,BS_H26,V3,m


In [14]:
# Preview the first rows of the combined result table.
head(final_df)

Unnamed: 0_level_0,Gene,logFC,logCPM,F,PValue,FDR,low_expression,FDR_plot,mean_exp_control,mean_exp_genotype,Region,annotation_level,cell_state,cell_type,comparison
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
1,A1BG,-1.826956,5.236298,0.492527,0.48280344,1,F,0.0,0.01851852,0.0,LV,CELLSTATE,AD1.1,AD,control_LMNA
2,A1BG-AS1,6.143446e-17,5.131525,0.0,1.0,1,,,0.0,0.0,LV,CELLSTATE,AD1.1,AD,control_LMNA
3,A1CF,1.88452,5.213165,0.4492726,0.50268174,1,,,0.0,0.00462963,LV,CELLSTATE,AD1.1,AD,control_LMNA
4,A2M,3.659995,5.564544,3.534719,0.06009868,1,F,0.0,0.0,0.09722222,LV,CELLSTATE,AD1.1,AD,control_LMNA
5,A2M-AS1,-1.415215,5.217481,8.294089e-08,0.99977021,1,,,0.01111111,0.0,LV,CELLSTATE,AD1.1,AD,control_LMNA
6,A2ML1,6.143446e-17,5.131525,0.0,1.0,1,,,0.0,0.0,LV,CELLSTATE,AD1.1,AD,control_LMNA


In [15]:
# Preview the last rows of the combined result table.
tail(final_df)

Unnamed: 0_level_0,Gene,logFC,logCPM,F,PValue,FDR,low_expression,FDR_plot,mean_exp_control,mean_exp_genotype,Region,annotation_level,cell_state,cell_type,comparison
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
9424173,ZXDB,-0.8793595,2.6469,2.3759966,0.12967598,1.0,,,0.004488071,0.002088314,LV,CELLSTATE,vFB4,FB,control_TTN
9424174,ZXDC,0.1884164,6.614096,1.5478392,0.21940012,0.4574654,F,0.3396418,0.18139045,0.26925346,LV,CELLSTATE,vFB4,FB,control_TTN
9424175,ZYG11A,-0.1996221,2.986955,0.1418047,0.70812656,1.0,,,0.010091202,0.005229333,LV,CELLSTATE,vFB4,FB,control_TTN
9424176,ZYG11B,0.2527605,6.283061,2.7807413,0.10181252,0.2921935,F,0.5343295,0.14143534,0.21486373,LV,CELLSTATE,vFB4,FB,control_TTN
9424177,ZYX,-0.7261541,4.436926,5.30635,0.02555078,0.1227531,F,0.9109676,0.041144,0.037142362,LV,CELLSTATE,vFB4,FB,control_TTN
9424178,ZZEF1,0.128997,6.690831,0.7087126,0.4039801,0.6458588,F,0.1898624,0.1991941,0.28068942,LV,CELLSTATE,vFB4,FB,control_TTN


## Subset lymphoid DEGs and save

In [16]:
# Distinct cell types present in the combined result table.
final_df %>% pull(cell_type) %>% unique()

In [17]:
# Keep only the results whose cell type is "Lymphoid", then display them.
lymphoid_df <- filter(final_df, cell_type == "Lymphoid")
lymphoid_df

Gene,logFC,logCPM,F,PValue,FDR,low_expression,FDR_plot,mean_exp_control,mean_exp_genotype,Region,annotation_level,cell_state,cell_type,comparison
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
A1BG,-9.533713e-01,6.731763,1.28572414,0.306531924,1.0000000,,,0.007598785,0.004797980,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A1BG-AS1,1.092857e+00,6.932621,1.77966407,0.182191148,0.8059838,F,0.0936737122,0.008897485,0.043700997,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A1CF,1.412016e-16,6.552405,0.00000000,1.000000000,1.0000000,,,0.000000000,0.000000000,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A2M,-1.210069e+00,8.471538,8.07815776,0.004480366,0.4032423,F,0.3944339486,0.366576020,0.121607780,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A2M-AS1,-7.146986e-01,7.267749,1.16631962,0.280159238,0.8697359,F,0.0606126112,0.062041476,0.037469660,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A2ML1,-9.241492e-01,6.593884,2.85490748,0.249162452,1.0000000,,,0.006060606,0.000000000,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A2ML1-AS1,1.925183e+00,6.634470,7.57024551,0.041318237,0.5972205,F,0.2238652790,0.000000000,0.017355371,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A2ML1-AS2,1.412016e-16,6.552405,0.00000000,1.000000000,1.0000000,,,0.000000000,0.000000000,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A3GALT2,1.412016e-16,6.552405,0.00000000,1.000000000,1.0000000,,,0.000000000,0.000000000,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
A4GALT,4.912106e-01,6.665633,0.37792886,0.627032070,1.0000000,,,0.006060606,0.009859360,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA


In [18]:
# Save the lymphoid results; write.csv() also writes the row names as the first column.
write.csv(lymphoid_df, file='/home/jovyan/projects/kk14_DCM-lymphoid/results/DEGs_df/210526/LYMPHOIDS_ALLGENOTYPES_EDGER_LV.csv')

In [4]:
# Reload the saved lymphoid results; the first CSV column holds the row names
# written by write.csv().
# Note: read.csv() type-guesses each column, so the "F" markers in
# `low_expression` come back as logical FALSE rather than as strings.
lymphoid_df <- read.csv(
    "/home/jovyan/projects/kk14_DCM-lymphoid/results/DEGs_df/210526/LYMPHOIDS_ALLGENOTYPES_EDGER_LV.csv",
    header = TRUE,
    row.names = 1
)
head(lymphoid_df)

Unnamed: 0_level_0,Gene,logFC,logCPM,F,PValue,FDR,low_expression,FDR_plot,mean_exp_control,mean_exp_genotype,Region,annotation_level,cell_state,cell_type,comparison
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
1,A1BG,-0.9533713,6.731763,1.285724,0.306531924,1.0,,,0.007598785,0.00479798,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
2,A1BG-AS1,1.092857,6.932621,1.779664,0.182191148,0.8059838,False,0.09367371,0.008897485,0.043701,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
3,A1CF,1.412016e-16,6.552405,0.0,1.0,1.0,,,0.0,0.0,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
4,A2M,-1.210069,8.471538,8.078158,0.004480366,0.4032423,False,0.39443395,0.36657602,0.12160778,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
5,A2M-AS1,-0.7146986,7.267749,1.16632,0.280159238,0.8697359,False,0.06061261,0.062041476,0.03746966,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA
6,A2ML1,-0.9241492,6.593884,2.854907,0.249162452,1.0,,,0.006060606,0.0,LV,CELLSTATE,CD4T_act,Lymphoid,control_LMNA


In [5]:
# Number of rows and columns of the reloaded lymphoid table.
dim(lymphoid_df)

In [8]:
# Filter table to make list shorter
lymphoid_df_sel = lymphoid_df %>% filter(PValue<0.05)
dim(lymphoid_df_sel)

In [11]:
# Export the p-value-filtered lymphoid table to the supplementary tables folder.
write.csv(lymphoid_df_sel, file='/home/jovyan/projects/kk14_DCM-lymphoid/for_paper/Supplementary_tables/LYMPHOIDS_DEGs_LV.csv')