In [2]:
library(DESeq2)
library(tidyverse)
library(biomaRt)

In [7]:
# Load the gene-level count table. The first line of the file is skipped and
# the next line is read as the column header.
# NOTE(review): the skipped line is presumably the featureCounts command-line
# comment -- confirm against the file.
read.counts <- read.delim(
  file = '_STAR_genecounts.txt',
  sep = '\t',
  skip = 1,
  header = TRUE
)

# Keep the gene ID (column 1) plus the six count columns (7-12) and give them
# short sample names.
# NOTE(review): the names are assigned purely by position, so this assumes
# columns 7-12 are ordered DoubPos 1-3 followed by mESC 1-3 -- confirm against
# the file header.
cols <- c("Geneid", "DoubPos_1", "DoubPos_2", "DoubPos_3", "mESC_1", "mESC_2", "mESC_3")
read.counts <- read.counts[, c(1, 7:12)]
colnames(read.counts) <- cols
read.counts

Geneid,DoubPos_1,DoubPos_2,DoubPos_3,mESC_1,mESC_2,mESC_3
<chr>,<int>,<int>,<int>,<int>,<int>,<int>
ENSMUSG00000104478,0,1,0,0,0,0
ENSMUSG00000104385,0,1,0,0,0,2
ENSMUSG00000086053,0,1,2,0,2,1
ENSMUSG00000101231,0,0,0,0,0,0
ENSMUSG00000102135,0,0,0,0,1,0
ENSMUSG00000103282,0,0,0,0,0,0
ENSMUSG00000101097,0,0,0,0,0,0
ENSMUSG00000100764,2,1,0,0,2,1
ENSMUSG00000102534,0,0,0,0,0,0
ENSMUSG00000100831,0,0,0,0,0,0


In [8]:
# Move the gene IDs out of the "Geneid" column into the row names, then turn
# the remaining sample columns into a matrix.
read.counts <- read.counts %>%
  column_to_rownames(var = "Geneid") %>%
  as.matrix()
read.counts

Unnamed: 0,DoubPos_1,DoubPos_2,DoubPos_3,mESC_1,mESC_2,mESC_3
ENSMUSG00000104478,0,1,0,0,0,0
ENSMUSG00000104385,0,1,0,0,0,2
ENSMUSG00000086053,0,1,2,0,2,1
ENSMUSG00000101231,0,0,0,0,0,0
ENSMUSG00000102135,0,0,0,0,1,0
ENSMUSG00000103282,0,0,0,0,0,0
ENSMUSG00000101097,0,0,0,0,0,0
ENSMUSG00000100764,2,1,0,0,2,1
ENSMUSG00000102534,0,0,0,0,0,0
ENSMUSG00000100831,0,0,0,0,0,0


In [9]:
# Condition label per sample, in order: three "Double_Pos" entries followed by
# three "Negative" entries.
Con_DoubPos <- factor(rep(c("Double_Pos", "Negative"), each = 3))

In [11]:
# Sample table: one row per column of the count matrix (row names are the
# sample names), with the condition factor as its only column.
coldata <- data.frame(Con_DoubPos, row.names = colnames(read.counts))
coldata

Unnamed: 0_level_0,Con_DoubPos
Unnamed: 0_level_1,<fct>
DoubPos_1,Double_Pos
DoubPos_2,Double_Pos
DoubPos_3,Double_Pos
mESC_1,Negative
mESC_2,Negative
mESC_3,Negative


In [12]:
# Build the DESeq2 dataset from the count matrix and sample table, modelling
# counts by the Con_DoubPos condition, then run DESeq() on it.
dds <- DESeqDataSetFromMatrix(
  countData = read.counts,
  colData = coldata,
  design = ~ Con_DoubPos
)
ddsrun <- DESeq(object = dds)

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing



In [13]:
# rlog-transformed values of the fitted dataset; merged into the results
# table in a later cell.
rld <- ddsrun %>% rlogTransformation()

In [15]:
# Pull the results for the Con_DoubPos contrast 'Double_Pos' vs 'Negative',
# then reorder the rows by padj using order()'s default (ascending) ordering.
DoubPos_DE <- results(ddsrun, contrast = c('Con_DoubPos', 'Double_Pos', 'Negative'))
padj_rank <- order(DoubPos_DE$padj)
DoubPos_DE <- DoubPos_DE[padj_rank, ]

In [16]:
# Attach the per-sample rlog values to the results table, matching rows on
# their row names (gene IDs) without re-sorting, and call the resulting key
# column "Gene".
de_stats <- as.data.frame(DoubPos_DE)
rlog_values <- as.data.frame(assay(rld))
DoubPos_DE <- merge(de_stats, rlog_values, by = "row.names", sort = FALSE)
colnames(DoubPos_DE)[1] <- "Gene"
DoubPos_DE

Gene,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,DoubPos_1,DoubPos_2,DoubPos_3,mESC_1,mESC_2,mESC_3
<I<chr>>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ENSMUSG00000095799,4000.3930,9.511191,0.3135216,30.33663,3.770743e-202,8.348425e-198,12.023726,12.060387,12.337503,6.375306,6.298185,6.611986
ENSMUSG00000093847,2560.1060,9.064085,0.3258068,27.82043,2.455945e-170,2.718732e-166,11.402087,11.478120,11.692799,6.053278,5.944197,6.313507
ENSMUSG00000109969,2491.4194,9.205900,0.3336082,27.59494,1.279592e-167,9.443389e-164,11.479740,11.318389,11.646594,5.961764,5.937967,6.228054
ENSMUSG00000095936,2011.0804,9.043044,0.3378639,26.76534,8.185370e-158,4.530602e-154,11.254647,10.981354,11.314681,5.854863,5.799986,6.020966
ENSMUSG00000082454,713.5413,6.039254,0.2468218,24.46807,3.232491e-132,1.431347e-128,9.879579,9.639541,9.926560,6.091805,5.955909,5.837515
ENSMUSG00000110190,1728.7498,9.576941,0.4107529,23.31558,3.081487e-120,1.137069e-116,10.953534,10.782282,11.143374,5.679284,5.494170,5.767923
ENSMUSG00000095339,1358.5932,9.326826,0.4195542,22.23032,1.748740e-109,5.531016e-106,10.724502,10.444777,10.715064,5.422801,5.414646,5.632342
ENSMUSG00000043073,950.8867,8.532404,0.3904384,21.85339,7.217084e-106,1.997328e-102,10.278317,9.911057,10.224698,5.507496,5.311016,5.282875
ENSMUSG00000107906,854.6702,8.386669,0.3846224,21.80494,2.082818e-105,5.123733e-102,10.008780,9.845242,10.133035,5.309451,5.291123,5.327919
ENSMUSG00000094973,1255.9167,9.307983,0.4295696,21.66816,4.097741e-104,9.072398e-101,10.544778,10.335564,10.672706,5.452013,5.330445,5.531365


In [14]:
# Use biomaRt to convert the Ensembl gene IDs that featureCounts reported into
# gene names. The alignment was done using mm10, so the lookup is pinned to
# Ensembl release 102, which is the mm10 annotation. (This previously requested
# release 109, contradicting that intent.)
ensembl <- useEnsembl(
  biomart = 'genes',
  dataset = 'mmusculus_gene_ensembl',
  version = 102
)
ensembl

# Fetch ID, biotype and name for every gene in the dataset, then keep only the
# ID -> name mapping. Columns are selected by name rather than by position.
annotLookup <- getBM(
  mart = ensembl,
  attributes = c('ensembl_gene_id', 'gene_biotype', 'external_gene_name'),
  uniqueRows = TRUE
)
annotLookup_gene <- annotLookup[, c('ensembl_gene_id', 'external_gene_name')]
annotLookup_gene

Object of class 'Mart':
  Using the ENSEMBL_MART_ENSEMBL BioMart database
  Using the mmusculus_gene_ensembl dataset

ensembl_gene_id,external_gene_name
<chr>,<chr>
ENSMUSG00000064336,mt-Tf
ENSMUSG00000064337,mt-Rnr1
ENSMUSG00000064338,mt-Tv
ENSMUSG00000064339,mt-Rnr2
ENSMUSG00000064340,mt-Tl1
ENSMUSG00000064341,mt-Nd1
ENSMUSG00000064342,mt-Ti
ENSMUSG00000064343,mt-Tq
ENSMUSG00000064344,mt-Tm
ENSMUSG00000064345,mt-Nd2


In [18]:
# Add gene names to the results table. inner_join keeps only the rows whose
# "Gene" ID has a match in the annotation lookup.
DE_Named <- DoubPos_DE %>%
  inner_join(annotLookup_gene, by = c("Gene" = "ensembl_gene_id"))
head(DE_Named)

Unnamed: 0_level_0,Gene,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,DoubPos_1,DoubPos_2,DoubPos_3,mESC_1,mESC_2,mESC_3,external_gene_name
Unnamed: 0_level_1,<I<chr>>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,ENSMUSG00000095799,4000.393,9.511191,0.3135216,30.33663,3.770743e-202,8.348425000000001e-198,12.023726,12.060387,12.3375,6.375306,6.298185,6.611986,Eif1ad10
2,ENSMUSG00000093847,2560.106,9.064085,0.3258068,27.82043,2.455945e-170,2.718732e-166,11.402087,11.47812,11.6928,6.053278,5.944197,6.313507,Eif1ad15
3,ENSMUSG00000109969,2491.4194,9.2059,0.3336082,27.59494,1.279592e-167,9.443388999999999e-164,11.47974,11.318389,11.64659,5.961764,5.937967,6.228054,Zscan4-ps3
4,ENSMUSG00000095936,2011.0804,9.043044,0.3378639,26.76534,8.18537e-158,4.530602e-154,11.254647,10.981354,11.31468,5.854863,5.799986,6.020966,Zscan4e
5,ENSMUSG00000082454,713.5413,6.039254,0.2468218,24.46807,3.232491e-132,1.431347e-128,9.879579,9.639541,9.92656,6.091805,5.955909,5.837515,Gm12183
6,ENSMUSG00000110190,1728.7498,9.576941,0.4107529,23.31558,3.0814870000000003e-120,1.137069e-116,10.953534,10.782282,11.14337,5.679284,5.49417,5.767923,Zscan4-ps2


In [19]:
# Move the gene name next to the gene ID, keeping every other column in its
# existing order. The columns are addressed by name so that re-running this
# cell leaves the layout unchanged (a positional permutation would shuffle the
# columns again on every run).
DE_Named <- DE_Named %>%
  select(Gene, external_gene_name, everything())
DE_Named

Gene,external_gene_name,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,DoubPos_1,DoubPos_2,DoubPos_3,mESC_1,mESC_2,mESC_3
<I<chr>>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ENSMUSG00000095799,Eif1ad10,4000.3930,9.511191,0.3135216,30.33663,3.770743e-202,8.348425e-198,12.023726,12.060387,12.337503,6.375306,6.298185,6.611986
ENSMUSG00000093847,Eif1ad15,2560.1060,9.064085,0.3258068,27.82043,2.455945e-170,2.718732e-166,11.402087,11.478120,11.692799,6.053278,5.944197,6.313507
ENSMUSG00000109969,Zscan4-ps3,2491.4194,9.205900,0.3336082,27.59494,1.279592e-167,9.443389e-164,11.479740,11.318389,11.646594,5.961764,5.937967,6.228054
ENSMUSG00000095936,Zscan4e,2011.0804,9.043044,0.3378639,26.76534,8.185370e-158,4.530602e-154,11.254647,10.981354,11.314681,5.854863,5.799986,6.020966
ENSMUSG00000082454,Gm12183,713.5413,6.039254,0.2468218,24.46807,3.232491e-132,1.431347e-128,9.879579,9.639541,9.926560,6.091805,5.955909,5.837515
ENSMUSG00000110190,Zscan4-ps2,1728.7498,9.576941,0.4107529,23.31558,3.081487e-120,1.137069e-116,10.953534,10.782282,11.143374,5.679284,5.494170,5.767923
ENSMUSG00000095339,Zscan4b,1358.5932,9.326826,0.4195542,22.23032,1.748740e-109,5.531016e-106,10.724502,10.444777,10.715064,5.422801,5.414646,5.632342
ENSMUSG00000043073,Usp17le,950.8867,8.532404,0.3904384,21.85339,7.217084e-106,1.997328e-102,10.278317,9.911057,10.224698,5.507496,5.311016,5.282875
ENSMUSG00000107906,Eif4a3l2,854.6702,8.386669,0.3846224,21.80494,2.082818e-105,5.123733e-102,10.008780,9.845242,10.133035,5.309451,5.291123,5.327919
ENSMUSG00000094973,Eif4a3l1,1255.9167,9.307983,0.4295696,21.66816,4.097741e-104,9.072398e-101,10.544778,10.335564,10.672706,5.452013,5.330445,5.531365


In [30]:
# Read the headerless, tab-separated gene list; with no header the columns get
# the default names V1 and V2.
# NOTE(review): judging by the file name, V1 is the gene ID and V2 a log2 fold
# change from an earlier analysis -- confirm.
filter_list <- read.delim(
  file = '/u/home/j/jadiruss/20220702_ForJon/DEG_MERVLandZscan_positive_withlog2fc.txt',
  sep = "\t",
  header = FALSE
)
filter_list

V1,V2
<chr>,<dbl>
ENSMUSG00000086151,8.011065
ENSMUSG00000039395,3.379739
ENSMUSG00000039323,2.052402
ENSMUSG00000090637,8.277226
ENSMUSG00000052477,6.129244
ENSMUSG00000094127,5.686756
ENSMUSG00000026246,6.773163
ENSMUSG00000026407,7.080194
ENSMUSG00000018196,2.850969
ENSMUSG00000066842,4.432043


In [37]:
# Keep every gene from filter_list and attach its row from DE_Named where one
# exists, then drop all rows that contain an NA in any column. This removes
# the filter-list genes that have no match in DE_Named.
# NOTE(review): na.omit also drops matched genes that carry an NA in any
# column (padj, for example) -- confirm that is intended.
compare <- na.omit(
  right_join(DE_Named, filter_list, by = c("Gene" = "V1"))
)
head(compare)

Unnamed: 0_level_0,Gene,external_gene_name,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,DoubPos_1,DoubPos_2,DoubPos_3,mESC_1,mESC_2,mESC_3,V2
Unnamed: 0_level_1,<I<chr>>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,ENSMUSG00000093847,Eif1ad15,2560.106,9.064085,0.3258068,27.82043,2.455945e-170,2.718732e-166,11.40209,11.47812,11.6928,6.053278,5.944197,6.313507,3.405451
2,ENSMUSG00000095936,Zscan4e,2011.0804,9.043044,0.3378639,26.76534,8.18537e-158,4.530602e-154,11.25465,10.981354,11.31468,5.854863,5.799986,6.020966,9.100313
3,ENSMUSG00000095339,Zscan4b,1358.5932,9.326826,0.4195542,22.23032,1.74874e-109,5.531016000000001e-106,10.7245,10.444777,10.71506,5.422801,5.414646,5.632342,9.540102
4,ENSMUSG00000043073,Usp17le,950.8867,8.532404,0.3904384,21.85339,7.217084000000001e-106,1.997328e-102,10.27832,9.911057,10.2247,5.507496,5.311016,5.282875,8.449468
5,ENSMUSG00000094973,Eif4a3l1,1255.9167,9.307983,0.4295696,21.66816,4.097741e-104,9.072398e-101,10.54478,10.335564,10.67271,5.452013,5.330445,5.531365,8.688339
6,ENSMUSG00000044345,Marveld1,961.151,3.795408,0.175803,21.58898,2.279997e-103,4.589011999999999e-100,10.38144,10.291881,10.3891,7.592909,7.841042,7.697172,3.650583


In [None]:
# Reduce the comparison table to gene ID, gene name and this analysis's log2
# fold change, then rename the columns for export. Columns are picked by name
# so the result does not depend on the column order produced by earlier cells,
# and the global `cols` vector from the loading cell is left untouched.
compare <- compare[, c("Gene", "external_gene_name", "log2FoldChange")]
colnames(compare) <- c("Gene", "External_Gene", "L2FC")
print(compare)

In [36]:
# Export the trimmed table as a tab-separated file with a header row, without
# row names and without quoting.
write.table(
  compare,
  file = '2c_l2fc.tsv',
  sep = '\t',
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)