# Perform DESeq2 again to make sure results are consistent with the past run (they really should be...)

In [2]:
library("DESeq2")
library("RColorBrewer")
library("ggplot2")
library("pheatmap")
library('dplyr') # we need to unload this package eventually as it masks some essential downstream functions

library('pathview')
library('gage')
library('org.Gg.eg.db')
library("AnnotationDbi")

Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from 'package:stats':

    IQR, mad, xtabs

The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, as.vector, cbind, colnames, do.call, duplicated,
    eval, evalq, get, grep, grepl, intersect, is.unsorted, lapply,
    lengths, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
    pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
    tapply, union, unique, unlist, unsplit

Loading required package: IRanges
Loading required package: GenomicRanges


In [5]:
setwd("/home/bay001/projects/kes_20160307/permanent_data/11-3-2016/")
out_dir <- "/home/bay001/projects/kes_20160307/permanent_data/11-3-2016/"

# Load some KEGG Pathway modules

kegg.sets.gal = kegg.gsets("chicken")
kegg.sets.gs = kegg.sets.gal$kg.sets[kegg.sets.gal$sigmet.idx]
head(kegg.sets.gs,3)

In [6]:
tab <- read.table("counts.RSEM.txt",header=TRUE,sep="\t",row.names=1)
samples <- tab
samples <- as.matrix(samples)
colnames(samples)

### Check integrity of matrix (they're in the correct order, etc.) 

In [7]:
samples[1:3,]
dim(samples)

Unnamed: 0,EC.1AK228_CAGATC,EC.2AK546_ACTTGA,EC.3AK436_GATCAG,EC.4AK111_TAGCTT,EC.5AK453_GGCTAC,EC.6AK100_CTTGTA,EC.7AK501_AGTCAA,EC.8AK511_AGTTCC,EC.9AK123_ATGTCA,EC.10AK244_CCGTCC,EC.11AK330_GTCCGC,EC.12AK422_GTGAAA,EC.13AK430_GTGGCC,EC.14AK418_GTTTCG,EC.15AK219_CGTACG,EC.16AK327_GAGTGG,EC.17AK506_ACTGAT,EC.18AK105_ATTCCT
EC-4AK111_TAGCTT_R1_(paired)_contig_1003-0,858,916,863,2406,978,1103,805,919,801,998,960,788,774,1143,1523,1334,1335,971
EC-4AK111_TAGCTT_R1_(paired)_contig_10031-0,404,390,596,754,585,348,422,460,602,633,615,388,531,447,563,549,537,434
EC-4AK111_TAGCTT_R1_(paired)_contig_10050-0,218,174,211,664,283,315,169,148,254,243,180,190,371,253,210,287,354,300


### Basic filtering out any samples with a mean count of less than 1 

In [8]:
rs <- rowMeans(samples)
use <- (rs > 18)
samples <- samples[ use, ]

samples <- as.matrix(samples)
dim(samples)

### The original list has sample 7 (AK501) as male, but it is actually female.  

In [9]:

factors <- as.factor(c(rep("SCCP",6),rep("TBBPA",6),rep("CONTROL",6)))
type <- c('m','f','f','m','m','f','f','f','m','f','m','f','f','m','f','m','m','f')
# type <- c('f1','m2','f3','f4','m5','f6','m7','m8','f9','m10','f11','f12','m13','m14','f15','m16','f17','m18')
(coldata <- data.frame(row.names=colnames(samples), factors, type)
)

dds <- DESeqDataSetFromMatrix(countData=samples, colData=coldata, design=~factors)
dds$factors <- relevel(dds$factors, ref="CONTROL")

Unnamed: 0,factors,type
EC.1AK228_CAGATC,SCCP,m
EC.2AK546_ACTTGA,SCCP,f
EC.3AK436_GATCAG,SCCP,f
EC.4AK111_TAGCTT,SCCP,m
EC.5AK453_GGCTAC,SCCP,m
EC.6AK100_CTTGTA,SCCP,f
EC.7AK501_AGTCAA,TBBPA,f
EC.8AK511_AGTTCC,TBBPA,f
EC.9AK123_ATGTCA,TBBPA,m
EC.10AK244_CCGTCC,TBBPA,f


In [10]:
dds_rlog <- rlogTransformation(dds)

# Comparing all samples (PCA and Heatmap)

In [11]:
png(paste0(out_dir,"all_samples_pca.png"))
data <- plotPCA(dds_rlog, intgroup=c("factors", "type"), returnData=TRUE)
percentVar <- round(100 * attr(data, "percentVar"))
ggplot(data, aes(PC1, PC2, color=factors, shape=type),
       main = "PCA Plot") +
  geom_point(size=3) +
  scale_shape_manual(values=seq(0,8)) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance"))
dev.off()

sampleDists <- dist(t(assay(dds_rlog)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(dds_rlog$factors, dds_rlog$type, sep="-")
#colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
png(paste0(out_dir,"all_samples_heatmap.png"))
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors,
         main = "Heatmap")
dev.off()

In [12]:
dds_rlog

class: DESeqTransform 
dim: 26625 18 
metadata(0):
assays(1): ''
rownames(26625): EC-4AK111_TAGCTT_R1_(paired)_contig_1003-0
  EC-4AK111_TAGCTT_R1_(paired)_contig_10031-0 ...
  unmapped-49-contig_list_contig_99982-0
  unmapped-49-contig_list_contig_9999-0
rowRanges metadata column names(6): baseMean baseVar ... dispFit
  rlogIntercept
colnames(18): EC.1AK228_CAGATC EC.2AK546_ACTTGA ... EC.17AK506_ACTGAT
  EC.18AK105_ATTCCT
colData names(3): factors type sizeFactor

# Comparing SCCP vs Control (Male)

In [16]:
prefix <- "SCCP_vs_CONTROL_MALE"

dir.create(file.path(getwd(), prefix), showWarnings = FALSE)

samples_2 <- samples[,c(1,4,5,14,16,17)]
factors <- as.factor(c(rep("SCCP",3),rep("CONTROL",3)))
type <- c(rep('m',6))

(coldata <- data.frame(row.names=colnames(samples_2), factors, type)
)
dds <- DESeqDataSetFromMatrix(countData=samples_2, colData=coldata, design=~factors)
dds$factors <- relevel(dds$factors, ref="CONTROL")

dds <- DESeq(dds)
res <- results(dds)
resOrdered <- res[order(res$padj,res$log2FoldChange),] # orders by foldchange and padj
write.csv(as.data.frame(resOrdered),file=paste0(file.path(getwd(), prefix),'/diffexp.csv'))

png(paste0(file.path(getwd(), prefix),'/maplot.png'))
DESeq2::plotMA(res, main="MA Plot", ylim=c(-7,6) )
legend("topright",c("pval > 0.01","pval < 0.01"),col=c("grey","red"),pch=16)
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

png(paste0(file.path(getwd(), prefix),'/volcano.png'))
plot( res$log2FoldChange, -log10( res$padj ), 
      col = ifelse( res$padj < .05, "red", "black" ),
      main = "Volcano Plot",
      ylim = c(0,15)
    )
legend("topright",c("pval > 0.05","pval < 0.05"),col=c("grey","red"),pch=16)
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()
paste0(file.path(getwd(), prefix))

Unnamed: 0,factors,type
EC.1AK228_CAGATC,SCCP,m
EC.4AK111_TAGCTT,SCCP,m
EC.5AK453_GGCTAC,SCCP,m
EC.14AK418_GTTTCG,CONTROL,m
EC.16AK327_GAGTGG,CONTROL,m
EC.17AK506_ACTGAT,CONTROL,m


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


In [16]:
# Credit: http://www.gettinggeneticsdone.com/2015/12/tutorial-rna-seq-differential.html

res$symbol = mapIds(org.Gg.eg.db,
                     keys=row.names(res), 
                     column="SYMBOL",
                     keytype="ENSEMBL",
                     multiVals="first")
res$entrez = mapIds(org.Gg.eg.db,
                     keys=row.names(res), 
                     column="ENTREZID",
                     keytype="ENSEMBL",
                     multiVals="first")
res$name =   mapIds(org.Gg.eg.db,
                     keys=row.names(res), 
                     column="GENENAME",
                     keytype="ENSEMBL",
                     multiVals="first")

head(res, 3)

log2 fold change (MAP): factors SCCP vs CONTROL 
Wald test p-value: factors SCCP vs CONTROL 
DataFrame with 3 rows and 9 columns
                                             baseMean log2FoldChange     lfcSE
                                            <numeric>      <numeric> <numeric>
EC-4AK111_TAGCTT_R1_(paired)_contig_1003-0  1274.8218     0.08076285 0.2138344
EC-4AK111_TAGCTT_R1_(paired)_contig_10031-0  532.9849     0.18740006 0.1708637
EC-4AK111_TAGCTT_R1_(paired)_contig_10050-0  324.7375     0.25734656 0.2402052
                                                 stat    pvalue      padj
                                            <numeric> <numeric> <numeric>
EC-4AK111_TAGCTT_R1_(paired)_contig_1003-0  0.3776887 0.7056619 0.9065259
EC-4AK111_TAGCTT_R1_(paired)_contig_10031-0 1.0967812 0.2727370 0.6483455
EC-4AK111_TAGCTT_R1_(paired)_contig_10050-0 1.0713613 0.2840070 0.6569354
                                                 symbol      entrez        name
                          

In [17]:
foldchanges = res$log2FoldChange
names(foldchanges) = res$entrez
keggres = gage(foldchanges, gsets=kegg.sets.gs, same.dir=TRUE)
lapply(keggres, head)

Unnamed: 0,p.geomean,stat.mean,p.val,q.val,set.size,exp1
gga03010 Ribosome,3.494614e-10,6.4891959,3.494614e-10,4.682783e-08,101,3.494614e-10
gga00190 Oxidative phosphorylation,0.01118861,2.3049833,0.01118861,0.7496368,86,0.01118861
gga00982 Drug metabolism - cytochrome P450,0.0367358,1.8387114,0.0367358,1.0,21,0.0367358
gga04260 Cardiac muscle contraction,0.1342654,1.115097,0.1342654,1.0,38,0.1342654
gga00980 Metabolism of xenobiotics by cytochrome P450,0.1377748,1.1036139,0.1377748,1.0,24,0.1377748
gga03060 Protein export,0.1882168,0.8945251,0.1882168,1.0,21,0.1882168

Unnamed: 0,p.geomean,stat.mean,p.val,q.val,set.size,exp1
gga04510 Focal adhesion,3.188623e-12,-7.15305,3.188623e-12,4.272755e-10,153,3.188623e-12
gga04810 Regulation of actin cytoskeleton,4.944981e-08,-5.477506,4.944981e-08,3.313137e-06,136,4.944981e-08
gga04512 ECM-receptor interaction,1.597564e-07,-5.450778,1.597564e-07,6.315038e-06,55,1.597564e-07
gga04144 Endocytosis,1.885086e-07,-5.168802,1.885086e-07,6.315038e-06,203,1.885086e-07
gga04010 MAPK signaling pathway,2.883595e-05,-4.076891,2.883595e-05,0.0007728033,161,2.883595e-05
gga04530 Tight junction,5.283704e-05,-3.970597,5.283704e-05,0.001180027,87,5.283704e-05

Unnamed: 0,stat.mean,exp1
gga03010 Ribosome,6.4891959,6.4891959
gga00190 Oxidative phosphorylation,2.3049833,2.3049833
gga00982 Drug metabolism - cytochrome P450,1.8387114,1.8387114
gga04260 Cardiac muscle contraction,1.115097,1.115097
gga00980 Metabolism of xenobiotics by cytochrome P450,1.1036139,1.1036139
gga03060 Protein export,0.8945251,0.8945251


In [18]:
library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$greater), keggres$greater) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)


In [19]:
# plot_pathway = function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", new.signature=FALSE)
tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga",
                                                kegg.dir=file.path(getwd(), prefix)))

Info: Downloading xml files for gga03010, 1/1 pathways..
Info: Downloading png files for gga03010, 1/1 pathways..
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03010.pathview.png
Info: Downloading xml files for gga00190, 1/1 pathways..
Info: Downloading png files for gga00190, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 100 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00190.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga00982, 1/1 pathways..
Info: Downloading png files for gga00982, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00982.pathview.png
Info: Downloading xml files for gga04260, 1/1 pathways..
Info: Downloading png files for gga04260, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04260.pathview.png
Info: Downloading xml files for gga00980, 1/1 pathways..
Info: Downloading png files for gga00980, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working 

In [22]:
library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$less), keggres$less) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)


Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



In [23]:
tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))

Info: Downloading xml files for gga04510, 1/1 pathways..
Info: Downloading png files for gga04510, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04510.pathview.png
Info: Downloading xml files for gga04810, 1/1 pathways..
Info: Downloading png files for gga04810, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 64 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04810.pathview.png
Info: Downloading xml files for gga04512, 1/1 pathways..
Info: Downloading png files for gga04512, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 2 of 41 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04512.pathview.png
Info: Downloading xml files for gga04144, 1/1 pathways..
Info: Downloading png files for gga04144, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04144.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga04010, 1/1 pathways..
Info: Downloading png files for gga04010, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 111 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04010.pathview.png


# Comparing SCCP vs Control (Female)

In [25]:
prefix <- "SCCP_vs_CONTROL_FEMALE"

dir.create(file.path(getwd(), prefix), showWarnings = FALSE)

samples_2 <- samples[,c(2,3,6,13,15,18)]
factors <- as.factor(c(rep("SCCP",3),rep("CONTROL",3)))
type <- c(rep('f',6))

(coldata <- data.frame(row.names=colnames(samples_2), factors, type)
)
dds <- DESeqDataSetFromMatrix(countData=samples_2, colData=coldata, design=~factors)
dds$factors <- relevel(dds$factors, ref="CONTROL")

dds <- DESeq(dds)
res <- results(dds)

resOrdered <- res[order(res$padj,res$log2FoldChange),] # orders by foldchange and padj
write.csv(as.data.frame(resOrdered),file=paste0(file.path(getwd(), prefix),'/diffexp.csv'))

png(paste0(file.path(getwd(), prefix),'/maplot.png'))

DESeq2::plotMA(res, main="MA Plot", ylim=c(-7,6) )
legend("topright",c("pval > 0.01","pval < 0.01"),col=c("grey","red"),pch=16)
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

png(paste0(file.path(getwd(), prefix),'/volcano.png'))
plot( res$log2FoldChange, -log10( res$padj ), 
      col = ifelse( res$padj < .05, "red", "black" ),
      main = "Volcano Plot")
legend("topright",c("pval > 0.05","pval < 0.05"),col=c("grey","red"),pch=16)
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

Unnamed: 0,factors,type
EC.2AK546_ACTTGA,SCCP,f
EC.3AK436_GATCAG,SCCP,f
EC.6AK100_CTTGTA,SCCP,f
EC.13AK430_GTGGCC,CONTROL,f
EC.15AK219_CGTACG,CONTROL,f
EC.18AK105_ATTCCT,CONTROL,f


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


In [26]:
# Credit: http://www.gettinggeneticsdone.com/2015/12/tutorial-rna-seq-differential.html

res$symbol = mapIds(org.Gg.eg.db, keys=row.names(res), column="SYMBOL", keytype="ENSEMBL", multiVals="first")
res$entrez = mapIds(org.Gg.eg.db, keys=row.names(res), column="ENTREZID", keytype="ENSEMBL", multiVals="first")
res$name =   mapIds(org.Gg.eg.db, keys=row.names(res), column="GENENAME", keytype="ENSEMBL", multiVals="first")

foldchanges = res$log2FoldChange
names(foldchanges) = res$entrez
keggres = gage(foldchanges, gsets=kegg.sets.gs, same.dir=TRUE)

library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$greater), keggres$greater) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))    


Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga03010, 1/1 pathways..
Info: Downloading png files for gga03010, 1/1 pathways..
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03010.pathview.png
Info: Downloading xml files for gga00190, 1/1 pathways..
Info: Downloading png files for gga00190, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 100 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00190.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga03420, 1/1 pathways..
Info: Downloading png files for gga03420, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03420.pathview.png
Info: Downloading xml files for gga04260, 1/1 pathways..
Info: Downloading png files for gga04260, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04260.pathview.png
Info: Downloading xml files for gga00980, 1/1 pathways..
Info: Downloading png files for gga00980, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working 

In [27]:
library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$less), keggres$less) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))


Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga04510, 1/1 pathways..
Info: Downloading png files for gga04510, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04510.pathview.png
Info: Downloading xml files for gga04810, 1/1 pathways..
Info: Downloading png files for gga04810, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 64 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04810.pathview.png
Info: Downloading xml files for gga04010, 1/1 pathways..
Info: Downloading png files for gga04010, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 111 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04010.pathview.png
Info: Downloading xml files for gga04530, 1/1 pathways..
Info: Downloading png files for gga04530, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04530.pathview.png
Info: Downloading xml files for gga04512, 1/1 pathways..
Info: Downloading png files for gga04512, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 2 of 41 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04512.pathview.png


# Comparing TBBPA and Control (Male)

In [28]:
prefix <- "TBBPA_vs_CONTROL_MALE"

dir.create(file.path(getwd(), prefix), showWarnings = FALSE)

samples_2 <- samples[,c(9,11,14,16,17)]
factors <- as.factor(c(rep("TBBPA",2),rep("CONTROL",3)))
type <- c(rep('m',5))

(coldata <- data.frame(row.names=colnames(samples_2), factors, type)
)
dds <- DESeqDataSetFromMatrix(countData=samples_2, colData=coldata, design=~factors)
dds$factors <- relevel(dds$factors, ref="CONTROL")

dds <- DESeq(dds)
res <- results(dds)

resOrdered <- res[order(res$padj,res$log2FoldChange),] # orders by foldchange and padj
write.csv(as.data.frame(resOrdered),file=paste0(file.path(getwd(), prefix),'/diffexp.csv'))

png(paste0(file.path(getwd(), prefix),'/maplot.png'))
DESeq2::plotMA(res, main="MA Plot", ylim=c(-7,6) )
legend("topright",c("pval > 0.01","pval < 0.01"),col=c("grey","red"),pch=16)
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

png(paste0(file.path(getwd(), prefix),'/volcano.png'))
plot( res$log2FoldChange, -log10( res$padj ), 
      col = ifelse( res$padj < .05, "red", "black" ),
      main = "Volcano Plot")
legend("topright",c("pval > 0.05","pval < 0.05"),col=c("grey","red"),pch=16)
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

Unnamed: 0,factors,type
EC.9AK123_ATGTCA,TBBPA,m
EC.11AK330_GTCCGC,TBBPA,m
EC.14AK418_GTTTCG,CONTROL,m
EC.16AK327_GAGTGG,CONTROL,m
EC.17AK506_ACTGAT,CONTROL,m


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


In [29]:
# Credit: http://www.gettinggeneticsdone.com/2015/12/tutorial-rna-seq-differential.html

res$symbol = mapIds(org.Gg.eg.db, keys=row.names(res), column="SYMBOL", keytype="ENSEMBL", multiVals="first")
res$entrez = mapIds(org.Gg.eg.db, keys=row.names(res), column="ENTREZID", keytype="ENSEMBL", multiVals="first")
res$name =   mapIds(org.Gg.eg.db, keys=row.names(res), column="GENENAME", keytype="ENSEMBL", multiVals="first")

foldchanges = res$log2FoldChange
names(foldchanges) = res$entrez
keggres = gage(foldchanges, gsets=kegg.sets.gs, same.dir=TRUE)

library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$greater), keggres$greater) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))    



Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga03010, 1/1 pathways..
Info: Downloading png files for gga03010, 1/1 pathways..
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03010.pathview.png
Info: Downloading xml files for gga00190, 1/1 pathways..
Info: Downloading png files for gga00190, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 100 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00190.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga00982, 1/1 pathways..
Info: Downloading png files for gga00982, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00982.pathview.png
Info: Downloading xml files for gga00980, 1/1 pathways..
Info: Downloading png files for gga00980, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00980.pathview.png
Info: Downloading xml files for gga03060, 1/1 pathways..
Info: Downloading png files for gga03060, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working 

In [30]:
library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$less), keggres$less) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))


Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga04110, 1/1 pathways..
Info: Downloading png files for gga04110, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 2 of 73 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04110.pathview.png
Info: Downloading xml files for gga04144, 1/1 pathways..
Info: Downloading png files for gga04144, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04144.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga04810, 1/1 pathways..
Info: Downloading png files for gga04810, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 64 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04810.pathview.png
Info: Downloading xml files for gga03015, 1/1 pathways..
Info: Downloading png files for gga03015, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 47 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03015.pathview.png
Info: Downloading xml files for gga03018, 1/1 pathways..
Info: Downloading png files for gga03018, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 2 of 54 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03018.pathview.png


# Comparing TBBPA and Control (Female)

In [31]:
prefix <- "TBBPA_vs_CONTROL_FEMALE"

dir.create(file.path(getwd(), prefix), showWarnings = FALSE)

samples_2 <- samples[,c(7,8,10,12,13,15,18)]
factors <- as.factor(c(rep("TBBPA",4),rep("CONTROL",3)))
type <- c(rep('f',7))

(coldata <- data.frame(row.names=colnames(samples_2), factors, type)
)
dds <- DESeqDataSetFromMatrix(countData=samples_2, colData=coldata, design=~factors)
dds$factors <- relevel(dds$factors, ref="CONTROL")

dds <- DESeq(dds)
res <- results(dds)

resOrdered <- res[order(res$padj,res$log2FoldChange),] # orders by foldchange and padj
write.csv(as.data.frame(resOrdered),file=paste0(file.path(getwd(), prefix),'/diffexp.csv'))

png(paste0(file.path(getwd(), prefix),'/maplot.png'))
DESeq2::plotMA(res, main="MA Plot", ylim=c(-7,6) )
legend("topright",c("pval > 0.01","pval < 0.01"),col=c("grey","red"),pch=16)
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

png(paste0(file.path(getwd(), prefix),'/volcano.png'))
plot( res$log2FoldChange, -log10( res$padj ), 
      col = ifelse( res$padj < .05, "red", "black" ),
      main = "Volcano Plot")
legend("topright",c("pval > 0.05","pval < 0.05"),col=c("grey","red"),pch=16)
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

Unnamed: 0,factors,type
EC.7AK501_AGTCAA,TBBPA,f
EC.8AK511_AGTTCC,TBBPA,f
EC.10AK244_CCGTCC,TBBPA,f
EC.12AK422_GTGAAA,TBBPA,f
EC.13AK430_GTGGCC,CONTROL,f
EC.15AK219_CGTACG,CONTROL,f
EC.18AK105_ATTCCT,CONTROL,f


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


In [32]:
# Credit: http://www.gettinggeneticsdone.com/2015/12/tutorial-rna-seq-differential.html

res$symbol = mapIds(org.Gg.eg.db, keys=row.names(res), column="SYMBOL", keytype="ENSEMBL", multiVals="first")
res$entrez = mapIds(org.Gg.eg.db, keys=row.names(res), column="ENTREZID", keytype="ENSEMBL", multiVals="first")
res$name =   mapIds(org.Gg.eg.db, keys=row.names(res), column="GENENAME", keytype="ENSEMBL", multiVals="first")

foldchanges = res$log2FoldChange
names(foldchanges) = res$entrez
keggres = gage(foldchanges, gsets=kegg.sets.gs, same.dir=TRUE)

library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$greater), keggres$greater) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))    



Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga03010, 1/1 pathways..
Info: Downloading png files for gga03010, 1/1 pathways..
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03010.pathview.png
Info: Downloading xml files for gga00190, 1/1 pathways..
Info: Downloading png files for gga00190, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 100 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00190.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga00563, 1/1 pathways..
Info: Downloading png files for gga00563, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00563.pathview.png
Info: Downloading xml files for gga00140, 1/1 pathways..
Info: Downloading png files for gga00140, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00140.pathview.png
Info: Downloading xml files for gga04146, 1/1 pathways..
Info: Downloading png files for gga04146, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 62 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04146.pathview.png


In [33]:
library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$less), keggres$less) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))


Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga04510, 1/1 pathways..
Info: Downloading png files for gga04510, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04510.pathview.png
Info: Downloading xml files for gga04010, 1/1 pathways..
Info: Downloading png files for gga04010, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 111 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04010.pathview.png
Info: Downloading xml files for gga04150, 1/1 pathways..
Info: Downloading png files for gga04150, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 63 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04150.pathview.png
Info: Downloading xml files for gga04810, 1/1 pathways..
Info: Downloading png files for gga04810, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 64 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04810.pathview.png
Info: Downloading xml files for gga04310, 1/1 pathways..
Info: Downloading png files for gga04310, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 4 of 64 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04310.pathview.png


# SCCP vs CONTROL (ALL)

In [34]:
prefix <- "SCCP_vs_CONTROL"

dir.create(file.path(getwd(), prefix), showWarnings = FALSE)

samples_2 <- samples[,c(1:6,13:18)]
factors <- as.factor(c(rep("SCCP",6),rep("CONTROL",6)))

(coldata <- data.frame(row.names=colnames(samples_2), factors)
)
dds <- DESeqDataSetFromMatrix(countData=samples_2, colData=coldata, design=~factors)
dds$factors <- relevel(dds$factors, ref="CONTROL")

dds <- DESeq(dds)
res <- results(dds)
resOrdered <- res[order(res$padj,res$log2FoldChange),] # orders by foldchange and padj
write.csv(as.data.frame(resOrdered),file=paste0(file.path(getwd(), prefix),'/diffexp.csv'))

png(paste0(file.path(getwd(), prefix),'/maplot.png'))
DESeq2::plotMA(res, main="MA Plot", ylim=c(-7,6) )
legend("topright",c("pval > 0.01","pval < 0.01"),col=c("grey","red"),pch=16)
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

png(paste0(file.path(getwd(), prefix),'/volcano.png'))
plot( res$log2FoldChange, -log10( res$padj ), 
      col = ifelse( res$padj < .05, "red", "black" ),
      main = "Volcano Plot")
legend("topright",c("pval > 0.05","pval < 0.05"),col=c("grey","red"),pch=16)
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

Unnamed: 0,factors
EC.1AK228_CAGATC,SCCP
EC.2AK546_ACTTGA,SCCP
EC.3AK436_GATCAG,SCCP
EC.4AK111_TAGCTT,SCCP
EC.5AK453_GGCTAC,SCCP
EC.6AK100_CTTGTA,SCCP
EC.13AK430_GTGGCC,CONTROL
EC.14AK418_GTTTCG,CONTROL
EC.15AK219_CGTACG,CONTROL
EC.16AK327_GAGTGG,CONTROL


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


In [35]:
# Credit: http://www.gettinggeneticsdone.com/2015/12/tutorial-rna-seq-differential.html

res$symbol = mapIds(org.Gg.eg.db, keys=row.names(res), column="SYMBOL", keytype="ENSEMBL", multiVals="first")
res$entrez = mapIds(org.Gg.eg.db, keys=row.names(res), column="ENTREZID", keytype="ENSEMBL", multiVals="first")
res$name =   mapIds(org.Gg.eg.db, keys=row.names(res), column="GENENAME", keytype="ENSEMBL", multiVals="first")

foldchanges = res$log2FoldChange
names(foldchanges) = res$entrez
keggres = gage(foldchanges, gsets=kegg.sets.gs, same.dir=TRUE)

library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$greater), keggres$greater) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))    



Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga03010, 1/1 pathways..
Info: Downloading png files for gga03010, 1/1 pathways..
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03010.pathview.png
Info: Downloading xml files for gga00190, 1/1 pathways..
Info: Downloading png files for gga00190, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 100 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00190.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga04260, 1/1 pathways..
Info: Downloading png files for gga04260, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04260.pathview.png
Info: Downloading xml files for gga00982, 1/1 pathways..
Info: Downloading png files for gga00982, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00982.pathview.png
Info: Downloading xml files for gga00980, 1/1 pathways..
Info: Downloading png files for gga00980, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working 

In [36]:
library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$less), keggres$less) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))


Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga04510, 1/1 pathways..
Info: Downloading png files for gga04510, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04510.pathview.png
Info: Downloading xml files for gga04810, 1/1 pathways..
Info: Downloading png files for gga04810, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 64 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04810.pathview.png
Info: Downloading xml files for gga04512, 1/1 pathways..
Info: Downloading png files for gga04512, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 2 of 41 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04512.pathview.png
Info: Downloading xml files for gga04010, 1/1 pathways..
Info: Downloading png files for gga04010, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 111 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04010.pathview.png
Info: Downloading xml files for gga04144, 1/1 pathways..
Info: Downloading png files for gga04144, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04144.pathview.png
Info: some node width is different from others, and hence adjusted!


# TBBPA vs Control (ALL)

In [37]:
prefix <- "TBBPA_vs_CONTROL"

dir.create(file.path(getwd(), prefix), showWarnings = FALSE)

samples_2 <- samples[,c(7:12,13:18)]
factors <- as.factor(c(rep("TBBPA",6),rep("CONTROL",6)))

(coldata <- data.frame(row.names=colnames(samples_2), factors)
)
dds <- DESeqDataSetFromMatrix(countData=samples_2, colData=coldata, design=~factors)
dds$factors <- relevel(dds$factors, ref="CONTROL")

dds <- DESeq(dds)
res <- results(dds)
resOrdered <- res[order(res$padj,res$log2FoldChange),] # orders by foldchange and padj
write.csv(as.data.frame(resOrdered),file=paste0(file.path(getwd(), prefix),'/diffexp.csv'))

png(paste0(file.path(getwd(), prefix),'/maplot.png'))
DESeq2::plotMA(res, main="MA Plot", ylim=c(-7,6) )
legend("topright",c("pval > 0.01","pval < 0.01"),col=c("grey","red"),pch=16)
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

png(paste0(file.path(getwd(), prefix),'/volcano.png'))
plot( res$log2FoldChange, -log10( res$padj ), 
      col = ifelse( res$padj < .05, "red", "black" ),
      main = "Volcano Plot")
legend("topright",c("pval > 0.05","pval < 0.05"),col=c("grey","red"),pch=16)
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
dev.off()

Unnamed: 0,factors
EC.7AK501_AGTCAA,TBBPA
EC.8AK511_AGTTCC,TBBPA
EC.9AK123_ATGTCA,TBBPA
EC.10AK244_CCGTCC,TBBPA
EC.11AK330_GTCCGC,TBBPA
EC.12AK422_GTGAAA,TBBPA
EC.13AK430_GTGGCC,CONTROL
EC.14AK418_GTTTCG,CONTROL
EC.15AK219_CGTACG,CONTROL
EC.16AK327_GAGTGG,CONTROL


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


In [38]:
# Credit: http://www.gettinggeneticsdone.com/2015/12/tutorial-rna-seq-differential.html

res$symbol = mapIds(org.Gg.eg.db, keys=row.names(res), column="SYMBOL", keytype="ENSEMBL", multiVals="first")
res$entrez = mapIds(org.Gg.eg.db, keys=row.names(res), column="ENTREZID", keytype="ENSEMBL", multiVals="first")
res$name =   mapIds(org.Gg.eg.db, keys=row.names(res), column="GENENAME", keytype="ENSEMBL", multiVals="first")

foldchanges = res$log2FoldChange
names(foldchanges) = res$entrez
keggres = gage(foldchanges, gsets=kegg.sets.gs, same.dir=TRUE)

library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$greater), keggres$greater) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))    



Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga03010, 1/1 pathways..
Info: Downloading png files for gga03010, 1/1 pathways..
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga03010.pathview.png
Info: Downloading xml files for gga00190, 1/1 pathways..
Info: Downloading png files for gga00190, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 100 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00190.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga00982, 1/1 pathways..
Info: Downloading png files for gga00982, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00982.pathview.png
Info: Downloading xml files for gga00980, 1/1 pathways..
Info: Downloading png files for gga00980, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga00980.pathview.png
Info: Downloading xml files for gga00140, 1/1 pathways..
Info: Downloading png files for gga00140, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working 

In [39]:
library('dplyr')
(keggrespathways = data.frame(id=rownames(keggres$less), keggres$less) %>% 
  tbl_df() %>% 
  filter(row_number()<=5) %>% 
  .$id %>% 
  as.character())
(keggresids = substr(keggrespathways, start=1, stop=8)
)
detach("package:dplyr", unload=TRUE)

tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="gga", 
                                                kegg.dir=file.path(getwd(), prefix)))


Attaching package: 'dplyr'

The following object is masked from 'package:AnnotationDbi':

    select

The following object is masked from 'package:Biobase':

    combine

The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union

The following object is masked from 'package:GenomeInfoDb':

    intersect

The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from 'package:S4Vectors':

    intersect, rename, setdiff, union

The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Info: Downloading xml files for gga04510, 1/1 pathways..
Info: Downloading png files for gga04510, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04510.pathview.png
Info: Downloading xml files for gga04810, 1/1 pathways..
Info: Downloading png files for gga04810, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 1 of 64 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04810.pathview.png
Info: Downloading xml files for gga04144, 1/1 pathways..
Info: Downloading png files for gga04144, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04144.pathview.png
Info: some node width is different from others, and hence adjusted!
Info: Downloading xml files for gga04110, 1/1 pathways..
Info: Downloading png files for gga04110, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 2 of 73 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04110.pathview.png
Info: Downloading xml files for gga04010, 1/1 pathways..
Info: Downloading png files for gga04010, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns


[1] "Note: 3 of 111 unique input IDs unmapped."


Info: Working in directory /home/bay001/projects/kes_20160307/permanent_data/10-11-2016
Info: Writing image file gga04010.pathview.png
