### Plots of coverage at the TSS of DEGs
Use Homer to make tables of read counts in a 2 kb window centered at the TSS of the top DEGs between iPSCs and iPSC-CMs. Use the scripts in the scripts folder.
1. v1 = dataset used
2. v2 = comparison between purified and non-purified CMs
3. v3 = combined with new ATAC data (15-01-19). Note: the combined tag directory had a problem with the estimated fragment size, which gave peaks/fragments that were too large, so I ran the command `makeTagDirectory Combined_ATAC_iPSC_CM -update -fragLength 90` (forcing 90 bp) to correct this.

In [79]:
# Move into the private output folder, then into its TSS_plots sub-folder.
# Relative paths used further down resolve against this directory.
setwd("/home/paola/Family1070/private_output")
setwd("TSS_plots")

In [80]:
# Load shared helper functions. writeHeader(), used below to build the job
# script, is not defined in this file -- presumably it comes from here; confirm.
source("~/Family1070/scripts/Plotting_functions.r")    

In [153]:
# Gene list handed to annotatePeaks.pl through -list.
deg_list <- "/home/paola/Family1070/private_output/PCA_rnaseq/DEG/Top_DEGs.txt"

# Tag directories for each version of the analysis (v1, v2, v3).
dir_list1 <- c(
  "Meta_sample_ATAC_iPSC",
  "Meta_sample_H3K27ac_iPSC",
  "Meta_sample_ATAC_iPSC_CM_LAC",
  "Meta_sample_H3K27ac_iPSC_CM",
  "Meta_sample_NKX25_iPSC_CM"
)
dir_list2 <- c(
  "Meta_sample_ATAC_iPSC_CM_DAY15",
  "Meta_sample_ATAC_iPSC_CM_LAC",
  "Meta_sample_H3K27ac_iPSC_CM_DAY15",
  "Meta_sample_H3K27ac_iPSC_CM_LAC",
  "ChIP-Seq_NKX2-5/CM.2_3_R1_FS003",
  "ChIP-Seq_NKX2-5/CM.2_3_R4_FS024"
)
dir_list3 <- c(
  "Combined_ATAC_iPSC",
  "Meta_sample_H3K27ac_iPSC",
  "Combined_ATAC_iPSC_CM",
  "Meta_sample_H3K27ac_iPSC_CM",
  "Meta_sample_NKX25_iPSC_CM"
)

# Shell command for the v3 run: the tag directories are joined with single
# spaces after -d and the output is redirected to TSS_counts_3.txt.
command <- paste(
  "annotatePeaks.pl tss hg19 -list", deg_list,
  "-size 2000 -hist 25 -ghist -d",
  paste(dir_list3, collapse = " "),
  "> TSS_counts_3.txt"
)
# Alternative with a wider window and coarser bins:
# command = paste("annotatePeaks.pl tss hg19 -list", deg_list, "-size 4000 -hist 50 -ghist -d",
#                  paste0(dir_list3, collapse=" "), "> TSS_counts_6.txt", sep=" " )

In [154]:
# Build a job script around `command` and submit it with qsub.
output_file <- "Tss_count.sh"

# Script sections are separated by blank lines: job header, module load,
# change into the analysis directory, then the annotatePeaks.pl command.
sh_file <- paste(
  writeHeader(id = "TSS_count", folder = getwd(), ppn = 8, que = "short"),
  "module load cardips",
  "cd /frazer01/projects/CARDIPS/analysis/family1070_homer",
  command,
  sep = "\n\n"
)
write(sh_file, file = output_file)

# Submit the script to the queue.
run <- paste("qsub", output_file)
system(run)

In [15]:
library(pheatmap)
library(RColorBrewer)

In [16]:
# DEG table (with header) and RefSeq lookup table (no header).
fc <- read.table(deg_list, header = TRUE)
refseq <- read.table("RefSeq_ids.txt", header = FALSE)

# Join the first column of the DEG table against column 13 of the lookup
# table; column 2 of the lookup table is carried along as the last column
# and renamed to refseq_ID.
fc <- merge(fc, refseq[, c(13, 2)], by = 1)
names(fc)[ncol(fc)] <- "refseq_ID"

In [17]:
# v1 input: counts table and the number of 81-column blocks it is expected to
# hold (the averaging loop further down iterates 1..n over such blocks).
file <- "TSS_counts_1.txt"
n <- 5

In [18]:
# v2 input: counts table and the number of 81-column blocks it is expected to
# hold (the averaging loop further down iterates 1..n over such blocks).
file <- "TSS_counts_2.txt"
n <- 6

In [157]:
# v3 input: counts table (in the sibling Homer folder) and the number of
# 81-column blocks it is expected to hold (the averaging loop further down
# iterates 1..n over such blocks).
file <- "../Homer/TSS_counts_3.txt"
n <- 5

In [158]:
# Read the counts table: the first column supplies the row names and the
# column headers are kept exactly as written in the file.
tags <- read.table(file, header = TRUE, row.names = 1, check.names = FALSE)

In [159]:
# Attach the DEG information to the counts: refseq_ID in `fc` is matched
# against the row names of `tags`.
tags2 <- merge(fc, tags, by.x = "refseq_ID", by.y = "row.names")

# Sort by fold change (largest first) and keep only the first row seen for
# each gene name, then index the rows by gene name.
lfc_rank <- order(tags2$log2FoldChange, decreasing = TRUE)
tags2 <- tags2[lfc_rank, ]
tags2 <- tags2[!duplicated(tags2$gene_name), , drop = FALSE]
rownames(tags2) <- tags2$gene_name

# Keep the gene name / fold change pair aside, then reduce tags2 to the
# columns from the 4th onward.
# NOTE(review): assumes the merged table has exactly three leading
# non-count columns -- confirm against the DEG file layout.
fc2 <- tags2[, c("gene_name", "log2FoldChange"), drop = FALSE]
tags2 <- tags2[, 4:ncol(tags2)]

In [160]:
# Replace the fold change by its sign, stored as the strings "TRUE"/"FALSE"
# (TRUE when log2FoldChange > 0).
fc2[, 2] <- as.character(fc2[, 2] > 0)

# One-column row annotation for the heatmap, indexed by gene name.
FC <- data.frame(fc2[, 2])
colnames(FC) <- "FC_iPSC"
rownames(FC) <- fc2[, 1]

# Annotation colours keyed by the flag value.
FC_iPSC <- c("FALSE" = "magenta4", "TRUE" = "limegreen")
anno_colors <- list(FC_iPSC = FC_iPSC)


In [161]:
# Heatmap of the counts in tags2 (rows and columns kept in their current
# order, no clustering), with the FC_iPSC flag as row annotation.
#pdf("TagHeatmap_v3.png", width=8, height=10)
png("TagHeatmap_v3.png", width = 5, height = 6, units = "in", res = 300)

# Colour scale in two segments: fine steps from 0 to 20, then coarser steps
# from 20 up to the largest value. The upper segment is only added when the
# data actually exceed 20, so the breaks are always increasing.
max_tag <- max(tags2)
has_upper <- max_tag > 20
heat_breaks <- seq(0, 20, length.out = 50)
if (has_upper) {
  # Drop the first element (20) so it is not duplicated.
  heat_breaks <- c(heat_breaks, seq(20, max_tag, length.out = 10)[-1])
}

# pheatmap expects one more break than colours, i.e. one colour per interval:
# 49 intervals below 20 and, when present, 9 intervals above it. The previous
# 50 + 10 colours against 59 breaks left the darkest colours unused.
color1 <- colorRampPalette(c("white", "red2"))(49)
color2 <- if (has_upper) {
  colorRampPalette(c("red2", "red4"))(9)
} else {
  character(0)
}

pheatmap(
  tags2,
  color = c(color1, color2),
  breaks = heat_breaks,
  cellwidth = 0.5, cellheight = 0.075, fontsize = 9, border_color = NA,
  show_rownames = FALSE, show_colnames = FALSE,
  cluster_rows = FALSE, cluster_cols = FALSE,
  annotation_row = FC, annotation_colors = anno_colors,
  annotation_legend = TRUE
)
dev.off()

In [162]:

# Average profile per 81-column block, computed separately for the rows with
# a positive fold change (first p rows of tags2) and for the remaining rows,
# then min-max scaled within each block using both groups together.
fc2 <- fc2[order(fc2[, 2], decreasing = TRUE), ]
p <- sum(fc2[, 2] == "TRUE")

# Both groups must be non-empty: with p == 0 or p == nrow(tags2) the row
# ranges below would silently pick the wrong rows instead of failing.
stopifnot(!is.na(p), p > 0, p < nrow(tags2))

# Number of columns per block.
# NOTE(review): presumably one column per 25 bp bin over the 2 kb window
# (2000 / 25 + 1 = 81) for each tag directory -- confirm.
bins <- 81

# Preallocate the result vectors instead of growing them from NA.
meansiPS <- numeric(bins * n)
meansCM <- numeric(bins * n)

for (i in seq_len(n)) {
  cols <- (bins * (i - 1) + 1):(bins * i)
  df <- tags2[, cols]

  means1 <- colMeans(df[seq_len(p), ])
  means2 <- colMeans(df[(p + 1):nrow(tags2), ])
  means <- c(means1, means2)

  # Shared scaling so the two curves of a block stay comparable.
  lowest <- min(means)
  span <- max(means) - lowest

  meansi <- (means1 - lowest) / span
  meansiPS[cols] <- meansi

  meansc <- (means2 - lowest) / span
  meansCM[cols] <- meansc
}



In [163]:
# Line plot of the scaled average profiles for all blocks side by side:
# meansiPS in green, meansCM in magenta.
pdf("Histogram_averages_v3.pdf")
par(pin = c(n, 0.8))

plot(meansiPS, col = "limegreen", type = "l", lwd = 2, las = 2, axes = FALSE)
lines(meansCM, col = "magenta4", lwd = 2)

# Dashed guides: vertical lines every 81 points and at 0, horizontal lines
# just outside the [0, 1] range of the scaled values.
abline(v = c((1:7) * 81, 0), lwd = 1, lty = 2)
abline(h = c(-0.01, 1.01), lwd = 1, lty = 2)

dev.off()


In [78]:
# Three stacked panels, each showing a 162-point slice (two consecutive
# 81-column blocks) of the scaled average profiles.
pdf("Histogram_averages_v2_lac_vs_nolac.pdf")
par(mfrow = c(3, 1), pin = c(2, 0.8), mar = c(2, 10, 2, 10))

for (i in 1:3) {
  # The index expression is kept inline in plot(): R uses the deparsed
  # argument as the default y-axis label, so naming it would change the figure.
  plot(
    meansCM[(1:162) + 162 * (i - 1)],
    col = "magenta4", type = "l", lwd = 2, las = 2,
    axes = FALSE, xlab = NA, ylim = c(0, 1)
  )
  lines(meansiPS[(1:162) + 162 * (i - 1)], col = "limegreen", lwd = 2)

  # Dashed guides: vertical lines every 81 points and at 0, horizontal lines
  # just outside the [0, 1] range.
  abline(v = c((1:7) * 81, 0), lwd = 1, lty = 2)
  abline(h = c(-0.01, 1.01), lwd = 1, lty = 2)
}

dev.off()