## Correlation of NKX2-5 ASE effect size with heart-specific eQTLs
We have observed that there is no correlation between ASE in NKX2-5 and the expression level of the nearest gene in iPSC-CMs; however, there is an enrichment for heart-specific eQTLs in NKX2-5 ASE. Here we run the same test restricted to heart or heart-specific eQTL SNP-gene pairs.

In [1]:
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(gplots))

# Working directory: every relative path below is resolved against `home`.
home <- "/home/paola/Family1070/private_output"
setwd(home)

# Input folders (relative to `home`).
ase_dir  <- "ASE_chip/correlationWithBeta/"
gtex_dir <- "Enrichment_annotations/GTEx/"

# Data sets handled by the notebook; `loci` is an alias of `nm`.
nm   <- c("NKX25", "H3K27AC_CM", "H3K27AC_IPSC")
loci <- nm
QTL  <- paste0(nm, "_eQTLs.txt")

# One path per entry of `loci`.
corr        <- paste0(ase_dir, "Correlation_ase_snv_", loci, ".txt")
rna_cms     <- "PCA_rnaseq/iPSC_CM/residual_counts.txt"
chip_tables <- file.path("PCA_chipseq", loci, "residual_counts.txt")

# Sample sheet; later cells use its Cell_type, Data_type, UUID and
# Subject_ID columns.
sample_table <- read.csv(
    "PCA_chipseq/fam1070_data_plus_production-1.csv",
    stringsAsFactors = FALSE
)

# Gene annotation table; later cells use its gene_id and gene_name columns.
gene_info <- read.table(
    "/publicdata/gencode_v19_20151104/gene_info.tsv",
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE
)

In [102]:
# Output folder for the tables and figures written by the cells below.
new.dir = "Coordination_ASE_effects/results/Coordination_at_heart_eQTLs"
# recursive = TRUE creates missing parent folders; showWarnings = FALSE keeps
# re-runs quiet when the folder already exists.
dir.create(new.dir, recursive = TRUE, showWarnings = FALSE)

"'Coordination_ASE_effects/results/Coordination_at_heart_eQTLs' already exists"

In [103]:
# Scatter plot of two effect-size columns with a Spearman correlation summary.
#
# Arguments:
#   ag      data.frame holding a `combined_fdr` column and the two columns
#           selected by `beta_x` / `beta_y`.
#   beta_x  column (name or index) plotted on the x axis.
#   beta_y  column (name or index) plotted on the y axis.
#   xlab    text appended to "effect size" for the x-axis label.
#   ylab    text appended to "effect size" for the y-axis label; also used as
#           the panel title.
#   colors  point colour(s) passed to plot().
#
# Returns a length-2 vector: the Spearman estimate and its p-value from
# cor.test(). Both are NA when fewer than two complete (x, y) pairs exist; in
# that case a placeholder panel is drawn so multi-panel layouts stay aligned.
scatterPlot_beta = function(ag, beta_x, beta_y, xlab, ylab, colors = "#1B69AF") {

    # Rows ordered by increasing combined_fdr.
    ag = ag[order(ag$combined_fdr, decreasing = FALSE), ]
    x  = ag[, beta_x]
    y  = ag[, beta_y]

    # cor.test() needs at least two complete pairs; without this guard an
    # empty or near-empty table aborts the whole plotting cell.
    if (sum(!is.na(x) & !is.na(y)) < 2) {
        plot.new()
        title(main = ylab)
        text(0.5, 0.5, "not enough data", cex = 1.1)
        return(c(rho = NA_real_, NA_real_))
    }

    # Symmetric axes around zero; na.rm = TRUE so a single missing effect size
    # does not turn the limits into NA (which makes plot() fail).
    xlim = c(-1, 1) * max(abs(x), na.rm = TRUE)
    ylim = c(-1, 1) * max(abs(y), na.rm = TRUE)

    plot(x, y, ylim = ylim, xlim = xlim,
         pch = 16, cex = 0.5, col = colors,
         cex.lab = 1.1, cex.axis = 1.1, xlab = paste("effect size", xlab),
         ylab = paste("effect size", ylab), main = ylab)

    abline(h = 0, v = 0, lty = 2, col = "red", lwd = 1)

    # Rank correlation for the annotation, least-squares line for the trend.
    co = cor.test(x, y, method = "spearman", exact = FALSE)
    l  = lm(y ~ x)
    abline(l)
    text(xlim[2] - xlim[2] / 1.8, ylim[1] - ylim[1] / 3,
         paste("r=", round(co$estimate, 2), "\n P=", signif(co$p.value, 3)), cex = 1.1)

    return(c(co$estimate, co$p.value))

}

In [109]:
# Index into `nm` / `loci` / `corr` used by the following cells
# (2 selects 'H3K27AC_CM').
i = 2

In [110]:
# GTEx aggregate table for the selected data set.
eqtl_file <- paste0(gtex_dir, nm[i], ".GtexTable_hreg_aggregate.txt")
eqtls     <- read.table(eqtl_file, header = TRUE, sep = "\t")

# Rows with a positive `Hear` value, and rows where `Hear` is at least half of
# the second column.
# NOTE(review): column 2 is presumably the total across tissues - confirm.
heart_eqtls    <- eqtls[which(eqtls[["Hear"]] > 0), ]
heart_specific <- eqtls[which((eqtls[, 2] / 2) - eqtls[, "Hear"] <= 0), ]

# Attach gene_id to each SNP-gene pair by matching `gene` to `gene_name`.
gene_ids       <- gene_info[, c("gene_id", "gene_name")]
heart_eqtls    <- merge(heart_eqtls[, c("snpID", "gene")], gene_ids,
                        by.x = "gene", by.y = "gene_name")
heart_specific <- merge(heart_specific[, c("snpID", "gene")], gene_ids,
                        by.x = "gene", by.y = "gene_name")

# Correlation table for the selected data set and the iPSC-CM RNA table.
ch      <- read.table(corr[i], header = TRUE, check.names = FALSE)
rn      <- read.table(rna_cms, header = TRUE, check.names = FALSE)
samp    <- sample_table[sample_table$Cell_type == "iPSC-CM" &
                        sample_table$Data_type == "RNA-Seq", ]
closest <- "exp_iPSC_CM"

# Keep only variants that are heart eQTLs.
chp <- merge(ch, heart_eqtls, by.x = "ID", by.y = "snpID")

# Columns carried forward, then joined to the RNA table, whose row names are
# matched against gene_id.
keep_cols <- c("gene", "gene_id", "peakID", "varID", "combined_pv", "combined_fdr",
               "ref_freq", "Coefficient", "ID", "REF", "ALT",
               "iPSCORE_2_1", "iPSCORE_2_2", "iPSCORE_2_3", "iPSCORE_2_4",
               "iPSCORE_2_6", "iPSCORE_2_7", "iPSCORE_2_9")
chpr <- merge(chp[keep_cols], rn, by.x = "gene_id", by.y = "row.names")

# Per-sample columns, selected through the sample sheet: `peaks` by UUID,
# `genotype` by Subject_ID.
peaks    <- chpr[, as.character(samp$UUID), drop = FALSE]
genotype <- chpr[, as.character(samp$Subject_ID), drop = FALSE]
   

# For every row of `chpr`, regress the z-scored `peaks` values on the matching
# `genotype` values and keep the slope and its p-value.
# Rows where the slope cannot be estimated (no complete observations, or no
# variation in genotype) keep NA instead of aborting the loop.

# Convert once, outside the loop, instead of transposing on every iteration.
expr_mat <- as.matrix(peaks)
geno_mat <- as.matrix(genotype)
n_pairs  <- nrow(chpr)

# Preallocated result table, one row per row of `chpr`.
res <- data.frame(Coefficient_closest = rep(NA_real_, n_pairs),
                  lm_pVal_closest     = rep(NA_real_, n_pairs))

for (m in seq_len(n_pairs)) {

    df       <- data.frame(peaks = expr_mat[m, ], genotype = geno_mat[m, ])
    df$peaks <- as.numeric(scale(df$peaks)) # z-score normalized

    # scale() of a constant vector gives NaN everywhere; lm() would stop.
    if (!any(complete.cases(df))) next

    fit_tab <- coef(summary(lm(peaks ~ genotype, data = df)))

    # An invariant genotype leaves only the intercept row in the table.
    if (nrow(fit_tab) < 2) next

    res[m, "Coefficient_closest"] <- fit_tab[2, 1]
    res[m, "lm_pVal_closest"]     <- fit_tab[2, 4]
}

ag = cbind(chpr, res)
write.table(ag, paste( new.dir, "/Coordination_at_heart_eQTLs",  loci[i], "_with_", closest, ".txt", sep=""),
            sep="\t", row.names=FALSE, col.names=TRUE)

    



# All scatter plots for the selected data set go into one multi-panel PDF;
# each panel's correlation estimate and p-value are stored in `results`.
pdf(paste0(new.dir, "/Scatterplots_", nm[i], ".pdf"))

results = data.frame()
results["n", "heart_eQTLs"] = nrow(ag)

par(mfrow = c(3, 4), pin = c(1.3, 1.3))

# Row labels ("r_..." and "p_...") for one pair of statistics.
stat_rows = function(...) paste0(c("r_", "p_"), ...)

# ASE effect versus the effect fitted on iPSC-CM expression: all heart eQTLs.
results[stat_rows(closest, nm[i]), "heart_eQTLs"] =
    scatterPlot_beta(ag, "Coefficient", "Coefficient_closest",
                     nm[i], closest, colors = "brown")

# Same comparison restricted to variants listed in `heart_specific`.
ag_specific = ag[ag$ID %in% as.character(heart_specific$snpID), ]
results["n", "heart_spec_eQTLs"] = nrow(ag_specific)

results[stat_rows(closest, nm[i]), "heart_spec_eQTLs"] =
    scatterPlot_beta(ag_specific, "Coefficient", "Coefficient_closest",
                     paste(nm[i], "heart spec."), closest)

# Two empty panels complete the first row of the 3 x 4 layout.
plot.new()
plot.new()
h_tissues = c("Heart_Left_Ventricle_", "Heart_Atrial_Appendage_")

for (g in seq_along(h_tissues)) {

    # GTEx table for this tissue, reduced to variants present in `ag`.
    gtex_file = paste0("/publicdata/gtex_v6/", h_tissues[g], "Analysis.snpgenes")
    gtex_lv   = read.table(gtex_file, header = TRUE, sep = "\t")
    gtex_lv$varID = paste0("chr", gtex_lv$snp_chrom, ":", gtex_lv$snp_pos)
    gtex_lv   = gtex_lv[gtex_lv$varID %in% ag$varID, ]
    gtex_beta = gtex_lv[, c("beta", "gene", "varID")]

    # Match on both variant and gene so `beta` belongs to the same pair.
    ag2 = merge(ag, gtex_beta,
                by.x = c("varID", "gene_id"), by.y = c("varID", "gene"))

    results[stat_rows(h_tissues[g], nm[i]), "heart_eQTLs"] =
        scatterPlot_beta(ag2, "Coefficient", "beta",
                         nm[i], h_tissues[g], colors = "brown")
    results[stat_rows(h_tissues[g], closest), "heart_eQTLs"] =
        scatterPlot_beta(ag2, "Coefficient_closest", "beta",
                         closest, h_tissues[g], colors = "brown")

    ag2_specific = merge(ag_specific, gtex_beta,
                         by.x = c("varID", "gene_id"), by.y = c("varID", "gene"))

    results[stat_rows(h_tissues[g], nm[i]), "heart_spec_eQTLs"] =
        scatterPlot_beta(ag2_specific, "Coefficient", "beta",
                         paste(nm[i], "heart spec."), h_tissues[g])
    results[stat_rows(h_tissues[g], closest), "heart_spec_eQTLs"] =
        scatterPlot_beta(ag2_specific, "Coefficient_closest", "beta",
                         paste(closest, "heart spec."), h_tissues[g])

}
dev.off()

# Split `results` into correlation estimates ("r_" rows) and -log10 p-values
# ("p_" rows); the "n" row belongs to neither.
rho  = results[grepl("^r_", rownames(results)), ]
pval = results[grepl("^p_", rownames(results)), ]
pval = -log(pval, 10)

In [111]:
# Heatmap of -log10(P) for every comparison, annotated with the correlation
# estimate in each cell.
pdf(paste(new.dir, "/Heatmap_", nm[i], ".pdf", sep=""))
my_palette = colorRampPalette(c( "white","dodgerblue3"))(10)
# Rows and columns keep their input order (Rowv/Colv = FALSE). Stating
# dendrogram = "none" explicitly avoids the "Discrepancy: ... Omitting
# dendogram" warnings heatmap.2 raises when the default is left in place.
heatmap.2(as.matrix(pval), srtRow = 0, srtCol = 45, offsetRow = -0.5, offsetCol = -0.5,
          keysize = 1, margins = c(28, 29), trace = "none",
          Colv = FALSE, Rowv = FALSE, dendrogram = "none",
          key.title = "-Log10(P)", cellnote = round(rho, 2), notecol = "black",
          cexRow = 1, cexCol = 1, col = my_palette,
          sepwidth = c(0.01, 0.01), sepcolor = "black",
          colsep = 0:ncol(pval), rowsep = 0:nrow(pval))
dev.off()

"Discrepancy: Colv is FALSE, while dendrogram is `column'. Omitting column dendogram."

In conclusion, we observed a good correlation between the GTEx eQTLs and the effects observed in iPSC-CMs at these loci. Possible implication of NKX2-5 binding.

### GO analysis on genes that have ASE in NKX2-5 and that have GTEx eQTL in heart (n=116) or heart-specific (n=39)

In [88]:
suppressPackageStartupMessages(library(goseq))

In [112]:
# GO (biological process) enrichment with goseq for three gene sets, one
# bar-plot panel per set, written to a single PDF.
dir.create(paste(new.dir, "GO", sep = "/"), recursive = TRUE, showWarnings = FALSE)
pdf(paste(new.dir, "/GO/", nm[i], "_GO.pdf", sep=""))
par(mfrow = c(2, 2), mar = c(2, 15, 2, 1))

# Keep the part of each identifier that precedes the first "."; the result is
# used as the gene name passed to goseq.
strip_version = function(ids) str_split_fixed(ids, "\\.", 2)[, 1]

for (set_idx in 1:3) {

    # `test` flags the genes of interest (TRUE) within the background set.
    if (set_idx == 1) {
        # Genes in `ag` against all genes in the RNA table.
        set         = "heart_eqtls"
        test        = rownames(rn) %in% ag$gene_id
        names(test) = strip_version(rownames(rn))
    } else if (set_idx == 2) {
        # Genes in `ag_specific` against all genes in the RNA table.
        set         = "heart_specific_eqtls"
        test        = rownames(rn) %in% ag_specific$gene_id
        names(test) = strip_version(rownames(rn))
    } else {
        # Genes in `ag_specific` against the genes in `ag`.
        set         = "heart_specific_versus_heart_ eqtls"
        test        = ag$gene_id %in% ag_specific$gene_id
        names(test) = strip_version(ag$gene_id)
    }
    test = test[!duplicated(names(test))]

    pwf = nullp(test, "hg19", "ensGene", plot.fit = FALSE)
    GO  = goseq(pwf, "hg19", "ensGene", test.cats = c("GO:BP"))
    GO$Bonferroni = -log(p.adjust(GO$over_represented_pvalue, method = "bonferroni"), 10)
    GO            = GO[order(GO$over_represented_pvalue, decreasing = FALSE), ]

    # Plot at most the 20 best terms; a fixed 20:1 index would produce NA bars
    # whenever goseq returns fewer categories.
    n_top = min(20, nrow(GO))
    if (n_top == 0) {
        # Nothing to draw: keep the panel so the layout stays aligned.
        plot.new()
        title(main = set)
        next
    }
    top = n_top:1

    # Bars at or below 1.3 (Bonferroni P >= 0.05) are hatched, the others solid.
    barplot(GO$Bonferroni[top], names.arg = GO$term[top], horiz = TRUE, main = set,
            las = 1, cex.axis = 0.8, cex.names = 0.8,
            density = c(-1, 20)[(GO$Bonferroni[top] <= 1.3) + 1])
    abline(v = 1.3, lwd = 2, lty = 2, col = "red")

}
dev.off()

"'Coordination_ASE_effects/results/Coordination_at_heart_eQTLs/GO' already exists"Loading hg19 length data...
Fetching GO annotations...
For 2822 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Loading hg19 length data...
"initial point very close to some inequality constraints"Fetching GO annotations...
For 2822 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Loading hg19 length data...
"initial point very close to some inequality constraints"Fetching GO annotations...
For 18 genes, we 

No enrichment for any GO term was found.

### Study genes of interest for cardiac function and EKG

In [7]:
# Reload the saved NKX25 coordination table (presumably the file written by
# the write.table step when run with the 'NKX25' entry of `nm`).
ag_nkx <- read.table(
    file = "Coordination_ASE_effects/results/Coordination_at_heart_eQTLs/Coordination_at_heart_eQTLsNKX25_with_exp_iPSC_CM.txt",
    header = TRUE,
    sep = "\t",
    check.names = FALSE
)


In [15]:
# Preview the first rows of the reloaded NKX25 table and of the
# `heart_specific` table.
head(ag_nkx)
head(heart_specific)

gene_id,gene,peakID,varID,combined_pv,combined_fdr,ref_freq,Coefficient,ID,REF,...,614b916c-62e4-4565-8039-04c47b6daf6b,013eb23c-4ff0-493b-b106-9c8c9c2176d6,b958f299-3ced-4ab9-9901-fe7008745909,c20abef2-dd3b-4235-9853-bf297b6bf3a6,b37fa297-2a4d-4bda-9b98-479ac1d6c8a6,936f69d3-5ca1-4464-b090-325880a42c11,6aaf5dd4-ba88-47ee-98fe-e7ff4094adc0,4f7327ef-84bf-4b64-ae5f-50812d0dbf6d,Coefficient_closest,lm_pVal_closest
ENSG00000059691.7,PET112,chr4:152588273-152588544,chr4:152588421,0.0006192851,0.008827206,0.7027501,-1.1655973,rs1366909,A,...,9.09346,9.189137,8.957339,8.932918,9.227882,8.928087,8.966388,9.38033,-0.04834876,0.89534047
ENSG00000064545.10,TMEM161A,chr19:19245629-19245950,chr19:19245771,2.748783e-05,0.0005640539,0.8280051,-0.8435937,rs3746259,A,...,9.276116,9.16394,9.603634,9.25415,9.1817,9.838372,10.077229,9.774231,-1.19220196,0.01294399
ENSG00000065833.7,ME1,chr6:83968321-83968553,chr6:83968419,4.587044e-06,0.0001142103,0.8063184,-1.089785,rs56323856,G,...,8.844969,8.793271,8.27103,9.405032,8.644292,8.465996,8.809454,7.695418,0.00418389,0.99172574
ENSG00000069020.14,MAST4,chr5:65814581-65814990,chr5:65814898,3.162835e-05,0.0006362127,0.1827172,0.9129134,rs12332381,C,...,9.322489,9.918303,9.785758,9.224283,9.473445,9.657965,9.624138,10.095647,-0.02927196,0.94215707
ENSG00000075223.9,SEMA3C,chr7:80579997-80580339,chr7:80580219,2.119206e-13,1.435354e-11,0.1477138,1.0112242,rs12537553,C,...,11.205073,11.396493,10.78248,10.519829,10.04262,9.690264,10.420649,10.868524,-0.75271669,0.1311668
ENSG00000076003.4,MCM6,chr2:136786614-136786823,chr2:136786651,2.128364e-05,0.0004491125,0.2038059,1.3849405,rs12475139,T,...,10.302754,10.765058,10.548264,10.712886,10.818291,10.464139,10.53749,11.223824,-0.66012924,0.10702188


Unnamed: 0,snp_gene,total,Adip,Adre,Arte,Brai,Brea,Cell,Colo,Esop,...,Smal,Sple,Stom,Test,Thyr,Uter,Vagi,Whol,snpID,gene
33,rs10041519_RP11-267A15.1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,rs10041519,RP11-267A15.1
62,rs10072342_PRDM6,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,rs10072342,PRDM6
239,rs10185926_LINC00299,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,rs10185926,LINC00299
278,rs10250368_CRHR2,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,rs10250368,CRHR2
283,rs10251937_STK17A,4,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,rs10251937,STK17A
285,rs10262141_FKBP9,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,rs10262141,FKBP9


In [18]:
dim(ag_nkx)

# Switch to the first entry of `nm` and reload its GTEx aggregate table.
i <- 1
eqtls <- read.table(paste0(gtex_dir, nm[i], ".GtexTable_hreg_aggregate.txt"),
                    header = TRUE, sep = "\t")
heart_eqtls    <- eqtls[which(eqtls[["Hear"]] > 0), ]
heart_specific <- eqtls[which((eqtls[, 2] / 2) - eqtls[, "Hear"] <= 0), ]

# NOTE(review): despite its name, `nkx_specif` is filtered on `heart_eqtls`;
# `heart_specific` is computed above but not used in this cell - confirm which
# set is intended.
nkx_specif <- ag_nkx[ag_nkx$gene %in% heart_eqtls$gene, ]
dim(nkx_specif)

# Rows whose regression p-value is below 0.05.
nkx_specif_signif <- nkx_specif[which(nkx_specif$lm_pVal_closest < 0.05), ]
dim(nkx_specif_signif)

In [19]:
# Display the rows of `nkx_specif` with lm_pVal_closest < 0.05.
nkx_specif_signif

Unnamed: 0,gene_id,gene,peakID,varID,combined_pv,combined_fdr,ref_freq,Coefficient,ID,REF,...,614b916c-62e4-4565-8039-04c47b6daf6b,013eb23c-4ff0-493b-b106-9c8c9c2176d6,b958f299-3ced-4ab9-9901-fe7008745909,c20abef2-dd3b-4235-9853-bf297b6bf3a6,b37fa297-2a4d-4bda-9b98-479ac1d6c8a6,936f69d3-5ca1-4464-b090-325880a42c11,6aaf5dd4-ba88-47ee-98fe-e7ff4094adc0,4f7327ef-84bf-4b64-ae5f-50812d0dbf6d,Coefficient_closest,lm_pVal_closest
2,ENSG00000064545.10,TMEM161A,chr19:19245629-19245950,chr19:19245771,2.748783e-05,0.0005640539,0.8280051,-0.843593718,rs3746259,A,...,9.276116,9.16394,9.603634,9.25415,9.1817,9.838372,10.077229,9.774231,-1.192202,0.01294399
8,ENSG00000099282.5,TSPAN15,chr10:71267711-71267916,chr10:71267849,0.003785882,0.04022837,0.8713095,-0.936700783,rs1236904,A,...,9.948057,9.35477,9.580624,9.54784,8.124246,9.169763,9.705263,10.883798,1.0506522,0.030956
9,ENSG00000099282.5,TSPAN15,chr10:71267711-71267916,chr10:71267834,0.0002073617,0.003418556,0.75604393,-0.936700783,rs1227967,T,...,9.948057,9.35477,9.580624,9.54784,8.124246,9.169763,9.705263,10.883798,1.0506522,0.030956
15,ENSG00000103966.5,EHD4,chr15:42258274-42258599,chr15:42258390,0.0008488348,0.01160394,0.36456813,0.924873885,rs11638729,G,...,11.302383,11.165874,11.22005,10.861002,10.782183,10.952161,10.743839,11.003139,0.8498002,0.001753916
16,ENSG00000104388.10,RAB2A,chr8:61431589-61432066,chr8:61431991,2.051643e-05,0.0004357717,0.25352294,1.313546978,rs13269925,A,...,11.313601,11.548198,10.92977,11.459154,11.048348,10.879558,10.924177,11.134795,0.8436744,0.02815588
17,ENSG00000104388.10,RAB2A,chr8:61433560-61433957,chr8:61433851,0.0001178618,0.002066156,0.36728057,1.039877654,rs6987523,G,...,11.313601,11.548198,10.92977,11.459154,11.048348,10.879558,10.924177,11.134795,0.8436744,0.02815588
21,ENSG00000111802.9,TDP2,chr6:24584095-24584935,chr6:24584366,0.001568764,0.01956763,0.59294039,-1.023985184,rs2817200,A,...,9.770291,10.516512,10.350955,9.807161,9.617964,10.101803,9.957836,10.238394,-0.5088343,0.04915486
22,ENSG00000112304.6,ACOT13,chr6:24584095-24584935,chr6:24584366,0.001568764,0.01956763,0.59294039,-1.023985184,rs2817200,A,...,9.846777,10.268488,10.467407,9.635038,9.50805,10.415258,10.035451,10.344174,-0.6251938,0.01346837
26,ENSG00000114450.5,GNB4,chr3:179172816-179173064,chr3:179172979,2.0771390000000002e-17,2.128903e-15,0.07816854,1.654799913,rs7612445,G,...,9.953491,10.421849,9.395981,10.028223,9.55892,8.846764,9.435594,9.355391,1.0107446,0.02585736
30,ENSG00000117640.13,MTFR1L,chr1:26144532-26144949,chr1:26144674,2.069934e-05,0.0004382153,0.60410803,-0.850019637,rs6872,G,...,11.054027,10.712952,11.43323,10.79316,10.762853,11.496644,11.254852,11.743142,-0.9806494,0.005081284


### Find genes in common for H3K27ac and NKX2-5

In [118]:
# Inner join of the current coordination table (`ag`) with the NKX25 table on
# gene_id; only genes present in both are kept.
merged <- merge(x = ag, y = ag_nkx, by = "gene_id")
dim(merged)

In [120]:
# Rows of the NKX25 table for gene CLCNKA.
test <- ag_nkx[which(ag_nkx$gene == "CLCNKA"), ]

In [125]:
# Rows of the NKX25 table for gene ITGB3.
test <- ag_nkx[which(ag_nkx$gene == "ITGB3"), ]

In [126]:
# Display the selected rows.
test

Unnamed: 0,gene_id,gene,peakID,varID,combined_pv,combined_fdr,ref_freq,Coefficient,ID,REF,...,614b916c-62e4-4565-8039-04c47b6daf6b,013eb23c-4ff0-493b-b106-9c8c9c2176d6,b958f299-3ced-4ab9-9901-fe7008745909,c20abef2-dd3b-4235-9853-bf297b6bf3a6,b37fa297-2a4d-4bda-9b98-479ac1d6c8a6,936f69d3-5ca1-4464-b090-325880a42c11,6aaf5dd4-ba88-47ee-98fe-e7ff4094adc0,4f7327ef-84bf-4b64-ae5f-50812d0dbf6d,Coefficient_closest,lm_pVal_closest
121,ENSG00000259207.3,ITGB3,chr17:45329559-45329879,chr17:45329682,4.232363e-13,2.827073e-11,0.3190647,1.402174,rs2317385,G,...,10.02181,8.531405,8.832495,9.107417,9.137693,8.718776,8.694925,7.839376,1.040895,0.01456749


In [127]:
# List the column names of the selected rows.
colnames(test)