In [7]:
library(stringr)
# Project root directory. It becomes the working directory below, so the
# relative output directories used later in this script resolve against it.
home <- "/projects/CARDIPS/analysis/family1070"

# One label per input table, listed in the same order as `dir_ase`.
marks <- c(
  "RNA_CM", "RNA_IPSC", "H3K27AC_CM", "NKX25_CM", "SRF_CM", "H3K27AC_IPSC"
)

# Per-SNV ASE tables, given relative to `home` (two RNA tables first, then
# the ChIP tables).
dir_ase <- c(
  "ase_rnas_v4/analysis/CM.genes.ase_snv.txt",
  "ase_rnas_v4/analysis/iPSC.genes.ase_snv.txt",
  "ase_chips_v4/analysis/CM_H3K27AC_ase_snv.txt",
  "ase_chips_v4/analysis/CM_NKX_ase_snv.txt",
  "ase_chips_v4/analysis/CM_SRF_ase_snv.txt",
  "ase_chips_v4/analysis/iPSC_H3K27AC_ase_snv.txt"
)

# Absolute path of every input table.
ase_files <- file.path(home, dir_ase)

setwd(home)


In [14]:
# Variant annotation columns kept for the ChIP tables.
chip_cols <- c(
  "contig", "position", "coord", "variantID", "refAllele", "altAllele"
)
# The RNA tables keep the same columns plus `feature`.
rna_cols <- c(chip_cols, "feature")


In [15]:
# For every mark: read its ASE SNV table, combine the binomial p-values of all
# rows sharing a `coord` into one p-value (Z-transform test), average the
# reference-allele frequency per `coord`, attach the SNV annotation columns
# and write the result to "<out_dir>ASE.tested.combined.<mark>.txt".
for (f in seq_along(marks)) {
  # RNA tables are read with their first column as row names and keep an
  # extra `feature` column. They are recognised by label rather than by their
  # position in `marks`, so reordering `marks` cannot pick the wrong branch.
  is_rna <- startsWith(marks[f], "RNA_")

  if (is_rna) {
    snv <- read.table(ase_files[f], sep = "\t", header = TRUE, row.names = 1)
    out_dir <- "ase_rnas_v4/analysis/"
    info_cols <- rna_cols
  } else {
    snv <- read.table(ase_files[f], sep = "\t", header = TRUE)
    out_dir <- "ase_chips_v4/analysis/"
    info_cols <- chip_cols
  }

  cat(marks[f], "\n")

  # Replace p-values of exactly 0 with the smallest positive p-value in the
  # table so that qnorm() below stays finite. which() skips NA p-values; a
  # plain logical index would make min() return NA and turn every zero
  # p-value into NA.
  zero_pv <- which(snv$binomialPValue == 0)
  if (length(zero_pv) > 0) {
    positive_pv <- snv$binomialPValue[which(snv$binomialPValue > 0)]
    snv$binomialPValue[zero_pv] <- min(positive_pv)
  }

  snv$ref_is_major <- snv$refCount > snv$altCount
  snv$ref_freq <- snv$refCount / snv$totalCount

  #### Meta-analysis of the p-values across the rows of each SNV
  ## Signed Z-score from the binomial p-value. qnorm(p / 2) is <= 0, so the
  ## score is negative when the reference allele has more reads and positive
  ## otherwise (ties count as "reference is not major").
  snv$z_score <- qnorm(snv$binomialPValue / 2) *
    ifelse(snv$ref_is_major, 1, -1)

  ## Combine the Z-scores with the Z-transform test (Stouffer's method,
  ## DOI: 10.1111/j.1420-9101.2005.00917.x): sum(Z) / sqrt(k).
  ## Both aggregates use the same formula and data, so their rows line up.
  ag1 <- aggregate(z_score ~ coord, snv, sum)
  ag2 <- aggregate(z_score ~ coord, snv, length)
  czs <- ag1$z_score / sqrt(ag2$z_score)
  ag1$combined_pv <- pnorm(abs(czs), lower.tail = FALSE) * 2
  ag1$combined_fdr <- p.adjust(ag1$combined_pv, method = "BH")

  #### Mean reference-allele frequency per SNV, merged with the combined
  #### statistics. The mean runs over every row of the SNV (original note:
  #### i.e. all the het subjects).
  avg <- aggregate(ref_freq ~ coord, snv, mean)

  # Drop the summed z-score by name rather than by column position.
  ag <- merge(ag1[, c("coord", "combined_pv", "combined_fdr")], avg,
              by = "coord")

  ### Retrieve the SNP info columns (one row per distinct combination) and
  ### write everything to file for annotation.
  snv2 <- snv[, info_cols]
  snv2 <- snv2[!duplicated(snv2), ]

  m <- merge(ag, snv2, by = "coord")

  write.table(m, paste0(out_dir, "ASE.tested.combined.", marks[f], ".txt"),
              sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
}


RNA_CM 
RNA_IPSC 
H3K27AC_CM 
NKX25_CM 
SRF_CM 
H3K27AC_IPSC 


In [22]:
# Reload the combined table written for marks[4], keep the SNVs with
# combined FDR < 0.05, sort them by increasing mean reference-allele
# frequency and show the first 50.
# NOTE(review): `out_dir` is whatever the last iteration of the loop above
# left behind; confirm it matches the directory of marks[4] if `marks` changes.
m <- read.table(paste0(out_dir, "ASE.tested.combined.", marks[4], ".txt"),
                sep = "\t", row.names = 1, header = TRUE)
m <- subset(m, combined_fdr < 0.05)
m <- m[order(m$ref_freq), ]
# head() returns at most 50 rows; m[1:50, ] would pad the display with all-NA
# rows whenever fewer than 50 SNVs pass the filter.
head(m, 50)

Unnamed: 0,combined_pv,combined_fdr,ref_freq,contig,position,variantID,refAllele,altAllele
chr1:147012918,1.523442e-10,9.258795e-09,0.0,chr1,147012918,.,C,G
chr1:21334542,9.38765e-28,2.457707e-25,0.0,chr1,21334542,rs10916921,C,T
chr11:129939826,0.003745318,0.04456966,0.0,chr11,129939826,rs80070284,C,T
chr12:83162064,1.455455e-11,9.992724e-10,0.0,chr12,83162064,rs7296227,C,T
chr16:7886786,1.348943e-07,5.374117e-06,0.0,chr16,7886786,rs12920730,T,G
chr16:85997596,3.978488e-14,3.717e-12,0.0,chr16,85997596,rs11860050,C,A
chr18:55288790,0.00021387,0.003996267,0.0,chr18,55288790,rs446777,C,G
chr2:102759293,0.0002388978,0.004391591,0.0,chr2,102759293,rs1800919,A,C
chr3:188101703,0.0004882908,0.008209626,0.0,chr3,188101703,rs2030517,A,C
chr4:69959183,0.001953289,0.02636719,0.0,chr4,69959183,rs62296943,C,T
