#### Tissue-specific RV eGenes

In [1]:
library(data.table)
library(dplyr)

load.data <- function(tissue) {
    filename <- paste("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/qvals/", tissue, ".lrt.q", sep="")
    
    return(fread(filename, data.table=F))
}

get.egenes <- function(qvals) {
    egenes = qvals$Gene_ID[apply(qvals, 1, function(x) {any(as.numeric(x[-1]) < 0.05)})]
    return(egenes)
}

get.tissue.specific.genes <- function(egenes.list) {
    res = vector("list", length(egenes.list))
    names(res) = names(egenes.list)
    for (i in 1:length(egenes.list)) {
        res[[i]] = egenes.list[[i]][!egenes.list[[i]] %in% unique(unlist(egenes.list[-i]))]
    }
    return(res)
}


Attaching package: ‘dplyr’

The following objects are masked from ‘package:data.table’:

    between, first, last

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
sample.info = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/tissue.name.match.csv")
tissues = sample.info$tissue

q.data = lapply(tissues, load.data)
names(q.data) = tissues

In [3]:
egenes = lapply(q.data, get.egenes)

In [4]:
res = get.tissue.specific.genes(egenes)

In [6]:
fwrite(as.list(res$Lung), "../tissue_specific_egenes_by_tissue/Lung.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Liver), "../tissue_specific_egenes_by_tissue/Liver.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Whole_Blood), "../tissue_specific_egenes_by_tissue/Whole_Blood.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Skin_Sun_Exposed_Lower_leg), "../tissue_specific_egenes_by_tissue/Skin_Sun_Exposed_Lower_leg.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Skin_Not_Sun_Exposed_Suprapubic), "../tissue_specific_egenes_by_tissue/Skin_Not_Sun_Exposed_Suprapubic.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Heart_Atrial_Appendage), "../tissue_specific_egenes_by_tissue/Heart_Atrial_Appendage.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Heart_Left_Ventricle), "../tissue_specific_egenes_by_tissue/Heart_Left_Ventricle.tissue.specifc.rv.egenes.tsv", sep="\n")

#### Tissue-specific non-RV eGenes

In [5]:
get.non.egenes <- function(qvals) {
    egenes = qvals$Gene_ID[apply(qvals, 1, function(x) {all(as.numeric(x[-1]) >= 0.05)})]
    return(egenes)
}

In [6]:
non.egenes = lapply(q.data, get.non.egenes)

In [7]:
res = get.tissue.specific.genes(non.egenes)

In [10]:
fwrite(as.list(res$Lung), "../tissue_specific_egenes_by_tissue/Lung.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Liver), "../tissue_specific_egenes_by_tissue/Liver.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Whole_Blood), "../tissue_specific_egenes_by_tissue/Whole_Blood.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Skin_Sun_Exposed_Lower_leg), "../tissue_specific_egenes_by_tissue/Skin_Sun_Exposed_Lower_leg.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Skin_Not_Sun_Exposed_Suprapubic), "../tissue_specific_egenes_by_tissue/Skin_Not_Sun_Exposed_Suprapubic.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Heart_Atrial_Appendage), "../tissue_specific_egenes_by_tissue/Heart_Atrial_Appendage.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Heart_Left_Ventricle), "../tissue_specific_egenes_by_tissue/Heart_Left_Ventricle.tissue.specifc.non.rv.egenes.tsv", sep="\n")

In [8]:
length(non.egenes$Lung)

#### RV eGenes example outlier

In [5]:
library(data.table)
library(dplyr)

In [9]:
target.snp = "chr20_59023753_G_A_b38"
geno = fread("/u/project/eeskin2/k8688933/rare_var/genotypes/v8/all_eur_samples_matrix_maf0.05/chr.20.genotypes.matrix.tsv")

In [10]:
indiv = colnames(geno)[which(geno %>% filter(ID == target.snp) != 0)][-1]
print(indiv)

[1] "GTEX-1192W" "GTEX-1GPI6"


In [12]:
z.heart.lv = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/log2.standardized.corrected.tpm.rv.egenes.only/log2.standardized.corrected.lrt.rv.egenes.tpm.Heart_Left_Ventricle")
z.heart.aa = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/log2.standardized.corrected.tpm.rv.egenes.only/log2.standardized.corrected.lrt.rv.egenes.tpm.Heart_Atrial_Appendage")
z.skin.sun = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/log2.standardized.corrected.tpm.rv.egenes.only/log2.standardized.corrected.lrt.rv.egenes.tpm.Skin_Sun_Exposed_Lower_leg")
z.skin.not.sun = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/log2.standardized.corrected.tpm.rv.egenes.only/log2.standardized.corrected.lrt.rv.egenes.tpm.Skin_Not_Sun_Exposed_Suprapubic")

In [13]:
print(indiv %in% colnames(z.heart.lv)) # this SNP is not in heart left ventricle
print(indiv %in% colnames(z.heart.aa))
print(indiv %in% colnames(z.skin.not.sun))
print(indiv %in% colnames(z.skin.sun))

[1] FALSE FALSE
[1]  TRUE FALSE
[1] FALSE  TRUE
[1] TRUE TRUE


In [15]:
print("ENSG00000101162.3" %in% z.heart.lv$gene)
print("ENSG00000101162.3" %in% z.heart.aa$gene)
print("ENSG00000101162.3" %in% z.skin.not.sun$gene)
print("ENSG00000101162.3" %in% z.skin.sun$gene)

[1] TRUE
[1] FALSE
[1] TRUE
[1] TRUE


In [17]:
z.heart.lv %>% filter(gene == "ENSG00000101162.3") %>% select(indiv)

ERROR: Unknown columns `GTEX-1192W` and `GTEX-1GPI6` 

In [24]:
z.heart.aa %>% filter(gene == "ENSG00000101162.3") %>% select(indiv[1])

GTEX-1192W


In [19]:
idx = which(z.skin.sun$gene == "ENSG00000101162.3")
z.skin.sun[idx, -1]

GTEX-111FC,GTEX-111VG,GTEX-1122O,GTEX-1128S,GTEX-113JC,GTEX-117XS,GTEX-117YW,GTEX-1192W,GTEX-1192X,GTEX-11DXX,⋯,GTEX-ZVZP,GTEX-ZWKS,GTEX-ZXES,GTEX-ZXG5,GTEX-ZYFD,GTEX-ZYFG,GTEX-ZYT6,GTEX-ZYY3,GTEX-ZZ64,GTEX-ZZPT
0.4485603,-0.08238693,0.1169846,0.01343363,-0.5306936,-0.1180192,-0.1543491,0.8902476,-0.562793,-0.6218577,⋯,-0.7511755,0.4598045,-0.2550396,-1.110967,-0.03891543,0.6415389,-0.6896812,0.8737349,-0.2658977,0.00853221


In [20]:
scaled.z.skin.sun = scale(t(as.data.frame(z.skin.sun)[idx, -1]))
colnames(scaled.z.skin.sun) = c("z")
as.data.frame(scaled.z.skin.sun)[indiv, ] #%>% filter(abs(z) > 2)

In [21]:
idx = which(z.skin.sun$gene == "ENSG00000101162.3")
colnames(z.skin.sun)[which(abs(z.skin.sun[idx, -1]) > 2)]

In [25]:
z.skin.not.sun %>% filter(gene == "ENSG00000101162.3") %>% select(indiv[2])

GTEX-1GPI6
0.1479837


In [23]:
z.skin.sun %>% filter(gene == "ENSG00000101162.3") %>% select(indiv)

GTEX-1192W,GTEX-1GPI6
0.8902476,-0.5438698


#### RV eGenes example outliers in all tissues

In [32]:
z.scores = lapply(dir("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/log2.standardized.corrected.tpm.rv.egenes.only/",
                     pattern="log2.standardized.corrected.lrt.rv.egenes.tpm", full.names=T), function(x) {if(file.size(x) > 1) {fread(x, data.table=F)}})
names(z.scores) = fread("../egene.counts.csv")$tissue

In [33]:
z.scores[[17]]

NULL

In [41]:
for (i in 1:48) {
    z = z.scores[[i]]
    if (is.null(z)) {
        next
    }
    if (!any(indiv %in% colnames(z))) {
        next
    }
    if (!"ENSG00000101162.3" %in% z$gene) {
        next
    }
    idx = which(z$gene == "ENSG00000101162.3")
    scaled.z = scale(t(as.data.frame(z)[idx, -1]))
    colnames(scaled.z) = c("z")
    print(names(z.scores)[[i]])
    print(as.data.frame(scaled.z)[indiv[which(indiv %in% row.names(scaled.z))], ])
}

[1] "Artery_Tibial"
[1] 0.3049973
[1] "Breast_Mammary_Tissue"
[1] 0.9585913
[1] "Cells_Cultured_fibroblasts"
[1] -0.9428877
[1] "Muscle_Skeletal"
[1] -0.3236698
[1] "Nerve_Tibial"
[1] -0.9148684
[1] "Pituitary"
[1]  0.9132187 -0.2465423
[1] "Skin_Not_Sun_Exposed_Suprapubic"
[1] 0.2872161
[1] "Skin_Sun_Exposed_Lower_leg"
[1]  1.1471211 -0.7007989
[1] "Thyroid"
[1] -0.06988852
