#### Tissue-specific RV eGenes

In [1]:
library(data.table)
library(dplyr)

load.data <- function(tissue) {
    filename <- paste("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/qvals/", tissue, ".lrt.q", sep="")
    
    return(fread(filename, data.table=F))
}

get.egenes <- function(qvals) {
    egenes = qvals$Gene_ID[apply(qvals, 1, function(x) {any(as.numeric(x[-1]) < 0.05)})]
    return(egenes)
}

get.tissue.specific.genes <- function(egenes.list) {
    res = vector("list", length(egenes.list))
    names(res) = names(egenes.list)
    for (i in 1:length(egenes.list)) {
        res[[i]] = egenes.list[[i]][!egenes.list[[i]] %in% unique(unlist(egenes.list[-i]))]
    }
    return(res)
}


Attaching package: ‘dplyr’

The following objects are masked from ‘package:data.table’:

    between, first, last

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
sample.info = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v8/result_summary/tissue.name.match.csv")
tissues = sample.info$tissue

q.data = lapply(tissues, load.data)
names(q.data) = tissues

In [3]:
egenes = lapply(q.data, get.egenes)

In [4]:
res = get.tissue.specific.genes(egenes)

In [6]:
fwrite(as.list(res$Lung), "../tissue_specific_egenes_by_tissue/Lung.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Liver), "../tissue_specific_egenes_by_tissue/Liver.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Whole_Blood), "../tissue_specific_egenes_by_tissue/Whole_Blood.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Skin_Sun_Exposed_Lower_leg), "../tissue_specific_egenes_by_tissue/Skin_Sun_Exposed_Lower_leg.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Skin_Not_Sun_Exposed_Suprapubic), "../tissue_specific_egenes_by_tissue/Skin_Not_Sun_Exposed_Suprapubic.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Heart_Atrial_Appendage), "../tissue_specific_egenes_by_tissue/Heart_Atrial_Appendage.tissue.specifc.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Heart_Left_Ventricle), "../tissue_specific_egenes_by_tissue/Heart_Left_Ventricle.tissue.specifc.rv.egenes.tsv", sep="\n")

#### Tissue-specific non-RV eGenes

In [7]:
get.non.egenes <- function(qvals) {
    egenes = qvals$Gene_ID[apply(qvals, 1, function(x) {all(as.numeric(x[-1]) >= 0.05)})]
    return(egenes)
}

In [8]:
non.egenes = lapply(q.data, get.non.egenes)

In [9]:
res = get.tissue.specific.genes(non.egenes)

In [10]:
fwrite(as.list(res$Lung), "../tissue_specific_egenes_by_tissue/Lung.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Liver), "../tissue_specific_egenes_by_tissue/Liver.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Whole_Blood), "../tissue_specific_egenes_by_tissue/Whole_Blood.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Skin_Sun_Exposed_Lower_leg), "../tissue_specific_egenes_by_tissue/Skin_Sun_Exposed_Lower_leg.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Skin_Not_Sun_Exposed_Suprapubic), "../tissue_specific_egenes_by_tissue/Skin_Not_Sun_Exposed_Suprapubic.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Heart_Atrial_Appendage), "../tissue_specific_egenes_by_tissue/Heart_Atrial_Appendage.tissue.specifc.non.rv.egenes.tsv", sep="\n")
fwrite(as.list(res$Heart_Left_Ventricle), "../tissue_specific_egenes_by_tissue/Heart_Left_Ventricle.tissue.specifc.non.rv.egenes.tsv", sep="\n")

In [11]:
length(non.egenes$Lung)

#### RV eGenes example outlier

In [12]:
library(data.table)
library(dplyr)

In [20]:
target.snp = "chr20_57598808_G_A_b38"
geno = fread("/u/project/eeskin2/k8688933/rare_var/genotypes/v8/all_eur_samples_matrix_maf0.05/chr.20.genotypes.matrix.tsv")

In [21]:
indiv = colnames(geno)[which(geno %>% filter(ID == target.snp) != 0)][-1]
print(indiv)

ERROR: Error in matrix(if (is.null(value)) logical() else value, nrow = nr, dimnames = list(rn, : length of 'dimnames' [2] not equal to array extent


In [5]:
z.heart.lv = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v7/sungoohw/result_summary/log2.standardized.corrected.tpm.egenes.only/log2.standardized.corrected.lrt.tpm.Heart_Left_Ventricle")
z.heart.aa = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v7/sungoohw/result_summary/log2.standardized.corrected.tpm.egenes.only/log2.standardized.corrected.lrt.tpm.Heart_Atrial_Appendage")
z.skin.sun = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v7/sungoohw/result_summary/log2.standardized.corrected.tpm.egenes.only/log2.standardized.corrected.lrt.tpm.Skin_Sun_Exposed_Lower_leg")
z.skin.not.sun = fread("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v7/sungoohw/result_summary/log2.standardized.corrected.tpm.egenes.only/log2.standardized.corrected.lrt.tpm.Skin_Not_Sun_Exposed_Suprapubic")

In [6]:
print(indiv %in% colnames(z.heart.lv)) # this SNP is not in heart left ventricle
print(indiv %in% colnames(z.heart.aa))
print(indiv %in% colnames(z.skin.not.sun))
print(indiv %in% colnames(z.skin.sun))

[1] FALSE
[1] TRUE
[1] FALSE
[1] TRUE


In [7]:
print("ENSG00000101162" %in% z.heart.lv$gene)
print("ENSG00000101162" %in% z.heart.aa$gene)
print("ENSG00000101162" %in% z.skin.not.sun$gene)
print("ENSG00000101162" %in% z.skin.sun$gene)

[1] TRUE
[1] FALSE
[1] FALSE
[1] TRUE


In [12]:
z.heart.lv %>% filter(gene == "ENSG00000101162") %>% select(indiv)

Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(indiv)` instead of `indiv` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m



ERROR: Error: Can't subset columns that don't exist.
[31m✖[39m Column `GTEX-1192W` doesn't exist.


In [17]:
z.heart.aa %>% filter(gene == "ENSG00000101162") %>% select(indiv)

GTEX-1192W
<dbl>


In [40]:
idx = which(z.skin.sun$gene == "ENSG00000101162")
z.skin.sun[idx, -1]

GTEX-111FC,GTEX-111VG,GTEX-1122O,GTEX-1128S,GTEX-113JC,GTEX-117XS,GTEX-117YW,GTEX-1192W,GTEX-1192X,GTEX-11DXX,⋯,GTEX-ZVZP,GTEX-ZWKS,GTEX-ZXES,GTEX-ZXG5,GTEX-ZYFD,GTEX-ZYFG,GTEX-ZYT6,GTEX-ZYY3,GTEX-ZZ64,GTEX-ZZPT
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
0.2039781,0.1152965,0.2654655,0.07899095,-0.2788954,0.1023136,-0.3533411,0.8834474,-0.1525341,-0.2199295,⋯,-0.5025168,0.5314344,-0.02838276,-0.7154214,-0.2346717,1.058739,-0.8418631,0.9270844,-0.4574895,-0.1990843


In [41]:
scaled.z.skin.sun = scale(t(as.data.frame(z.skin.sun)[idx, -1]))
colnames(scaled.z.skin.sun) = c("z")
as.data.frame(scaled.z.skin.sun)[indiv, ] #%>% filter(abs(z) > 2)

In [28]:
idx = which(z.skin.sun$gene == "ENSG00000101162")
colnames(z.skin.sun)[which(abs(z.skin.sun[idx, -1]) > 2)]

In [20]:
z.skin.not.sun %>% filter(gene == "ENSG00000101162") %>% select(indiv)

ERROR: Error: Can't subset columns that don't exist.
[31m✖[39m Column `GTEX-1192W` doesn't exist.


#### RV eGenes example outliers in all tissues

In [32]:
z.scores = lapply(dir("/u/project/eeskin2/k8688933/rare_var/results/tss_20k_v7/sungoohw/result_summary/log2.standardized.corrected.tpm.egenes.only/",
                     pattern="log2.standardized.corrected.lrt.tpm", full.names=T), function(x) {if(file.size(x) > 1) {fread(x, data.table=F)}})
names(z.scores) = fread("../egene.counts.csv")$tissue

In [38]:
z.scores[[17]]

gene,GTEX-11GSP,GTEX-12WSA,GTEX-12WSE,GTEX-12WSF,GTEX-12WSI,GTEX-12WSM,GTEX-12ZZW,GTEX-12ZZY,GTEX-12ZZZ,⋯,GTEX-Z93S,GTEX-ZAB4,GTEX-ZDXO,GTEX-ZE7O,GTEX-ZF28,GTEX-ZUA1,GTEX-ZV68,GTEX-ZVT3,GTEX-ZVZQ,GTEX-ZXG5
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ENSG00000138400,-0.1425735261,0.559858,-0.009521177,0.096497876,0.1447075,-0.18602453,-0.38944793,0.43207037,0.08791188,⋯,-0.2806467,0.2665336,-0.31800346,-0.13203161,-0.01970965,0.40001689,0.36359174,-0.76132422,0.2177883,0.08682875
ENSG00000204305,0.487379293,1.0799562,0.182151643,0.007546178,0.61044826,0.02626529,0.44288272,0.33717416,0.67086188,⋯,0.1444489,-0.6254287,-0.77314893,0.08544676,-0.5761219,-0.47990811,-0.40994767,-0.07367223,0.7407547,-0.35066421
ENSG00000234444,-0.0267629823,0.3769541,-0.171975506,0.417429907,0.09382641,0.19029165,-0.22850692,0.16660748,0.33456041,⋯,-0.4282121,-0.1215563,-0.09824164,0.18849717,-0.24032572,0.14761464,-0.46129846,-0.47910572,0.5160244,-0.02622756
ENSG00000042088,-0.5187204168,0.100174,0.209528847,-0.081237878,0.42132292,-0.0954999,-0.07940635,-0.04602879,-0.43201383,⋯,-0.0401152,-0.1684276,0.01412545,0.38891589,-0.13459428,-0.23815591,0.08085638,-0.11412633,0.3103567,0.17618307
ENSG00000248905,-0.4271095282,-0.29599,-0.136268182,1.847318094,-0.20999195,-0.20693268,-0.21319501,0.05375057,-0.67372492,⋯,2.4275551,0.1367045,0.54651824,0.11398416,-1.31615622,-0.16591567,-0.03676289,0.47900141,-0.3659196,-0.05090383
ENSG00000102886,0.8745975203,1.190855,-0.213096408,0.335269825,-0.53508456,0.50453505,-0.28209968,0.27273912,0.42907571,⋯,0.2574764,-0.4445439,-0.67180209,-0.17533581,-0.88923589,0.14716581,-0.91046848,-0.21927518,0.3987926,0.51970677
ENSG00000100031,-0.0002616167,1.4961935,0.639553142,0.31486341,0.26877783,-0.19559533,-0.64345072,0.17573649,0.28672154,⋯,-0.3396258,0.4261632,-0.09855915,-0.1086691,-0.21766317,-0.08002626,0.01719741,-0.17923401,0.441324,0.67735989


In [40]:
for (i in 1:48) {
    z = z.scores[[i]]
    if (is.null(z)) {
        next
    }
    if (!indiv %in% colnames(z)) {
        next
    }
    if (!"ENSG00000101162" %in% z$gene) {
        next
    }
    idx = which(z$gene == "ENSG00000101162")
    scaled.z = scale(t(as.data.frame(z)[idx, -1]))
    colnames(scaled.z) = c("z")
    print(names(z.scores)[[i]])
    print(as.data.frame(scaled.z)[indiv, ])
}

[1] "Cells_Transformed_fibroblasts"
[1] -0.5617182
[1] "Pituitary"
[1] 1.693557
[1] "Skin_Sun_Exposed_Lower_leg"
[1] 1.198344
[1] "Thyroid"
[1] 0.04230034
