In [1]:
# Set up
%pylab inline

# Allow us to edit fonts in Illustrator
import matplotlib
matplotlib.rcParams['ps.useafm'] = True
matplotlib.rcParams['pdf.use14corefonts'] = True
matplotlib.rcParams['text.usetex'] = True

# Libraries
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy.stats

# Params
# Rows whose "qvalue" is <= QVAL are kept as the significant subset.
QVAL = 0.1
# Rows whose "anova_pval" is strictly below this value count as passing ANOVA.
ANOVATHRESH = 0.05

# Tissue directory name -> short label shown in the summary table.
SHORTEN = {
    "WholeBlood": "Blood",
    "Cells-Transformedfibroblasts": "Fibr.",
    "Muscle-Skeletal": "Muscle",
    "Artery-Tibial": "Artery",
    "Adipose-Subcutaneous": "Adipose",
    "Lung": "Lung",
    "Esophagus-Mucosa": "Esophagus",
}
# Tissue directory name -> sample count reported in the summary table.
NSAMPLES = {
    "WholeBlood": 144,
    "Cells-Transformedfibroblasts": 128,
    "Muscle-Skeletal": 125,
    "Artery-Tibial": 108,
    "Adipose-Subcutaneous": 102,
    "Esophagus-Mucosa": 102,
    "Lung": 110,
}
# Both lookups are indexed with every entry of TISSUES further down, so a
# tissue missing from either one would only surface later as a KeyError.
assert set(SHORTEN) == set(NSAMPLES), "SHORTEN and NSAMPLES must list the same tissues"

# Path to data
RESDIR = "/storage/szfeupe/Runs/GTEx_estr/Analysis_by_Tissue/"
# Tissues to analyse: every key of SHORTEN, in the dict's iteration order.
TISSUES = list(SHORTEN)

# Read each tissue's tab-separated "Master.table" (one per tissue directory
# under RESDIR) into a dict keyed by tissue name.
data = {}
for t in TISSUES:
    data[t] = pd.read_csv(os.path.join(RESDIR, t, "Master.table"), sep="\t")

Populating the interactive namespace from numpy and matplotlib


In [2]:
def _gcta_means(table):
    """Summarise the ``cis_str_h2`` / ``cis_snp_h2`` columns of ``table``.

    Only rows with a non-missing ``cis_str_h2`` are used.

    Returns a tuple ``(mean_share, mean_str_h2)`` where ``mean_share`` is the
    mean of ``cis_str_h2 / (cis_str_h2 + cis_snp_h2)`` and ``mean_str_h2`` is
    the mean of ``cis_str_h2``. Means skip NaN values; both are NaN when no
    row has a ``cis_str_h2`` value.
    """
    has_str_h2 = table["cis_str_h2"].notna()
    str_h2 = table.loc[has_str_h2, "cis_str_h2"]
    snp_h2 = table.loc[has_str_h2, "cis_snp_h2"]
    return (str_h2 / (str_h2 + snp_h2)).mean(), str_h2.mean()


# Per-tissue accumulators, filled in TISSUES order (one entry per tissue).
num_genes = []
num_estrs = []
anova_pass = []
num_caviar_best = []
gcta_perc_estr = []
gcta_perc_all = []
gcta_r2_estr = []
gcta_r2_all = []
num_samples = []

for t in TISSUES:
    xall = data[t]
    # Significant subset: rows with qvalue at or below the QVAL threshold.
    xsig = xall[xall["qvalue"] <= QVAL]

    perc_estr, r2_estr = _gcta_means(xsig)
    perc_all, r2_all = _gcta_means(xall)

    num_genes.append(xall.shape[0])
    num_estrs.append(xsig.shape[0])
    # Comparisons against NaN are False, so rows with missing values are not counted.
    anova_pass.append(int((xsig["anova_pval"] < ANOVATHRESH).sum()))
    num_caviar_best.append(int((xsig["best.str.score"] >= xsig["top.variant.score"]).sum()))
    gcta_perc_estr.append(perc_estr)
    gcta_perc_all.append(perc_all)
    gcta_r2_estr.append(r2_estr)
    gcta_r2_all.append(r2_all)
    num_samples.append(NSAMPLES[t])

# Numeric key prefixes keep the columns in this order even on pandas versions
# that sort dict keys when building a DataFrame.
mtable = pd.DataFrame({
    "00_tissue": [SHORTEN[t] for t in TISSUES],
    "0_samples": num_samples,
    "1_numgenes": num_genes,
    "2_num.estrs": num_estrs,
    "3_gcta.estr.perch2": gcta_perc_estr,
    "4_gcta.all.perch2": gcta_perc_all,
    "5_gcta.estr.r2": gcta_r2_estr,
    "6_gcta.all.r2": gcta_r2_all,
    "7_anova.pass": anova_pass,
    "8_num_caviar_best": num_caviar_best,
})

# DataFrame.sort was removed from pandas; sort_values is its replacement.
# Displayed only -- mtable itself keeps TISSUES order.
mtable.sort_values("2_num.estrs", ascending=False)



Unnamed: 0,00_tissue,0_samples,1_numgenes,2_num.estrs,3_gcta.estr.perch2,4_gcta.all.perch2,5_gcta.estr.r2,6_gcta.all.r2,7_anova.pass,8_num_caviar_best
2,Fibr.,128,15362,955,0.669205,0.795474,0.125969,0.044704,235,151
6,Blood,144,14845,630,0.679351,0.81467,0.114938,0.038565,131,90
5,Artery,108,15584,524,0.698473,0.820082,0.154555,0.05098,129,72
0,Esophagus,102,16111,475,0.719099,0.82826,0.163506,0.05239,107,87
3,Lung,110,15711,378,0.715036,0.833435,0.154623,0.047877,81,69
4,Adipose,102,15918,376,0.723583,0.830789,0.175574,0.051878,78,61
1,Muscle,125,15275,304,0.727204,0.837351,0.143855,0.041752,54,38
