# Table 1. For each tissue, report:
* Number of samples included
* Number of genes tested
* Number of eSTRs
* GCTA: % cis h2 explained by STRs
* GCTA: total cis h2 explained by STRs
* Number passing ANOVA p<0.05 (after FDR adjustment)
* CAVIAR: number of eSTRs that have the top CAVIAR score compared to nearby SNPs

# TODO
* Why is the ANOVA p-value significant for all eSTRs? That can't be right.
* Why are h2 values so high?
* Number of samples for each tissue

In [16]:
# Set up
# IPython magic (notebook-only, not plain Python); per the cell output it
# populates the interactive namespace from numpy and matplotlib.
%pylab inline

# Allow us to edit fonts in Illustrator
import matplotlib
# Font-related rcParams for PostScript/PDF output.
matplotlib.rcParams['ps.useafm'] = True
matplotlib.rcParams['pdf.use14corefonts'] = True
# NOTE(review): usetex presumably requires a working LaTeX installation on the
# machine that renders the figures -- confirm before running elsewhere.
matplotlib.rcParams['text.usetex'] = True

# Libraries
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy.stats

# Params
# Rows with qvalue <= QVAL are counted as eSTRs when the table is built.
QVAL = 0.1
# Rows with AD.pval < ANOVATHRESH are counted as passing ANOVA.
ANOVATHRESH = 0.05
# Maps each tissue directory name to the short label shown in the table.
SHORTEN = {
    "WholeBlood": "Blood",
    "Cells-Transformedfibroblasts": "Fibr.",
    "Muscle-Skeletal": "Muscle",
    "Artery-Tibial": "Artery",
    "Adipose-Subcutaneous": "Adipose",
    "Lung": "Lung",
    "Esophagus-Mucosa": "Esophagus",
}

# Path to data
RESDIR = "/storage/szfeupe/Runs/GTEx_estr/Analysis_by_Tissue/"
# Tissues are exactly the keys of SHORTEN, in insertion order.
TISSUES = list(SHORTEN)

# Load one tab-separated "Master.table" per tissue from RESDIR/<tissue>/.
data = {}
for t in TISSUES:
    data[t] = pd.read_csv(os.path.join(RESDIR, t, "Master.table"), sep="\t")

Populating the interactive namespace from numpy and matplotlib


In [None]:
# Build Table 1: one summary row per tissue listed in TISSUES.


def _str_h2_fraction(df):
    """Return cis_str_h2 / (cis_str_h2 + cis_snp_h2) for each row of *df*.

    Rows where either column is missing yield NaN, which the subsequent
    ``.mean()`` skips.
    """
    str_h2 = df["cis_str_h2"]
    return str_h2 / (str_h2 + df["cis_snp_h2"])


num_genes = []
num_estrs = []
anova_pass = []
num_caviar_best = []
gcta_perc_estr = []
gcta_perc_all = []
gcta_r2_estr = []
gcta_r2_all = []
num_samples = []

for t in TISSUES:
    xall = data[t]
    # eSTRs are the rows whose q-value is at or below the QVAL cutoff.
    xsig = xall[xall["qvalue"] <= QVAL]
    num_genes.append(xall.shape[0])
    num_estrs.append(xsig.shape[0])
    # NOTE(review): the notebook header promises FDR-adjusted ANOVA p-values,
    # but AD.pval is compared to ANOVATHRESH directly here -- confirm whether
    # that column is already adjusted (see the TODO about all eSTRs passing).
    anova_pass.append(int((xsig["AD.pval"] < ANOVATHRESH).sum()))
    # eSTRs whose best STR score is at least the top variant score.
    num_caviar_best.append(
        int((xsig["best.str.score"] >= xsig["top.variant.score"]).sum())
    )
    # Vectorised column arithmetic instead of row-wise DataFrame.apply: the
    # values are the same, but an empty eSTR set now gives NaN rather than a
    # non-scalar result that would corrupt the table column.
    # Series.mean() skips NaN, so rows lacking GCTA estimates are ignored.
    gcta_perc_estr.append(_str_h2_fraction(xsig).mean())
    gcta_perc_all.append(_str_h2_fraction(xall).mean())
    gcta_r2_estr.append(xsig["cis_str_h2"].mean())
    gcta_r2_all.append(xall["cis_str_h2"].mean())
    num_samples.append("?") # TODO

# Numeric key prefixes fix the column order when keys are sorted.
mtable = pd.DataFrame({
    "00_samples": num_samples,
    "0_tissue": [SHORTEN[t] for t in TISSUES],
    "1_numgenes": num_genes,
    "2_num.estrs": num_estrs,
    "3_gcta.estr.perch2": gcta_perc_estr,
    "4_gcta.all.perch2": gcta_perc_all,
    "5_gcta.estr.r2": gcta_r2_estr,
    "6_gcta.all.r2": gcta_r2_all,
    "7_anova.pass": anova_pass,
    "8_num_caviar_best": num_caviar_best,
})

mtable