In [2]:
"""
Supplementary Figure 3: 
PEER factor analysis heatmap
"""
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as ss
import matplotlib.gridspec as gridspec
import seaborn.apionly as sns

def correl(X, Y):
    """
    Computes the Pearson correlation coefficient and a 95% confidence
    interval based on the data in X and Y
    """
    r = np.corrcoef(X, Y)[0,1]
    f = 0.5*np.log((1+r)/(1-r))
    se = 1/np.sqrt(len(X)-3)
    ucl = f + 2*se
    lcl = f - 2*se
    lcl = (np.exp(2*lcl) - 1) / (np.exp(2*lcl) + 1)
    ucl = (np.exp(2*ucl) - 1) / (np.exp(2*ucl) + 1)
    return r,lcl,ucl

SHORTEN = {
    "Artery-Aorta":"Artery A."     ,
    "Artery-Tibial": "Artery T.",
    "Adipose-Subcutaneous": "Adipose S.",    
    "Adipose-Visceral(Omentum)":"Adipose V.",
    "Brain-Caudate(basalganglia)":"Caudate"   , 
    "Brain-Cerebellum":"Cerebellum",
    "Cells-Transformedfibroblasts": "Fibroblast",
    "Esophagus-Mucosa": "E. Mucosa",
    "Esophagus-Muscularis":"E Muscularis",
    "Heart-LeftVentricle":"Ventricule",
    "Lung": "Lung",
    "Muscle-Skeletal": "Muscle",
    "Nerve-Tibial":"Nerve",
    "Skin-NotSunExposed(Suprapubic)": "Skin Unexposed",
    "Skin-SunExposed(Lowerleg)":"Skin Leg",
    "Thyroid":"Thyroid",
    "WholeBlood": "Blood"
}
TISSUES = [item for item in list(SHORTEN.keys()) if item != "permuted"]


Tissues = list(SHORTEN.keys())
#Phenotypes1='/storage/resources/datasets/gtex/53844/PhenoGenotypeFiles/RootStudyConsentSet_phs000424.GTEx.v6.p1.c1.GRU/PhenotypeFiles/phs000424.v6.pht002743.v6.p1.c1.GTEx_Sample_Attributes.GRU.txt.gz'
Phenotypes2='/storage/resources/datasets/gtex/59533/PhenoGenotypeFiles/RootStudyConsentSet_phs000424.GTEx.v7.p2.c1.GRU/PhenotypeFiles/phs000424.v7.pht002743.v7.p2.c1.GTEx_Sample_Attributes.GRU.txt.gz'

In [1]:
def plotheatmap(T):
    PF ="/storage/szfeupe/Runs/650GTEx_estr/Analysis_by_Tissue/"+T+"/peerFactors.tsv"
    Index=['V1','V2','V3','V4','V5','V6','V7','V8','V9','V10','V11','V12','V13','V14']
    #Get IDs
    peerfactor = pd.read_csv(PF,sep='\t')
    idx = sorted([x.replace('.','-') for x in list(peerfactor.index)])
    peerfactor.index = idx

    #Get covariates & Other phenotypes
        #AGE - RACE - ETHNICITY
    IT=pd.read_csv(Phenotypes2, compression='gzip', sep='\t', quotechar='"', skiprows=10)
    IT = IT.loc[IT['SMMTRLTP'].isin(['Whole Blood:Whole Blood'])] #
    IT['SAMPID'] = [x[:9] for x in list(IT['SAMPID'])]   #Reformat ID first 9 char

    code=['SAMPID','SMGEBTCH','SMTSISCH','SMTPAX','SMTSTPTREF','SMNABTCH','SMCENTER']
    Vars = IT.loc[:,code]
    Vars = Vars.loc[Vars['SAMPID'].isin(idx)]   #Select only samples of interest
    Vars = Vars.groupby('SAMPID').first()      #Take earliest occurence if Duplicates

    L=list(Vars.index)
    peerfactors = peerfactor.loc[peerfactor.index.isin(L)]

        #GET other phenotypes
    IT=pd.read_csv(Phenotypes2, compression='gzip', sep='\t', quotechar='"', skiprows=10)
    ind=['SUBJID','AGE','TRISCHD','RACE','GENDER', 'DTHHRDY', 'SMTSISCH']
    Ischtime = IT.loc[:,ind]
    Isctime=Ischtime.loc[Ischtime['SUBJID'].isin(L)]

    SMTSISCH = [ss.pearsonr(peerfactors[YY].values,Vars['SMTSISCH'].values)[0] for YY in Index]
    SMCENTER = [ss.spearmanr(peerfactors[YY].values,Vars['SMCENTER'].values).correlation for YY in Index]
    SMTSTPTREF =[ss.spearmanr(peerfactors[YY].values,Vars['SMTSTPTREF'].values).correlation for YY in Index]
    DTHHRDY=[ss.spearmanr(peerfactors[YY].values,Isctime['DTHHRDY'].values).correlation for YY in Index]
    TRISCHD=[ss.spearmanr(peerfactors[YY].values,Isctime['TRISCHD'].values)[0] for YY in Index]
    AGE = [ss.spearmanr(peerfactors[YY].values,Isctime['AGE'].values)[0] for YY in Index]

    df = pd.DataFrame( {'SMTSISCH': SMTSISCH,'SMCENTER':SMCENTER,'SMTSTPTREF':SMTSTPTREF,
         'TRISCHD': TRISCHD,'DTHHRDY':DTHHRDY, 'AGE':AGE }, index=Index)
    return df
    

In [3]:
for T in Tissues:
    T='WholeBlood'
    df = plotheatmap(T)
    df.drop('V1', inplace=True)
    print(T, df.values.min().min(), df.values.max().max())
    labels = ['AGE', 'Hardy scale', 'Collection center', 'Sample Ischemic time', 'Procurement reference point', 'Individual ischemic time']

    fig, ax = plt.subplots(figsize=(4, 4))
    sns.heatmap(df, cmap='coolwarm', vmin=df.values.min().min(), vmax=df.values.max().max(), square=False, ax=ax,
               xticklabels=labels, yticklabels=list(df.index));
    ax.invert_xaxis();
    plt.show()
    break

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  c /= stddev[:, None]
  c /= stddev[None, :]
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


ValueError: all the input array dimensions except for the concatenation axis must match exactly

In [5]:
Adipose-Subcutaneous
Artery-Tibial
Esophagus-Mucosa
Cells-Transformedfibroblasts
Thyroid
WholeBlood


NameError: name 'Vars' is not defined

In [None]:
SMGEBTC   Expression batch ID                       SMCENTER   Collection center
DTHHRDY    Hardy scale                              SMTSISCH   Ischemic time for sample
TRISCHD    Ischemic time for individual             AGE        Age of individual
RACE       Self reported race                       SMTPAX      Time spent in fixative
SMTSTPTREF  Procurement reference point             SMNABTCH     Nucleic acid isolation batch
SMRIN        RNA quality score (RIN)                GENDER       Gender of individual