# Correlation between IFN response and PRS phenotype in bootstrapped cohorts

In [1]:
import pandas as pd
import numpy as np
import cna, os, pickle
import scipy.stats as st
import statsmodels.api as sm
# Directory of the per-cell-type .h5ad input files (one file per cell type).
src_folder = "/data/srlab/lrumker/datasets/onek1k/pheno/"
# Root directory of saved results; the PRS association pickle is loaded from here.
res_folder = "/data/srlab/lrumker/MCSC_Project/cna-prs/results/"
# Seed NumPy's global RNG so random draws in this notebook are reproducible.
np.random.seed(0)

In [2]:
def bootstrapped_p_ifn_cor(celltype, nreps = 1000):
    """Bootstrap test of the per-cell correlation between IFN score and ncorrs.

    Each cell's IFN score is the sum of its expression over the genes listed
    in the IFN-alpha gene-set file. Donors are resampled with replacement
    (``d.N`` draws per replicate); for every replicate the cells of the drawn
    donors are pooled (a donor drawn k times contributes its cells k times)
    and the Pearson correlation between the per-cell IFN score and the
    per-cell neighborhood coefficient (``res.ncorrs``) is computed.

    Parameters
    ----------
    celltype : str
        Name of the cell type; selects ``<src_folder><celltype>.h5ad``.
    nreps : int, optional
        Number of bootstrap replicates (default 1000).

    Returns
    -------
    float
        Two-sided percentile-bootstrap p-value for a non-zero correlation:
        ``min(1, 2 * min(P(r <= 0), P(r >= 0)))`` over the replicates.

    Notes
    -----
    The previous implementation took ``abs()`` of every replicate correlation
    and then counted values ``<= 0``, which is zero for any non-degenerate
    data, so it always reported 0.0. Signed correlations are kept here so the
    result reflects how often the bootstrap distribution crosses zero.
    """
    # NOTE(review): the association results are always read from the Myeloid
    # file, whatever `celltype` is -- confirm this is intended.
    # The pickle is a locally produced results file; never point this at
    # untrusted data.
    with open(res_folder+"PRS/SLE_Myeloid.p", 'rb') as f:
        res = pickle.load(f)
    d = cna.read(src_folder+celltype+".h5ad")

    # Per-cell IFN score: total expression across the IFN-alpha response genes.
    ifn_genes = pd.read_csv(res_folder+"PRS/ifna_geneset.csv",
                            index_col = 0).iloc[:,0].values
    i_ifn_genes = np.flatnonzero(d.var.index.isin(ifn_genes))
    d.obs['IFN'] = d.X[:,i_ifn_genes].sum(axis=1)
    # NOTE(review): the per-sample mean is not consumed by the bootstrap
    # below; kept so `d` ends up in the same state as before.
    d.obs_to_sample(['IFN'], aggregate = np.mean)

    # Restrict to the cells retained by the association test and line the
    # per-cell quantities up row by row.
    cells = d.obs.loc[res.kept, ['id', 'IFN']].copy()
    cells['ncorrs'] = res.ncorrs
    ifn = cells['IFN'].values
    ncorrs = cells['ncorrs'].values

    # Row positions of every donor's cells, computed once instead of
    # rescanning the id column for each drawn donor in each replicate.
    rows_by_donor = pd.Series(np.arange(cells.shape[0])).groupby(
        cells['id'].to_numpy()).indices
    no_rows = np.empty(0, dtype=np.intp)  # donors without any kept cell

    np.random.seed(0)
    boot_corrs = np.empty(nreps)
    for rep in range(nreps):
        donors = np.random.choice(d.samplem.index, d.N)
        rows = np.concatenate([rows_by_donor.get(donor, no_rows)
                               for donor in donors])
        boot_corrs[rep] = np.corrcoef(ifn[rows], ncorrs[rows])[0,1]

    # Two-sided percentile p-value: twice the smaller tail on either side of
    # zero, capped at 1.
    frac_nonpositive = np.mean(boot_corrs <= 0)
    frac_nonnegative = np.mean(boot_corrs >= 0)
    return np.minimum(1.0, 2*np.minimum(frac_nonpositive, frac_nonnegative))

In [3]:
# Run the donor-level bootstrap for the Myeloid cell type with 1000 resamples.
bootstrapped_p_ifn_cor("Myeloid", nreps = 1000)

0.0