In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import util
import time
import os 
from statsmodels.stats.multitest import multipletests

### Define functions

In [2]:
#SEBASTIEN URCHS
def p_permut(empirical_value, permutation_values):
    """Return a one-sided permutation p-value for an observed statistic.

    The tail is picked from the sign of ``empirical_value``: for a value
    >= 0 the permutation values strictly greater than it are counted,
    otherwise the permutation values strictly smaller than it are counted.
    One is added to both the count and the number of permutations, so the
    result is never exactly 0.

    Parameters
    ----------
    empirical_value : scalar
        Statistic observed on the real data.
    permutation_values : array-like
        The same statistic computed on each permutation. Anything accepted
        by ``np.asarray`` works (list, tuple, ndarray, pandas Series).

    Returns
    -------
    float
        ``(count + 1) / (n_permutation + 1)``.
    """
    # Coerce first: a plain list or tuple does not support the element-wise
    # comparison against a scalar used below.
    permutation_values = np.asarray(permutation_values)
    # Use the total element count so the denominator always matches the
    # set of elements np.sum counts over (identical to len() for 1-D input).
    n_permutation = permutation_values.size
    if empirical_value >= 0:
        n_extreme = np.sum(permutation_values > empirical_value)
    else:
        n_extreme = np.sum(permutation_values < empirical_value)
    return (n_extreme + 1) / (n_permutation + 1)

### Load null models and beta maps
Beta maps were generated with `generate_beta_maps.ipynb`; null models were generated with `generate_null_model.py`.
 - mc: mean corrected (use for variance)
 - nomc: not mean corrected (use for mean shift)

In [35]:
# Group labels, in the order used for the columns/rows of the result tables.
all_groups = [
    'DEL15q11_2', 'DUP15q11_2', 'DUP15q13_3_CHRNA7',
    'DEL2q13', 'DUP2q13',
    'DEL16p13_11', 'DUP16p13_11',
    'DEL13q12_12', 'DUP13q12_12',
    'DEL17p12', 'DUP17p12',
    'TAR_dup', 'NRXN1del',
    'DEL1q21_1', 'DUP1q21_1',
    'DEL22q11_2', 'DUP22q11_2',
    'DEL16p11_2', 'DUP16p11_2',
    'SZ', 'BIP', 'ASD', 'ADHD',
]

# Standardised PRS variable names.
prs = [
    'Stand_PRS_newCDG2_ukbb',
    'Stand_PRS_ASD',
    'Stand_PRS_SCZ',
    'Stand_PRS_MDD',
    'Stand_PRS_IQ',
    'Stand_PRS_BMI',
    'Stand_PRS_height',
    'Stand_PRS_T2D',
    'Stand_PRS_LDL',
    'Stand_PRS_CKD',
    'Stand_PRS_SA',
    'Stand_PRS_thickness',
]

# Continuous variables: every PRS followed by six additional measures.
cont = [
    *prs,
    'CT',
    'SA',
    'Vol',
    'fluid_intelligence_score_all',
    'Gfactor',
    'Neuroticism',
]

In [36]:
# Mean-corrected ("mc") inputs. Both lists are filled in the order
# all_groups + cont, which is the label order the result tables pair them with.
null_models_mc = []
beta_maps_mc = []

n_path_mc = '/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/null_models/{}_null_model_mc.npy'
b_path_mc = '/home/harveyaa/Documents/clara_paper/drop_maillard_15q11_2del/cc_{}_results_mc.csv'
# Iterate over all_groups: the name previously used here, `groups_mc`, is not
# defined anywhere in the visible notebook, so the cell depended on leftover
# kernel state and would raise NameError on a fresh run.
for group in all_groups:
    null_models_mc.append(np.load(n_path_mc.format(group)))
    beta_maps_mc.append(pd.read_csv(b_path_mc.format(group))['betas']) #UNSTANDARDIZED BETAS

n_path_mc_cont = '/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/null_models_continuous/{}_null_model_mc.npy'
b_path_mc_cont = '/home/harveyaa/Documents/clara_paper/drop_maillard_15q11_2del/cont_{}_results_mc.csv'
# The beta-map file names of these variables carry a '_z' suffix; the
# null-model file names do not.
z_suffixed = ['CT','SA','Vol','fluid_intelligence_score_all','Gfactor','Neuroticism']
for group in cont:
    null_models_mc.append(np.load(n_path_mc_cont.format(group)))
    beta_name = group+'_z' if group in z_suffixed else group
    beta_maps_mc.append(pd.read_csv(b_path_mc_cont.format(beta_name))['betas']) #UNSTANDARDIZED BETAS

In [37]:
# Non-mean-corrected ("nomc") inputs. Both lists are filled in the order
# all_groups + cont, which is the label order the result tables pair them with.
null_models_nomc = []
beta_maps_nomc = []

n_path_nomc = '/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/null_models/{}_null_model_nomc.npy'
b_path_nomc = '/home/harveyaa/Documents/clara_paper/drop_maillard_15q11_2del/cc_{}_results_nomc.csv'
# Iterate over all_groups: the name previously used here, `groups_nomc`, is not
# defined anywhere in the visible notebook, so the cell depended on leftover
# kernel state and would raise NameError on a fresh run.
for group in all_groups:
    null_models_nomc.append(np.load(n_path_nomc.format(group)))
    beta_maps_nomc.append(pd.read_csv(b_path_nomc.format(group))['betas']) #UNSTANDARDIZED BETAS

n_path_nomc_cont = '/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/null_models_continuous/{}_null_model_nomc.npy'
b_path_nomc_cont = '/home/harveyaa/Documents/clara_paper/drop_maillard_15q11_2del/cont_{}_results_nomc.csv'
# The beta-map file names of these variables carry a '_z' suffix; the
# null-model file names do not.
z_suffixed = ['CT','SA','Vol','fluid_intelligence_score_all','Gfactor','Neuroticism']
for group in cont:
    null_models_nomc.append(np.load(n_path_nomc_cont.format(group)))
    beta_name = group+'_z' if group in z_suffixed else group
    beta_maps_nomc.append(pd.read_csv(b_path_nomc_cont.format(beta_name))['betas']) #UNSTANDARDIZED BETAS

# **Mean shift**
For each null model (no mean correction), compute the mean of the beta values within each permutation.

In [39]:
# The number of permutations is read from the first null model and reused for
# every model.
n_permutations = len(null_models_nomc[0])

# One row per null model, one entry per permutation: the mean of that
# permutation's values.
mean_by_model = np.array(
    [
        [np.mean(model[perm]) for perm in range(n_permutations)]
        for model in null_models_nomc
    ],
    dtype=float,
)

# Transpose so each label becomes a column and each permutation a row.
mean_shift = pd.DataFrame(mean_by_model.T, columns=all_groups+cont)
mean_shift

Unnamed: 0,DEL15q11_2,DUP15q11_2,DUP15q13_3_CHRNA7,DEL2q13,DUP2q13,DEL16p13_11,DUP16p13_11,DEL13q12_12,DUP13q12_12,DEL17p12,...,Stand_PRS_LDL,Stand_PRS_CKD,Stand_PRS_SA,Stand_PRS_thickness,CT,SA,Vol,fluid_intelligence_score_all,Gfactor,Neuroticism
0,0.012398,0.009307,0.016247,0.002446,-0.007780,-0.081506,-0.010914,-0.041992,-0.070205,-0.016916,...,0.000354,-0.001739,0.000368,-0.000299,-0.000760,-0.000068,-0.000495,-0.000194,0.000904,-0.000183
1,0.001017,0.015417,0.002319,0.002745,0.015855,-0.041374,0.001355,-0.033952,-0.035984,-0.014895,...,-0.001337,-0.000325,-0.000742,0.001781,-0.002219,0.000742,0.000837,-0.001131,0.001161,0.000529
2,-0.002694,0.000968,-0.002220,-0.008783,-0.000491,-0.032311,-0.014509,0.051575,0.027182,0.028840,...,-0.000962,0.001655,0.000678,0.001202,-0.000981,0.000099,0.000462,0.000457,-0.001383,-0.000667
3,0.003869,-0.010929,-0.012544,0.012231,-0.011627,0.041271,-0.008474,-0.025405,-0.052609,0.029248,...,-0.001010,-0.000189,-0.001024,0.000116,0.001668,-0.000714,-0.000421,0.000521,-0.001137,-0.000356
4,-0.009386,0.012052,0.003768,0.009418,0.001907,0.075680,-0.014234,0.029466,0.004519,-0.010515,...,-0.000435,0.001033,0.000435,-0.001188,0.000166,0.000345,0.000417,-0.000751,-0.000331,-0.000395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.024404,-0.001267,0.009696,0.023839,0.000116,-0.010918,0.009040,-0.026779,0.003139,0.060298,...,0.000247,-0.000343,0.000083,-0.000586,0.000820,-0.001187,-0.000764,-0.000376,-0.000311,-0.000624
4996,0.002167,0.001602,0.003986,0.010347,-0.000675,0.022352,-0.044270,-0.008025,-0.035164,0.020984,...,0.000390,0.001071,-0.001614,-0.000329,0.000626,-0.000377,-0.000591,-0.000312,-0.000112,-0.001780
4997,0.031948,-0.008395,-0.010355,0.014356,-0.008409,-0.010619,0.000073,-0.015428,-0.019078,0.040738,...,0.000594,0.001361,0.001766,0.000569,0.000900,-0.001561,-0.001419,-0.000733,-0.000070,-0.000524
4998,0.002727,-0.003083,0.016357,0.001644,0.018835,0.002987,-0.007445,0.042443,-0.000400,0.008560,...,-0.000530,0.000137,0.000374,0.000525,-0.000380,-0.000673,-0.000906,0.000478,-0.001634,-0.000651


In [41]:
labels = all_groups+cont

# Observed statistic: the mean of each non-mean-corrected beta map.
mean_shift_maps = [np.mean(b_map) for b_map in beta_maps_nomc]

# Permutation p-value of each observed mean against the axis-1 means of the
# null model at the same position.
p_val = [
    p_permut(np.mean(beta_maps_nomc[idx]), np.mean(null_models_nomc[idx], 1))
    for idx in range(len(labels))
]

# Two columns (observed mean, p-value), one row per label.
mean_shift_pval = pd.DataFrame(
    np.array([mean_shift_maps, p_val]).T,
    index=labels,
    columns=['beta_map_mean', 'p_permut'],
)
mean_shift_pval.to_csv('/home/harveyaa/Documents/fMRI/cnv_fmri/results/ukbb_9cohorts/permutations/mean_shift_pval_unstandardized.csv')

In [42]:
# Display the table of observed beta-map means and their permutation p-values.
mean_shift_pval

Unnamed: 0,beta_map_mean,p_permut
DEL15q11_2,0.00844,0.246151
DUP15q11_2,-0.012984,0.119576
DUP15q13_3_CHRNA7,0.001637,0.434313
DEL2q13,0.008729,0.179764
DUP2q13,-0.006226,0.322735
DEL16p13_11,-0.02952,0.241352
DUP16p13_11,-0.036267,0.033993
DEL13q12_12,0.00726,0.369726
DUP13q12_12,0.069424,0.009598
DEL17p12,-0.025447,0.229554


In [47]:
# Rows whose uncorrected permutation p-value is below 0.05.
nominal_mask = mean_shift_pval['p_permut'] < 0.05
mean_shift_pval.loc[nominal_mask]

Unnamed: 0,beta_map_mean,p_permut
DUP16p13_11,-0.036267,0.033993
DUP13q12_12,0.069424,0.009598
DUP1q21_1,0.067808,0.012597
DEL22q11_2,-0.04922,0.038192
DUP22q11_2,0.055253,0.024795
DEL16p11_2,0.039516,0.047191
SZ,-0.045715,0.0002
ASD,-0.020349,0.010798
Stand_PRS_height,-0.002308,0.0018
CT,0.00229,0.0016


In [51]:
# Benjamini-Hochberg FDR correction over all mean-shift p-values
# (statsmodels' default alpha is used).
mt = multipletests(mean_shift_pval['p_permut'], method='fdr_bh')
reject, qvalues = mt[:2]

# Attach the rejection flags and the corrected p-values to the table in place.
for column, values in (('reject_fdr', reject), ('qvalues_fdr', qvalues)):
    mean_shift_pval[column] = values

In [59]:
# Rows for which the FDR procedure rejected the null hypothesis.
fdr_mask = mean_shift_pval['reject_fdr']
mean_shift_pval.loc[fdr_mask]

Unnamed: 0,beta_map_mean,p_permut,reject_fdr,qvalues_fdr
DUP13q12_12,0.069424,0.009598,True,0.043725
DUP1q21_1,0.067808,0.012597,True,0.046954
SZ,-0.045715,0.0002,True,0.00205
ASD,-0.020349,0.010798,True,0.044271
Stand_PRS_height,-0.002308,0.0018,True,0.009223
CT,0.00229,0.0016,True,0.009223
SA,0.007049,0.0002,True,0.00205
Vol,0.006722,0.0002,True,0.00205
fluid_intelligence_score_all,0.002679,0.0004,True,0.003279
Gfactor,0.003521,0.0002,True,0.00205


# **Variance**
For each null model (with mean correction), compute the variance of the beta values within each permutation.

In [43]:
# The number of permutations is read from the first null model and reused for
# every model.
n_permutations = len(null_models_mc[0])

# One row per null model, one entry per permutation: the variance of that
# permutation's values.
var_by_model = np.array(
    [
        [np.var(model[perm]) for perm in range(n_permutations)]
        for model in null_models_mc
    ],
    dtype=float,
)

# Transpose so each label becomes a column and each permutation a row.
var = pd.DataFrame(var_by_model.T, columns=all_groups+cont)
var

Unnamed: 0,DEL15q11_2,DUP15q11_2,DUP15q13_3_CHRNA7,DEL2q13,DUP2q13,DEL16p13_11,DUP16p13_11,DEL13q12_12,DUP13q12_12,DEL17p12,...,Stand_PRS_LDL,Stand_PRS_CKD,Stand_PRS_SA,Stand_PRS_thickness,CT,SA,Vol,fluid_intelligence_score_all,Gfactor,Neuroticism
0,0.000347,0.000256,0.000128,0.000194,0.000291,0.003514,0.000689,0.001082,0.001244,0.003143,...,1.101789e-06,1.059681e-06,0.000002,0.000002,8.397529e-07,1.203807e-06,1.203122e-06,9.138960e-07,0.000001,0.000001
1,0.000313,0.000229,0.000213,0.000176,0.000391,0.003495,0.001026,0.001621,0.001586,0.002297,...,1.464735e-06,9.513716e-07,0.000002,0.000002,1.297209e-06,1.020496e-06,1.006250e-06,1.009827e-06,0.000002,0.000001
2,0.000276,0.000273,0.000183,0.000155,0.000337,0.002996,0.000833,0.001680,0.001536,0.001711,...,1.171377e-06,9.771435e-07,0.000002,0.000001,1.220339e-06,9.945602e-07,1.366119e-06,1.293049e-06,0.000002,0.000001
3,0.000299,0.000187,0.000173,0.000170,0.000370,0.002494,0.001054,0.001814,0.001666,0.002528,...,1.012450e-06,1.135673e-06,0.000002,0.000002,1.350892e-06,1.312082e-06,1.412878e-06,1.086451e-06,0.000001,0.000001
4,0.000370,0.000279,0.000143,0.000141,0.000386,0.007206,0.000856,0.001296,0.001488,0.002859,...,9.514255e-07,1.016019e-06,0.000002,0.000002,9.258747e-07,1.180609e-06,1.083057e-06,9.415885e-07,0.000002,0.000002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.000325,0.000244,0.000153,0.000148,0.000455,0.003762,0.000856,0.001809,0.001249,0.001664,...,7.951882e-07,1.095077e-06,0.000001,0.000002,9.704819e-07,1.182781e-06,1.371032e-06,9.079051e-07,0.000002,0.000001
4996,0.000312,0.000205,0.000256,0.000168,0.000330,0.003300,0.000858,0.001212,0.001481,0.002134,...,8.428804e-07,1.038967e-06,0.000002,0.000001,1.368902e-06,9.355001e-07,8.470609e-07,1.037427e-06,0.000001,0.000002
4997,0.000267,0.000193,0.000191,0.000173,0.000386,0.002647,0.000709,0.001303,0.001273,0.002147,...,1.285626e-06,9.987411e-07,0.000002,0.000002,9.907788e-07,9.047491e-07,9.031647e-07,1.155288e-06,0.000002,0.000001
4998,0.000273,0.000235,0.000163,0.000261,0.000265,0.002312,0.000751,0.001863,0.001389,0.001399,...,1.047988e-06,1.114948e-06,0.000002,0.000002,1.016137e-06,1.415562e-06,1.468861e-06,1.017306e-06,0.000001,0.000002


In [44]:
labels = all_groups+cont

# Observed statistic: the variance of each mean-corrected beta map.
var_maps = [np.var(b_map) for b_map in beta_maps_mc]

# Permutation p-value of each observed variance against the axis-1 variances
# of the null model at the same position.
p_val_var = [
    p_permut(np.var(beta_maps_mc[idx]), np.var(null_models_mc[idx], 1))
    for idx in range(len(labels))
]

# Two columns (observed variance, p-value), one row per label.
var_pval = pd.DataFrame(
    np.array([var_maps, p_val_var]).T,
    index=labels,
    columns=['beta_map_var', 'p_permut'],
)

In [48]:
# Rows whose uncorrected permutation p-value is below 0.05.
nominal_mask = var_pval['p_permut'] < 0.05
var_pval.loc[nominal_mask]

Unnamed: 0,beta_map_var,p_permut
DEL15q11_2,0.000486,0.014797
DUP15q11_2,0.000335,0.035593
DEL1q21_1,0.00231,0.004399
DUP1q21_1,0.002878,0.006999
DEL22q11_2,0.00413,0.0002
DUP22q11_2,0.002116,0.032394
DEL16p11_2,0.003897,0.0002
DUP16p11_2,0.001745,0.002599
SZ,0.001053,0.0002
BIP,0.00204,0.0002


In [46]:
# Write the variance statistics and permutation p-values to CSV.
var_out_path = '/home/harveyaa/Documents/fMRI/cnv_fmri/results/ukbb_9cohorts/permutations/var_pval_unstandardized.csv'
var_pval.to_csv(var_out_path)

In [50]:
# Display the table of observed beta-map variances and their permutation p-values.
var_pval

Unnamed: 0,beta_map_var,p_permut
DEL15q11_2,0.000486,0.014797
DUP15q11_2,0.000335,0.035593
DUP15q13_3_CHRNA7,0.000136,0.886223
DEL2q13,0.000141,0.885223
DUP2q13,0.000373,0.418516
DEL16p13_11,0.002626,0.811838
DUP16p13_11,0.000828,0.35053
DEL13q12_12,0.001459,0.466107
DUP13q12_12,0.00113,0.971006
DEL17p12,0.001977,0.493901


In [61]:
# Benjamini-Hochberg FDR correction over all variance p-values
# (statsmodels' default alpha is used).
mt = multipletests(var_pval['p_permut'], method='fdr_bh')
reject, qvalues = mt[:2]

# Attach the rejection flags and the corrected p-values to the table in place.
for column, values in (('reject_fdr', reject), ('qvalues_fdr', qvalues)):
    var_pval[column] = values

In [62]:
# Rows for which the FDR procedure rejected the null hypothesis.
fdr_mask = var_pval['reject_fdr']
var_pval.loc[fdr_mask]

Unnamed: 0,beta_map_var,p_permut,reject_fdr,qvalues_fdr
DEL15q11_2,0.000486,0.014797,True,0.027576
DEL1q21_1,0.00231,0.004399,True,0.009018
DUP1q21_1,0.002878,0.006999,True,0.013664
DEL22q11_2,0.00413,0.0002,True,0.000547
DEL16p11_2,0.003897,0.0002,True,0.000547
DUP16p11_2,0.001745,0.002599,True,0.005921
SZ,0.001053,0.0002,True,0.000547
BIP,0.00204,0.0002,True,0.000547
ASD,0.000483,0.0002,True,0.000547
ADHD,0.00015,0.027594,True,0.047141
