In [31]:
%matplotlib inline
import os
import math 
import numpy as np
import pandas as pd 
import seaborn as sns
import random
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from scipy.stats import pearsonr, spearmanr, gaussian_kde
from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.preprocessing import scale
from statsmodels.sandbox.stats.multicomp import fdrcorrection0
from matplotlib.colors import rgb2hex
import warnings
sns.set_style('white')
pd.options.display.max_rows = 2000
pd.options.display.max_columns = 999
warnings.filterwarnings('ignore')

ahba_dir     = '/Users/kanaaax/Google Drive/TS-EUROTRAIN/RESULTS_QSMv3/dataframes/AHBA/'
gsea_dir     = '/Users/kanaaax/Desktop/GSEA'
permute_dir  = '/Users/kanaaax/Google Drive/TS-EUROTRAIN/RESULTS_QSMv3/dataframes/permutations'
save_fig_dir = '/Users/kanaaax/Google Drive/TS-EUROTRAIN/Papers/2016_QSM_paper/Figures_python_v4'

# wells outside the striatal masks 
wells = [2371,       # AHBA claustrum, right
         2379,       # AHBA claustrum, left
         159226045,  # AHBA striatum -- out of mask
         160091500   # AHBA septal nuclei, left
         ] 
#housekeeping
drop_strings = ['coords_native', 'donor_names', 'struct_id', 'struct_name', 'top_struct', 'Mean', 'Median', 'PC1', 'PC2','PC3', ]


In [25]:
###########################################
# Read QSM stat maps#
##########################################

df_MNI = pd.read_csv(os.path.join(ahba_dir,'QSM_TSTATS/MNI_NIFTI_VALUES_permute_10K_OCT2.csv'), index_col = 0 )

In [26]:
###########################################
# Read expression values of AHBA database  
###########################################

AHBA = pd.read_csv(os.path.join(ahba_dir, 'ahba_data', 'AHBA_20737.csv'), index_col = 0)

In [27]:
############################################
# Concatenate geneset expression dataframes 
############################################

def return_expression_df(geneset):
    df = pd.read_csv(os.path.join(ahba_dir, 'AHBA_%s.csv'%geneset),index_col=0)
    gs = [i for i in df.columns if i not in drop_strings]
    return df, gs 

IRON_H,  GS_IRON_H   = return_expression_df('IRON_HOMEOSTASIS_PCA')
IRON_D,  GS_IRON_D   = return_expression_df('IRON_D_PCA')
IRON_T2, GS_IRON_T2  = return_expression_df('IRON_TRANSPORT2_PCA')
FERRITIN,GS_FERRITIN = return_expression_df('FERRITIN_PCA')

genesets = {'IRON_H'  : GS_IRON_H,'IRON_D'  : GS_IRON_D,'IRON_T2' : GS_IRON_T2,'FERRITIN': GS_FERRITIN}

def concat_dfs(measure):
    df  = pd.DataFrame(index = IRON_H.index, columns = ['IRON', 'IRON_D', 'IRON_T2', 'FERRITIN', 'top_struct', 'struct'])
    xval = 1
    df['top_struct']        = IRON_H.top_struct
    df['struct']            = IRON_H.struct_name
    df['IRON_H']            = IRON_H[measure] * xval
    df['IRON_D']            = IRON_D[measure]
    df['IRON_T2']           = IRON_T2[measure] * xval
    df['FERRITIN']          = FERRITIN[measure] * xval    
    df['FERRITIN']          = FERRITIN[measure] * xval    
    dfc = pd.concat([df_MNI, df], axis = 1)
    return dfc

df_PC1   = concat_dfs('RC1')
df_MU    = concat_dfs('Mean')
df_MD    = concat_dfs('Median')

dfs = {'df_PC1': df_PC1, 'df_MU':df_MU, 'df_MD':df_MD}

In [40]:
nucleus = ['STR3_MOTOR_tstat_CP_1mm']

df_chi  = df_MNI.drop([i for i in df_MNI.columns if i not in ['STR_tstat_CP_1mm']],axis=1)
df_chi  = pd.DataFrame(df_MNI[nucleus].drop(wells,axis=0).dropna())
len(df_chi)

58

In [35]:
df_MNI.head()

Unnamed: 0,corrected_mni_x,corrected_mni_y,corrected_mni_z,mni_coords,Caud_tstat_CP_1mm,Caud_tfce_corrp_tstat_CP_1mm,Caud_vox_p_tstat_CP_1mm,Caud_vox_corrp_tstat_CP_1mm,Caud_tfce_tstat_CP_1mm,Caud_tfce_p_tstat_CP_1mm,Pall_tstat_CP_1mm,Pall_tfce_corrp_tstat_CP_1mm,Pall_vox_p_tstat_CP_1mm,Pall_vox_corrp_tstat_CP_1mm,Pall_tfce_tstat_CP_1mm,Pall_tfce_p_tstat_CP_1mm,Puta_tstat_CP_1mm,Puta_tfce_corrp_tstat_CP_1mm,Puta_vox_p_tstat_CP_1mm,Puta_vox_corrp_tstat_CP_1mm,Puta_tfce_tstat_CP_1mm,Puta_tfce_p_tstat_CP_1mm,STR_tstat_CP_1mm,STR_tfce_corrp_tstat_CP_1mm,STR_vox_p_tstat_CP_1mm,STR_vox_corrp_tstat_CP_1mm,STR_tfce_tstat_CP_1mm,STR_tfce_p_tstat_CP_1mm,STR3_MOTOR_tstat_CP_1mm,STR3_MOTOR_tfce_corrp_tstat_CP_1mm,STR3_MOTOR_vox_p_tstat_CP_1mm,STR3_MOTOR_vox_corrp_tstat_CP_1mm,STR3_MOTOR_tfce_tstat_CP_1mm,STR3_MOTOR_tfce_p_tstat_CP_1mm,STR3_MOTOR_tstat_LL_1mm,STR3_MOTOR_tfce_corrp_tstat_LL_1mm,STR3_MOTOR_vox_p_tstat_LL_1mm,STR3_MOTOR_vox_corrp_tstat_LL_1mm,STR3_MOTOR_tfce_tstat_LL_1mm,STR3_MOTOR_tfce_p_tstat_LL_1mm,STR3_MOTOR_Pall_tstat_CP_1mm,STR3_MOTOR_Pall_tfce_corrp_tstat_CP_1mm,STR3_MOTOR_Pall_vox_p_tstat_CP_1mm,STR3_MOTOR_Pall_vox_corrp_tstat_CP_1mm,STR3_MOTOR_Pall_tfce_tstat_CP_1mm,STR3_MOTOR_Pall_tfce_p_tstat_CP_1mm,STR3_MOTOR_Pall_tstat_LL_1mm,STR3_MOTOR_Pall_tfce_corrp_tstat_LL_1mm,STR3_MOTOR_Pall_vox_p_tstat_LL_1mm,STR3_MOTOR_Pall_vox_corrp_tstat_LL_1mm,STR3_MOTOR_Pall_tfce_tstat_LL_1mm,STR3_MOTOR_Pall_tfce_p_tstat_LL_1mm,STR3_LIMBIC_tstat_CP_1mm,STR3_LIMBIC_tfce_corrp_tstat_CP_1mm,STR3_LIMBIC_vox_p_tstat_CP_1mm,STR3_LIMBIC_vox_corrp_tstat_CP_1mm,STR3_LIMBIC_tfce_tstat_CP_1mm,STR3_LIMBIC_tfce_p_tstat_CP_1mm,STR3_EXEC_tstat_CP_1mm,STR3_EXEC_tfce_corrp_tstat_CP_1mm,STR3_EXEC_vox_p_tstat_CP_1mm,STR3_EXEC_vox_corrp_tstat_CP_1mm,STR3_EXEC_tfce_tstat_CP_1mm,STR3_EXEC_tfce_p_tstat_CP_1mm,GM_0.0_tstat_CP_1mm,GM_0.0_tfce_corrp_tstat_CP_1mm,GM_0.0_vox_p_tstat_CP_1mm,GM_0.0_vox_corrp_tstat_CP_1mm,GM_0.0_tfce_tstat_CP_1mm,GM_0.0_tfce_p_tstat_CP_1mm,Caud_tstat_CP_2mm,Caud_tfce_corrp_tstat_CP_2mm,Caud_vox_p_tstat_CP_2mm,Caud_vox_corrp_tstat_CP_2mm,Caud_tfce_tstat_CP_2mm,Caud_tfce_p_tstat_CP_2mm,Pall_tstat_CP_2mm,Pall_tfce_corrp_tstat_CP_2mm,Pall_vox_p_tstat_CP_2mm,Pall_vox_corrp_tstat_CP_2mm,Pall_tfce_tstat_CP_2mm,Pall_tfce_p_tstat_CP_2mm,Puta_tstat_CP_2mm,Puta_tfce_corrp_tstat_CP_2mm,Puta_vox_p_tstat_CP_2mm,Puta_vox_corrp_tstat_CP_2mm,Puta_tfce_tstat_CP_2mm,Puta_tfce_p_tstat_CP_2mm,STR_tstat_CP_2mm,STR_tfce_corrp_tstat_CP_2mm,STR_vox_p_tstat_CP_2mm,STR_vox_corrp_tstat_CP_2mm,STR_tfce_tstat_CP_2mm,STR_tfce_p_tstat_CP_2mm,STR3_MOTOR_tstat_CP_2mm,STR3_MOTOR_tfce_corrp_tstat_CP_2mm,STR3_MOTOR_vox_p_tstat_CP_2mm,STR3_MOTOR_vox_corrp_tstat_CP_2mm,STR3_MOTOR_tfce_tstat_CP_2mm,STR3_MOTOR_tfce_p_tstat_CP_2mm,STR3_MOTOR_tstat_LL_2mm,STR3_MOTOR_tfce_corrp_tstat_LL_2mm,STR3_MOTOR_vox_p_tstat_LL_2mm,STR3_MOTOR_vox_corrp_tstat_LL_2mm,STR3_MOTOR_tfce_tstat_LL_2mm,STR3_MOTOR_tfce_p_tstat_LL_2mm,STR3_MOTOR_Pall_tstat_CP_2mm,STR3_MOTOR_Pall_tfce_corrp_tstat_CP_2mm,STR3_MOTOR_Pall_vox_p_tstat_CP_2mm,STR3_MOTOR_Pall_vox_corrp_tstat_CP_2mm,STR3_MOTOR_Pall_tfce_tstat_CP_2mm,STR3_MOTOR_Pall_tfce_p_tstat_CP_2mm,STR3_MOTOR_Pall_tstat_LL_2mm,STR3_MOTOR_Pall_tfce_corrp_tstat_LL_2mm,STR3_MOTOR_Pall_vox_p_tstat_LL_2mm,STR3_MOTOR_Pall_vox_corrp_tstat_LL_2mm,STR3_MOTOR_Pall_tfce_tstat_LL_2mm,STR3_MOTOR_Pall_tfce_p_tstat_LL_2mm,STR3_LIMBIC_tstat_CP_2mm,STR3_LIMBIC_tfce_corrp_tstat_CP_2mm,STR3_LIMBIC_vox_p_tstat_CP_2mm,STR3_LIMBIC_vox_corrp_tstat_CP_2mm,STR3_LIMBIC_tfce_tstat_CP_2mm,STR3_LIMBIC_tfce_p_tstat_CP_2mm,STR3_EXEC_tstat_CP_2mm,STR3_EXEC_tfce_corrp_tstat_CP_2mm,STR3_EXEC_vox_p_tstat_CP_2mm,STR3_EXEC_vox_corrp_tstat_CP_2mm,STR3_EXEC_tfce_tstat_CP_2mm,STR3_EXEC_tfce_p_tstat_CP_2mm,GM_0.0_tstat_CP_2mm,GM_0.0_tfce_corrp_tstat_CP_2mm,GM_0.0_vox_p_tstat_CP_2mm,GM_0.0_vox_corrp_tstat_CP_2mm,GM_0.0_tfce_tstat_CP_2mm,GM_0.0_tfce_p_tstat_CP_2mm,Caud_tstat_CP_3mm,Caud_tfce_corrp_tstat_CP_3mm,Caud_vox_p_tstat_CP_3mm,Caud_vox_corrp_tstat_CP_3mm,Caud_tfce_tstat_CP_3mm,Caud_tfce_p_tstat_CP_3mm,Pall_tstat_CP_3mm,Pall_tfce_corrp_tstat_CP_3mm,Pall_vox_p_tstat_CP_3mm,Pall_vox_corrp_tstat_CP_3mm,Pall_tfce_tstat_CP_3mm,Pall_tfce_p_tstat_CP_3mm,Puta_tstat_CP_3mm,Puta_tfce_corrp_tstat_CP_3mm,Puta_vox_p_tstat_CP_3mm,Puta_vox_corrp_tstat_CP_3mm,Puta_tfce_tstat_CP_3mm,Puta_tfce_p_tstat_CP_3mm,STR_tstat_CP_3mm,STR_tfce_corrp_tstat_CP_3mm,STR_vox_p_tstat_CP_3mm,STR_vox_corrp_tstat_CP_3mm,STR_tfce_tstat_CP_3mm,STR_tfce_p_tstat_CP_3mm,STR3_MOTOR_tstat_CP_3mm,STR3_MOTOR_tfce_corrp_tstat_CP_3mm,STR3_MOTOR_vox_p_tstat_CP_3mm,STR3_MOTOR_vox_corrp_tstat_CP_3mm,STR3_MOTOR_tfce_tstat_CP_3mm,STR3_MOTOR_tfce_p_tstat_CP_3mm,STR3_MOTOR_tstat_LL_3mm,STR3_MOTOR_tfce_corrp_tstat_LL_3mm,STR3_MOTOR_vox_p_tstat_LL_3mm,STR3_MOTOR_vox_corrp_tstat_LL_3mm,STR3_MOTOR_tfce_tstat_LL_3mm,STR3_MOTOR_tfce_p_tstat_LL_3mm,STR3_MOTOR_Pall_tstat_CP_3mm,STR3_MOTOR_Pall_tfce_corrp_tstat_CP_3mm,STR3_MOTOR_Pall_vox_p_tstat_CP_3mm,STR3_MOTOR_Pall_vox_corrp_tstat_CP_3mm,STR3_MOTOR_Pall_tfce_tstat_CP_3mm,STR3_MOTOR_Pall_tfce_p_tstat_CP_3mm,STR3_MOTOR_Pall_tstat_LL_3mm,STR3_MOTOR_Pall_tfce_corrp_tstat_LL_3mm,STR3_MOTOR_Pall_vox_p_tstat_LL_3mm,STR3_MOTOR_Pall_vox_corrp_tstat_LL_3mm,STR3_MOTOR_Pall_tfce_tstat_LL_3mm,STR3_MOTOR_Pall_tfce_p_tstat_LL_3mm,STR3_LIMBIC_tstat_CP_3mm,STR3_LIMBIC_tfce_corrp_tstat_CP_3mm,STR3_LIMBIC_vox_p_tstat_CP_3mm,STR3_LIMBIC_vox_corrp_tstat_CP_3mm,STR3_LIMBIC_tfce_tstat_CP_3mm,STR3_LIMBIC_tfce_p_tstat_CP_3mm,STR3_EXEC_tstat_CP_3mm,STR3_EXEC_tfce_corrp_tstat_CP_3mm,STR3_EXEC_vox_p_tstat_CP_3mm,STR3_EXEC_vox_corrp_tstat_CP_3mm,STR3_EXEC_tfce_tstat_CP_3mm,STR3_EXEC_tfce_p_tstat_CP_3mm,GM_0.0_tstat_CP_3mm,GM_0.0_tfce_corrp_tstat_CP_3mm,GM_0.0_vox_p_tstat_CP_3mm,GM_0.0_vox_corrp_tstat_CP_3mm,GM_0.0_tfce_tstat_CP_3mm,GM_0.0_tfce_p_tstat_CP_3mm,Caud_tstat_CP_4mm,Caud_tfce_corrp_tstat_CP_4mm,Caud_vox_p_tstat_CP_4mm,Caud_vox_corrp_tstat_CP_4mm,Caud_tfce_tstat_CP_4mm,Caud_tfce_p_tstat_CP_4mm,Pall_tstat_CP_4mm,Pall_tfce_corrp_tstat_CP_4mm,Pall_vox_p_tstat_CP_4mm,Pall_vox_corrp_tstat_CP_4mm,Pall_tfce_tstat_CP_4mm,Pall_tfce_p_tstat_CP_4mm,Puta_tstat_CP_4mm,Puta_tfce_corrp_tstat_CP_4mm,Puta_vox_p_tstat_CP_4mm,Puta_vox_corrp_tstat_CP_4mm,Puta_tfce_tstat_CP_4mm,Puta_tfce_p_tstat_CP_4mm,STR_tstat_CP_4mm,STR_tfce_corrp_tstat_CP_4mm,STR_vox_p_tstat_CP_4mm,STR_vox_corrp_tstat_CP_4mm,STR_tfce_tstat_CP_4mm,STR_tfce_p_tstat_CP_4mm,STR3_MOTOR_tstat_CP_4mm,STR3_MOTOR_tfce_corrp_tstat_CP_4mm,STR3_MOTOR_vox_p_tstat_CP_4mm,STR3_MOTOR_vox_corrp_tstat_CP_4mm,STR3_MOTOR_tfce_tstat_CP_4mm,STR3_MOTOR_tfce_p_tstat_CP_4mm,STR3_MOTOR_tstat_LL_4mm,STR3_MOTOR_tfce_corrp_tstat_LL_4mm,STR3_MOTOR_vox_p_tstat_LL_4mm,STR3_MOTOR_vox_corrp_tstat_LL_4mm,STR3_MOTOR_tfce_tstat_LL_4mm,STR3_MOTOR_tfce_p_tstat_LL_4mm,STR3_MOTOR_Pall_tstat_CP_4mm,STR3_MOTOR_Pall_tfce_corrp_tstat_CP_4mm,STR3_MOTOR_Pall_vox_p_tstat_CP_4mm,STR3_MOTOR_Pall_vox_corrp_tstat_CP_4mm,STR3_MOTOR_Pall_tfce_tstat_CP_4mm,STR3_MOTOR_Pall_tfce_p_tstat_CP_4mm,STR3_MOTOR_Pall_tstat_LL_4mm,STR3_MOTOR_Pall_tfce_corrp_tstat_LL_4mm,STR3_MOTOR_Pall_vox_p_tstat_LL_4mm,STR3_MOTOR_Pall_vox_corrp_tstat_LL_4mm,STR3_MOTOR_Pall_tfce_tstat_LL_4mm,STR3_MOTOR_Pall_tfce_p_tstat_LL_4mm,STR3_LIMBIC_tstat_CP_4mm,STR3_LIMBIC_tfce_corrp_tstat_CP_4mm,STR3_LIMBIC_vox_p_tstat_CP_4mm,STR3_LIMBIC_vox_corrp_tstat_CP_4mm,STR3_LIMBIC_tfce_tstat_CP_4mm,STR3_LIMBIC_tfce_p_tstat_CP_4mm,STR3_EXEC_tstat_CP_4mm,STR3_EXEC_tfce_corrp_tstat_CP_4mm,STR3_EXEC_vox_p_tstat_CP_4mm,STR3_EXEC_vox_corrp_tstat_CP_4mm,STR3_EXEC_tfce_tstat_CP_4mm,STR3_EXEC_tfce_p_tstat_CP_4mm,GM_0.0_tstat_CP_4mm,GM_0.0_tfce_corrp_tstat_CP_4mm,GM_0.0_vox_p_tstat_CP_4mm,GM_0.0_vox_corrp_tstat_CP_4mm,GM_0.0_tfce_tstat_CP_4mm,GM_0.0_tfce_p_tstat_CP_4mm,MEAN_STR_CONTROLS_1mm,MEAN_STR_CONTROLS_2mm,MEAN_STR_CONTROLS_3mm,MEAN_STR_CONTROLS_4mm,MEAN_CAUD_CONTROLS_1mm,MEAN_CAUD_CONTROLS_2mm,MEAN_CAUD_CONTROLS_3mm,MEAN_CAUD_CONTROLS_4mm,MEAN_PUTA_CONTROLS_1mm,MEAN_PUTA_CONTROLS_2mm,MEAN_PUTA_CONTROLS_3mm,MEAN_PUTA_CONTROLS_4mm,MEAN_PALL_CONTROLS_1mm,MEAN_PALL_CONTROLS_2mm,MEAN_PALL_CONTROLS_3mm,MEAN_PALL_CONTROLS_4mm,MEAN_STR3_MOTOR_CONTROLS_1mm,MEAN_STR3_MOTOR_CONTROLS_2mm,MEAN_STR3_MOTOR_CONTROLS_3mm,MEAN_STR3_MOTOR_CONTROLS_4mm,MEAN_STR3_LIMBIC_CONTROLS_1mm,MEAN_STR3_LIMBIC_CONTROLS_2mm,MEAN_STR3_LIMBIC_CONTROLS_3mm,MEAN_STR3_LIMBIC_CONTROLS_4mm,MEAN_STR3_EXEC_CONTROLS_1mm,MEAN_STR3_EXEC_CONTROLS_2mm,MEAN_STR3_EXEC_CONTROLS_3mm,MEAN_STR3_EXEC_CONTROLS_4mm,MEAN_STR3_MOTOR_Pall_CONTROLS_1mm,MEAN_STR3_MOTOR_Pall_CONTROLS_2mm,MEAN_STR3_MOTOR_Pall_CONTROLS_3mm,MEAN_STR3_MOTOR_Pall_CONTROLS_4mm,MEAN_CONTROLS_GM_thr0.00_1,MEAN_CONTROLS_GM_thr0.00_2,MEAN_CONTROLS_GM_thr0.00_4,MEAN_CONTROLS_GM_thr0.00_6,MEAN_CONTROLS_GM_thr0.00_8,MEAN_CONTROLS_GM_thr0.00_10,MEAN_CONTROLS_GM_1,MEAN_CONTROLS_GM_2,MEAN_CONTROLS_GM_4,MEAN_CONTROLS_GM_6,MEAN_CONTROLS_GM_8,MEAN_CONTROLS_GM_10,MEAN_STR_PATIENTS_1mm,MEAN_STR_PATIENTS_2mm,MEAN_STR_PATIENTS_3mm,MEAN_STR_PATIENTS_4mm,MEAN_CAUD_PATIENTS_1mm,MEAN_CAUD_PATIENTS_2mm,MEAN_CAUD_PATIENTS_3mm,MEAN_CAUD_PATIENTS_4mm,MEAN_PUTA_PATIENTS_1mm,MEAN_PUTA_PATIENTS_2mm,MEAN_PUTA_PATIENTS_3mm,MEAN_PUTA_PATIENTS_4mm,MEAN_PALL_PATIENTS_1mm,MEAN_PALL_PATIENTS_2mm,MEAN_PALL_PATIENTS_3mm,MEAN_PALL_PATIENTS_4mm,MEAN_STR3_MOTOR_PATIENTS_1mm,MEAN_STR3_MOTOR_PATIENTS_2mm,MEAN_STR3_MOTOR_PATIENTS_3mm,MEAN_STR3_MOTOR_PATIENTS_4mm,MEAN_STR3_LIMBIC_PATIENTS_1mm,MEAN_STR3_LIMBIC_PATIENTS_2mm,MEAN_STR3_LIMBIC_PATIENTS_3mm,MEAN_STR3_LIMBIC_PATIENTS_4mm,MEAN_STR3_EXEC_PATIENTS_1mm,MEAN_STR3_EXEC_PATIENTS_2mm,MEAN_STR3_EXEC_PATIENTS_3mm,MEAN_STR3_EXEC_PATIENTS_4mm,MEAN_STR3_MOTOR_Pall_PATIENTS_1mm,MEAN_STR3_MOTOR_Pall_PATIENTS_2mm,MEAN_STR3_MOTOR_Pall_PATIENTS_3mm,MEAN_STR3_MOTOR_Pall_PATIENTS_4mm,MEAN_PATIENTS_GM_thr0.00_1,MEAN_PATIENTS_GM_thr0.00_2,MEAN_PATIENTS_GM_thr0.00_4,MEAN_PATIENTS_GM_thr0.00_6,MEAN_PATIENTS_GM_thr0.00_8,MEAN_PATIENTS_GM_thr0.00_10,MEAN_PATIENTS_GM_1,MEAN_PATIENTS_GM_2,MEAN_PATIENTS_GM_4,MEAN_PATIENTS_GM_6,MEAN_PATIENTS_GM_8,MEAN_PATIENTS_GM_10,MEAN_STR_LEMON_1mm,MEAN_STR_LEMON_2mm,MEAN_STR_LEMON_3mm,MEAN_STR_LEMON_4mm,MEAN_CAUD_LEMON_1mm,MEAN_CAUD_LEMON_2mm,MEAN_CAUD_LEMON_3mm,MEAN_CAUD_LEMON_4mm,MEAN_PUTA_LEMON_1mm,MEAN_PUTA_LEMON_2mm,MEAN_PUTA_LEMON_3mm,MEAN_PUTA_LEMON_4mm,MEAN_PALL_LEMON_1mm,MEAN_PALL_LEMON_2mm,MEAN_PALL_LEMON_3mm,MEAN_PALL_LEMON_4mm,MEAN_STR3_MOTOR_LEMON_1mm,MEAN_STR3_MOTOR_LEMON_2mm,MEAN_STR3_MOTOR_LEMON_3mm,MEAN_STR3_MOTOR_LEMON_4mm,MEAN_STR3_LIMBIC_LEMON_1mm,MEAN_STR3_LIMBIC_LEMON_2mm,MEAN_STR3_LIMBIC_LEMON_3mm,MEAN_STR3_LIMBIC_LEMON_4mm,MEAN_STR3_EXEC_LEMON_1mm,MEAN_STR3_EXEC_LEMON_2mm,MEAN_STR3_EXEC_LEMON_3mm,MEAN_STR3_EXEC_LEMON_4mm,MEAN_STR3_MOTOR_Pall_LEMON_1mm,MEAN_STR3_MOTOR_Pall_LEMON_2mm,MEAN_STR3_MOTOR_Pall_LEMON_3mm,MEAN_STR3_MOTOR_Pall_LEMON_4mm,MEAN_LEMON_GM_thr0.00_1,MEAN_LEMON_GM_thr0.00_2,MEAN_LEMON_GM_thr0.00_4,MEAN_LEMON_GM_thr0.00_6,MEAN_LEMON_GM_thr0.00_8,MEAN_LEMON_GM_thr0.00_10,MEAN_LEMON_GM_1,MEAN_LEMON_GM_2,MEAN_LEMON_GM_4,MEAN_LEMON_GM_6,MEAN_LEMON_GM_8,MEAN_LEMON_GM_10,MEAN_STR_ALL_1mm,MEAN_STR_ALL_2mm,MEAN_STR_ALL_3mm,MEAN_STR_ALL_4mm,MEAN_CAUD_ALL_1mm,MEAN_CAUD_ALL_2mm,MEAN_CAUD_ALL_3mm,MEAN_CAUD_ALL_4mm,MEAN_PUTA_ALL_1mm,MEAN_PUTA_ALL_2mm,MEAN_PUTA_ALL_3mm,MEAN_PUTA_ALL_4mm,MEAN_PALL_ALL_1mm,MEAN_PALL_ALL_2mm,MEAN_PALL_ALL_3mm,MEAN_PALL_ALL_4mm,MEAN_STR3_MOTOR_ALL_1mm,MEAN_STR3_MOTOR_ALL_2mm,MEAN_STR3_MOTOR_ALL_3mm,MEAN_STR3_MOTOR_ALL_4mm,MEAN_STR3_LIMBIC_ALL_1mm,MEAN_STR3_LIMBIC_ALL_2mm,MEAN_STR3_LIMBIC_ALL_3mm,MEAN_STR3_LIMBIC_ALL_4mm,MEAN_STR3_EXEC_ALL_1mm,MEAN_STR3_EXEC_ALL_2mm,MEAN_STR3_EXEC_ALL_3mm,MEAN_STR3_EXEC_ALL_4mm,MEAN_STR3_MOTOR_Pall_ALL_1mm,MEAN_STR3_MOTOR_Pall_ALL_2mm,MEAN_STR3_MOTOR_Pall_ALL_3mm,MEAN_STR3_MOTOR_Pall_ALL_4mm,MEAN_ALL_GM_thr0.00_1,MEAN_ALL_GM_thr0.00_2,MEAN_ALL_GM_thr0.00_4,MEAN_ALL_GM_thr0.00_6,MEAN_ALL_GM_thr0.00_8,MEAN_ALL_GM_thr0.00_10,MEAN_ALL_GM_1,MEAN_ALL_GM_2,MEAN_ALL_GM_4,MEAN_ALL_GM_6,MEAN_ALL_GM_8,MEAN_ALL_GM_10
126439333,-13.467693,2.255686,3.432523,"(-13.467692624176099, 2.2556863621849201, 3.43...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.94524,0.78975,0.966225,0.011833,1394.345581,0.989525,,,,,,,,,,,,,,,,,,,,,,,,,1.802169,0.779833,0.958325,0.0001,2150.244385,0.989925,-2.137132,,0.033117,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.880892,0.733661,0.955336,0.017064,1343.924072,0.980879,,,,,,,,,,,,,,,,,,,,,,,,,1.827542,0.759503,0.954697,0.012633,2220.421875,0.986867,-2.091411,,0.040158,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.062575,0.068675,,,,,,,,,,,,,,,0.062575,0.068675,,0.012041,0.043625,0.054616,0.058094,0.057302,,0.010119,0.041043,0.032146,0.019248,0.009335,,,,,,,,,,,,,,,0.048417,0.05621,,,,,,,,,,,,,,,0.048417,0.05621,,0.006475,0.035088,0.049518,0.052665,0.052125,,0.006475,0.033433,0.023364,0.010929,0.002097,,,,,,,,,,,,,,,0.074357,0.079958,,,,,,,,,,,,,,,0.074357,0.079958,0.000675,0.004594,0.027371,0.040553,0.047259,0.048396,0.000449,0.002704,0.014075,0.017452,0.013996,0.008554,,,,,,,,,,,,,,,0.068416,0.074434,,,,,,,,,,,,,,,0.068416,0.074434,0.000458,0.003964,0.025395,0.038772,0.045811,0.047311,0.000304,0.002348,0.013047,0.016171,0.012646,0.007206
126432538,-53.603259,-8.120055,0.868643,"(-53.603259238745785, -8.12005526025591, 0.868...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000265,0.000333,0.000573,0.000647,0.000756,-0.007511,-0.008295,-0.010352,-0.010512,-0.0131,-0.014798,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000696,0.000725,0.000721,-0.014283,-0.01525,-0.015482,-0.015565,-0.017517,-0.018457,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.001274,0.001159,0.001077,-0.008329,-0.009362,-0.009243,-0.009185,-0.011193,-0.012776,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.001283,0.001044,0.000914,-0.00909,-0.010071,-0.010276,-0.010227,-0.012266,-0.013677
126435762,-16.288739,-34.198856,-19.944686,"(-16.2887392340876, -34.198856409531899, -19.9...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000658,0.000316,0.004218,0.003884,0.003644,-0.015568,-0.014134,-0.014413,-0.013566,-0.013381,-0.013377,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.001225,0.001841,0.00157,0.001557,-0.01568,-0.016575,-0.021241,-0.023073,-0.02241,-0.021094,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000277,0.003076,0.002507,0.002637,-0.009961,-0.010811,-0.011691,-0.011646,-0.011798,-0.011739,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.1e-05,0.003061,0.002131,0.002107,-0.011785,-0.012178,-0.012919,-0.013119,-0.013036,-0.012787
126439285,-17.820212,3.595613,-32.872868,"(-17.820211862154199, 3.5956125920259701, -32....",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000368,0.000398,0.000637,0.00108,0.001356,0.001461,-0.001903,-0.003211,-0.007244,-0.009429,-0.010958,-0.011988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000294,0.00059,0.001133,0.001391,0.001469,-0.006107,-0.007081,-0.011379,-0.015942,-0.019306,-0.020558,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.00013,0.000175,0.000154,0.000162,0.000663,-0.001071,-0.002832,-0.007346,-0.011512,-0.013568,-0.014067,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.4e-05,9.5e-05,0.00018,0.000189,0.000199,-0.001639,-0.002875,-0.006208,-0.009483,-0.011921,-0.01294
126435744,-23.726456,-25.139,-16.692841,"(-23.7264555931642, -25.138999524959399, -16.6...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.002226,0.004855,0.006771,0.008202,-0.023416,-0.025105,-0.021038,-0.015623,-0.014004,-0.012541,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000531,0.001833,0.003544,0.004293,0.005882,-0.017128,-0.016154,-0.012374,-0.012302,-0.014635,-0.015245,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.9e-05,0.00067,0.003744,0.00513,0.006953,-0.020761,-0.019335,-0.015081,-0.011171,-0.010688,-0.009855,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000131,0.000792,0.003078,0.004411,0.006203,-0.020665,-0.01919,-0.014494,-0.011391,-0.011369,-0.010711


In [28]:
GOALL = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.all.v6.2.symbols.gmt'
GOBP  = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.bp.v6.2.symbols.gmt'
GOMF  = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.cc.v6.2.symbols.gmt'
GOCC  = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.mf.v6.2.symbols.gmt'
            
GOALLREACTOME = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.reactome.v6.2.symbols.gmt,gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.all.v6.2.symbols.gmt'
GOBPREACTOME  = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.reactome.v6.2.symbols.gmt,gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.bp.v6.2.symbols.gmt'
GOALLKEGG     ='gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.kegg.v6.2.symbols.gmt,gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.all.v6.2.symbols.gmt'
GOALLREACTKEGG= 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.kegg.v6.2.symbols.gmt,gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.reactome.v6.2.symbols.gmt,gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.all.v6.2.symbols.gmt'

CURATEDALL = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.all.v6.2.symbols.gmt'
CANONICAL  = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.v6.2.symbols.gmt' 
CHEMGEN    = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cgp.v6.2.symbols.gmt'
BIOCARTA   = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.biocarta.v6.2.symbols.gmt'
KEGG       = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.kegg.v6.2.symbols.gmt'
REACTOME   = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.reactome.v6.2.symbols.gmt'
HALLMARK   = 'gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/h.all.v6.2.symbols.gmt' 



In [29]:

def make_geneset_enrichment_stats(nucleus, gset, gset_name, gset_size, fname, corr_type='pearson'):

    ################################
    # This code runs a Geneset enrichment analysis 
    # Gene Set Enrichment Analysis (GSEA) is a computational method 
    # that determines whether an a priori defined set of genes shows statistically 
    # significant, concordant differences between two biological states (e.g. phenotypes). 
    # https://software.broadinstitute.org/gsea/index.jsp
    
    ################################
    # Create preranked table 
    # This is a 2x20737 matrix with gene names for col1 and R-values for col2
    # R-Values represent the association between gene-expression leves and t-statistical susceptibility difference 
    
    print 'Running gene set enrichment analysis for %s nucleus with %s and size %s and %s correlation' %(fname, gset_name, gset_size, corr_type)
    
    rank_file   = os.path.join(gsea_dir, 'GSEA_%s_%s.rnk'%(fname,corr_type))
    
    if not os.path.isfile(rank_file):
        print '.....creating rank file'
        GENES = AHBA.columns[:-28]
        df_GSEA = pd.DataFrame(index = ['r_val'], columns =GENES)#.T

        df_chi  = df_MNI.drop([i for i in df_MNI.columns if i not in ['STR_tstat_CP_1mm']],axis=1)
        df_chi  = pd.DataFrame(dfs["df_MU"][nucleus].drop(wells,axis=0).dropna())

        for gene in GENES:
            #print gene
            df_chigen = pd.DataFrame(index = df_chi.index)
            df_chigen['GEN'] = AHBA[gene]
            df_chigen['chi'] = df_chi[nucleus]

            # make correlations 
            if corr_type == 'pearson':
                pearson  = pearsonr(df_chigen['chi'], df_chigen['GEN'])
                df_GSEA.loc["r_val"][gene] =  pearson[0]
            elif corr_type == 'spearman':
                spearman  = spearmanr(df_chigen['chi'], df_chigen['GEN'])
                df_GSEA.loc["r_val"][gene] =  spearman[0]

        df_GSEA = df_GSEA.T.sort_values("r_val", ascending=False)
        df_GSEA.index.name = "Gene"
        df_GSEA.to_csv(rank_file, sep='\t')
    
    ###############################
    # Run gsea-3.0.jar
    
    outfname = '%s_%s_%s' %(fname, gset_name, gset_size )
    logfile  = os.path.join(gsea_dir, 'log_%s.txt'%outfname)
    
    outfolder = [i for i in os.listdir(gsea_dir) if outfname in i ]
    
    if outfolder:
        print '.....file made:', outfolder[0]
    
    else:
        print '..... runing gsea-3.0.jar'
        gsea_cmd = ' '.join(['java -Xmx512m',
                    '-cp ~/Desktop/gsea-3.0.jar xtools.gsea.GseaPreranked', 
                    '-gmx ' + gset,
                    '-norm meandiv -nperm 1000', 
                    '-rnk '+ rank_file,
                    '-scoring_scheme weighted',
                    '-rpt_label ' + outfname,
                    '-create_svgs false',
                    '-make_sets true',
                    '-plot_top_x 1000', 
                    '-rnd_seed timestamp',
                    '-set_max %s'%gset_size, 
                    '-set_min 15',
                    '-zip_report false',
                    '-out ' + gsea_dir, 
                    '-gui false',
                    '> ' + logfile 
                  ])

        print '..... %s' %gsea_cmd
        os.system(gsea_cmd)


In [30]:
nuc = ['STR3_MOTOR_tstat_LL_1mm']


for size in [500]:
    #make_geneset_enrichment_stats(nuc, GOALL, 'GO_all' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, KEGG, 'KEGG' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, REACTOME, 'REACTOME' , gset_size=size, fname = 'STR', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTOME, 'GOALLREACTOME' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, GOALLKEGG, 'GOALLKEGG' , gset_size=size, fname = 'STR', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTKEGG, 'GOALLREACTKEGG' , gset_size=size, fname = 'STR_LL', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, HALLMARK, 'HALLMARK' , gset_size=size, fname = 'STR_LL', corr_type='pearson')


Running gene set enrichment analysis for STR nucleus with GOALLREACTOME and size 500 and pearson correlation
.....creating rank file
.....file made: STR_GOALLREACTOME_500.GseaPreranked.1559596403531
Running gene set enrichment analysis for STR_LL nucleus with GOALLREACTKEGG and size 500 and pearson correlation
.....file made: log_STR_LL_GOALLREACTKEGG_500.txt


In [8]:
nuc = ['STR3_MOTOR_tstat_CP_1mm']
nuc = ['MEAN_STR3_MOTOR_LEMON_1mm']

for size in [500]:
    #make_geneset_enrichment_stats(nuc, GOALL, 'GO_all' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, KEGG, 'KEGG' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, REACTOME, 'REACTOME' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, GOALLREACTOME, 'GOALLREACTOME' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, GOALLKEGG, 'GOALLKEGG' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, GOALLREACTKEGG, 'GOALLREACTKEGG' , gset_size=size, fname = 'STR3_MOTOR_MEAN_LEMON', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, HALLMARK, 'HALLMARK' , gset_size=size, fname = 'STR3_MOTOR_MEAN_LEMON', corr_type='pearson')


Running gene set enrichment analysis for STR3_MOTOR_MEAN_LEMON nucleus with HALLMARK and size 500 and pearson correlation
..... runing gsea-3.0.jar
..... java -Xmx512m -cp ~/Desktop/gsea-3.0.jar xtools.gsea.GseaPreranked -gmx gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/h.all.v6.2.symbols.gmt -norm meandiv -nperm 1000 -rnk /Users/kanaaax/Desktop/GSEA/GSEA_STR3_MOTOR_MEAN_LEMON_pearson.rnk -scoring_scheme weighted -rpt_label STR3_MOTOR_MEAN_LEMON_HALLMARK_500 -create_svgs false -make_sets true -plot_top_x 1000 -rnd_seed timestamp -set_max 500 -set_min 15 -zip_report false -out /Users/kanaaax/Desktop/GSEA -gui false > /Users/kanaaax/Desktop/GSEA/log_STR3_MOTOR_MEAN_LEMON_HALLMARK_500.txt


In [None]:
nuc = ['MEAN_STR_LEMON_1mm']

for size in [500]:
    #make_geneset_enrichment_stats(nuc, GOALL, 'GO_all' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, KEGG, 'KEGG' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, REACTOME, 'REACTOME' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, GOALLREACTOME, 'GOALLREACTOME' , gset_size=size, fname = 'STR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, GOALLKEGG, 'GOALLKEGG' , gset_size=size, fname = 'STR', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTKEGG, 'GOALLREACTKEGG' , gset_size=size, fname = 'STR_MEAN_LEMON', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, HALLMARK, 'HALLMARK' , gset_size=size, fname = 'STR_sp', corr_type='spearman')


In [8]:
nuc = ['STR3_MOTOR_tstat_CP_1mm']

for size in [500]:
    #make_geneset_enrichment_stats(nuc, GOALL, 'GO_all' , gset_size=size, fname = 'STR3_MOTOR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, KEGG, 'KEGG' , gset_size=size, fname = 'STR3_MOTOR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, REACTOME, 'REACTOME' , gset_size=size, fname = 'STR3_MOTOR', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, GOALLREACTOME, 'GOALLREACTOME' , gset_size=size, fname = 'STR3_MOTOR_plt', corr_type='pearson')
    #make_geneset_enrichment_stats(nuc, GOALLKEGG, 'GOALLKEGG' , gset_size=size, fname = 'STR3_MOTOR', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTKEGG, 'GOALLREACTKEGG' , gset_size=size, fname = 'STR3_MOTOR', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, HALLMARK, 'HALLMARK' , gset_size=size, fname = 'STR3_MOTOR', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTKEGG, 'GOALLREACTKEGG' , gset_size=size, fname = 'STR3_MOTOR_spr', corr_type='spearman')
    make_geneset_enrichment_stats(nuc, HALLMARK, 'HALLMARK' , gset_size=size, fname = 'STR3_MOTOR_spr', corr_type='spearman')
    

Running gene set enrichment analysis for STR3_MOTOR nucleus with GOALLREACTKEGG and size 500 and pearson correlation
.....creating rank file
.....file made: log_STR3_MOTOR_GOALLREACTKEGG_500.txt
Running gene set enrichment analysis for STR3_MOTOR nucleus with HALLMARK and size 500 and pearson correlation
.....file made: STR3_MOTOR_HALLMARK_500.GseaPreranked.1559603519788
Running gene set enrichment analysis for STR3_MOTOR_spr nucleus with GOALLREACTKEGG and size 500 and spearman correlation
.....creating rank file
.....file made: log_STR3_MOTOR_spr_GOALLREACTKEGG_500.txt
Running gene set enrichment analysis for STR3_MOTOR_spr nucleus with HALLMARK and size 500 and spearman correlation
.....file made: STR3_MOTOR_spr_HALLMARK_500.GseaPreranked.1559860839809


In [9]:
nuc = ['STR3_EXEC_tstat_CP_1mm']

for size in [500]:
    make_geneset_enrichment_stats(nuc, GOALL, 'GO_all' , gset_size=size, fname = 'STR3_EXEC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, KEGG, 'KEGG' , gset_size=size, fname = 'STR3_EXEC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, REACTOME, 'REACTOME' , gset_size=size, fname = 'STR3_EXEC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, G®†OALLREACTOME, 'GOALLREACTOME' , gset_size=size, fname = 'STR3_EXEC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLKEGG, 'GOALLKEGG' , gset_size=size, fname = 'STR3_EXEC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTKEGG, 'GOALLREACTKEGG' , gset_size=size, fname = 'STR3_EXEC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, HALLMARK, 'HALLMARK' , gset_size=size, fname = 'STR3_EXEC', corr_type='pearson')


SyntaxError: invalid syntax (<ipython-input-9-14840d89c641>, line 7)

In [None]:
nuc = ['STR3_LIMBIC_tstat_CP_1mm']

for size in [500]:
    make_geneset_enrichment_stats(nuc, GOALL, 'GO_all' , gset_size=size, fname = 'STR3_LIMBIC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, KEGG, 'KEGG' , gset_size=size, fname = 'STR3_LIMBIC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, REACTOME, 'REACTOME' , gset_size=size, fname = 'STR3_LIMBIC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTOME, 'GOALLREACTOME' , gset_size=size, fname = 'STR3_LIMBIC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLKEGG, 'GOALLKEGG' , gset_size=size, fname = 'STR3_LIMBIC', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTKEGG, 'GOALLREACTKEGG' , gset_size=size, fname = 'STR3_MOTOR', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, HALLMARK, 'HALLMARK' , gset_size=size, fname = 'STR3_LIMBIC', corr_type='pearson')


In [12]:
nuc = ['GM_0.0_tstat_CP_1mm']

for size in [500]:
    make_geneset_enrichment_stats(nuc, GOALL, 'GO_all' , gset_size=size, fname = 'GM_0.0', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, KEGG, 'KEGG' , gset_size=size, fname = 'GM_0.0', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, REACTOME, 'REACTOME' , gset_size=size, fname = 'GM_0.0', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTOME, 'GOALLREACTOME' , gset_size=size, fname = 'GM', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLKEGG, 'GOALLKEGG' , gset_size=size, fname = 'GM_0.0', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, GOALLREACTKEGG, 'GOALLREACTKEGG' , gset_size=size, fname = 'GM_0.0', corr_type='pearson')
    make_geneset_enrichment_stats(nuc, HALLMARK, 'HALLMARK' , gset_size=size, fname = 'GM_0.0', corr_type='pearson')


Running gene set enrichment analysis for GM_0.0 nucleus with GO_all and size 500 and pearson correlation
.....creating rank file
..... runing gsea-3.0.jar
..... java -Xmx512m -cp ~/Desktop/gsea-3.0.jar xtools.gsea.GseaPreranked -gmx gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c5.all.v6.2.symbols.gmt -norm meandiv -nperm 1000 -rnk /Users/kanaaax/Desktop/GSEA/GSEA_GM_0.0_pearson.rnk -scoring_scheme weighted -rpt_label GM_0.0_GO_all_500 -create_svgs false -make_sets true -plot_top_x 1000 -rnd_seed timestamp -set_max 500 -set_min 15 -zip_report false -out /Users/kanaaax/Desktop/GSEA -gui false > /Users/kanaaax/Desktop/GSEA/log_GM_0.0_GO_all_500.txt
Running gene set enrichment analysis for GM_0.0 nucleus with KEGG and size 500 and pearson correlation
..... runing gsea-3.0.jar
..... java -Xmx512m -cp ~/Desktop/gsea-3.0.jar xtools.gsea.GseaPreranked -gmx gseaftp.broadinstitute.org://pub/gsea/gene_sets_final/c2.cp.kegg.v6.2.symbols.gmt -norm meandiv -nperm 1000 -rnk /Users/kanaaax/De

In [1]:
import pandas as pd 

In [22]:
x = pd.read_csv('/Users/kanaaax/Desktop/GSEA/STR3_MOTOR_GOALLREACTKEGG_500.GseaPreranked.1559602912792/edb/GSEA_STR3_MOTOR_pearson.rnk',
            sep='\t', header=None)[0][0:1000]
list(x)

['RPL22',
 'COL16A1',
 'ELL2',
 'AC005393.1',
 'C17orf56',
 'MAGI2',
 'RPL10A',
 'C20orf39',
 'PLEKHA1',
 'ZDHHC11',
 'LSM3',
 'SUPT4H1',
 'LOC648771',
 'RPL23',
 'PSRC1',
 'LOC440311',
 'FTH1P20',
 'POLR3GL',
 'CNOT2',
 'POLR2I',
 'SFTPC',
 'ARHGEF2',
 'SERTAD2',
 'MYH9',
 'EXOSC10',
 'HNRNPC',
 'SNHG1',
 'MAP3K3',
 'PUF60',
 'MAN2A2',
 'RPS8',
 'NACA',
 'BLOC1S1',
 'GUK1',
 'CTSB',
 'DDRGK1',
 'ANAPC5',
 'NACA2',
 'LOC653881',
 'GPR124',
 'NME2P1',
 'LOC100289173',
 'RPL32',
 'RAPGEF5',
 'DNAJC25-GNG10',
 'SHB',
 'TMEM50A',
 'SEPW1',
 'BSG',
 'HNRNPUL1',
 'MGRN1',
 'RPL37',
 'OSTF1',
 'RPL31',
 'PABPN1',
 'VASH1',
 'NME2',
 'FAHD2B',
 'SH3PXD2A',
 'SH3GL1',
 'SNED1',
 'RPL23A',
 'RPL36A',
 'OSBPL1A',
 'MTMR2',
 'KIAA0892',
 'TMEM151A',
 'RPL13AP3',
 'CA1',
 'STAC2',
 'RPS3P3',
 'RPL15',
 'UXT',
 'DLG1',
 'UNC5B',
 'MYO9B',
 'MYBPHL',
 'RPL11',
 'HNRPDL',
 'EIF3G',
 'RPL35A',
 'EXOSC1',
 'AMZ2',
 'RPL39',
 'MRPL33',
 'EIF1',
 'IFRD2',
 'H3F3AP4',
 'RNF220',
 'RBMX',
 'C8orf46',
 'AFG3