In [1]:
%matplotlib inline
import os
import math 
import numpy as np
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from scipy.stats import pearsonr, spearmanr
pd.options.display.max_rows = 2000
pd.options.display.max_columns = 999

# Apply seaborn's built-in 'white' style to every figure drawn below.
sns.set_style('white')

# Input / output locations.
# The original absolute paths are kept as defaults so existing setups keep
# working; set the AHBA_DIR / SAVE_FIG_DIR environment variables to point the
# notebook somewhere else without editing it.
ahba_dir = os.environ.get('AHBA_DIR', '/scr/malta3/workspace/project_iron/AHBA')
save_fig_dir = os.environ.get(
    'SAVE_FIG_DIR',
    '/Users/kanaan/Google Drive/TS-EUROTRAIN/Papers/2016_QSM_paper/Figures_python_v2',
)


# ROI label groups; `rois` is the concatenation of both groups, in this order.
first_rois = [
    'L_Caud_Puta', 'R_Caud_Puta', 'Caud_Puta',
    'L_Pall', 'R_Pall',
    'L_BG', 'R_BG', 'BG',
]
atlas_rois = [
    'L_BS', 'R_BS', 'BS',
    'STR3_MOTOR', 'STR3_EXEC', 'STR3_LIMBIC',
    'L_SUBCORTICAL', 'R_SUBCORTICAL', 'SUBCORTICAL',
]
rois = first_rois + atlas_rois

In [2]:
##################################################################################################
# Genesets
#
# Each GS_* name is a list of gene symbols. Composite sets (GS_GABA_GLU,
# GS_BIOBANK) are built by concatenating the simpler lists defined here.
##################################################################################################

GS_IRON_D = ['THRSP', 'TF', 'MAL', 'KLK6', 'HOMER1', 'MOBP', 'APOD', 'MOG', 'CRYAB', 'APOC1', 'CA2', 'RASGRP1',
             'STMN4', 'LYZ', 'GSTM1', 'CTSS', 'DCK']
GS_IRON   = ['FTH1', 'FTL', 'HFE', 'HFE2', 'IL1A', 'IL6', 'IL6R', 'IREB2', 'SLC40A1', 'TF', 'TFR2', 'TNF']
GS_DA     = ['COMT', 'DDC', 'MAOA', 'MAOB', 'NQO1', 'PPP2CA', 'PPP2CB', 'PRKACA', 'PRKACB', 'PRKACG', 'TH', 'TYR']
GS_DA_receptor_bind = ['GNA13', 'DLG4', 'DNM1', 'DNM2', 'DRD1', 'DRD3', 'GNA12', 'GNAS', 'ARRB2', 'ATP1A3', 'PALM',
                       'CLIC6', 'PTPN11', 'PPP1R1B', 'DNAJC14', 'CAV2', 'SLC9A3R1']
GS_DA_receptor_sig  = ['ADCY5', 'ADCY6', 'ARRB2', 'CALY', 'DRD1', 'DRD2', 'DRD3', 'DRD4', 'DRD5', 'FLNA',
                       'GNA11', 'GNA14', 'GNA15', 'GNAI3', 'GNAL', 'GNAO1', 'GNAQ', 'GNAS', 'GNB1', 'GNG2',
                       'GPR21', 'GPR52', 'GSK3A', 'GSK3B', 'HMP19', 'KLF16', 'OPRM1', 'RGS9', 'SLC9A3R1']
GS_DA_transport     = ['CHRM5', 'DRD1', 'PARK2', 'PARK7', 'SLC18A2', 'SLC22A1', 'SLC22A2', 'SLC22A3', 'SLC6A2',
                       'SLC6A3', 'SNCA']
GS_DA_transmission  = ['CDK5', 'CRH', 'CRHBP', 'DRD1', 'DRD2', 'DRD3', 'DRD4', 'DRD5', 'RASD2', 'TH']
GS_ANMC = ['ME1', 'ALDH5A1', 'GBE1', 'GALM', 'PYGL', 'CPS1', 'PFKFB3', 'PYGB', 'IDH2', 'ENO1', 'PPP1R1A',
           'MDH2', 'CS', 'PYGM', 'PGM3', 'PHKG1', 'SLC3A2', 'PFKFB4', 'KHK', 'LDHB', 'PCK2', 'SLC2A8', 'PGM2',
           'GPT', 'AKR1B1', 'NANS', 'PDK4', 'OGDHL', 'DHTKD1', 'PFKM', 'PGM1', 'PC', 'AGL']
GS_GLU  = ['UNC13B', 'RIMS1', 'GLS2', 'GLS', 'SLC38A2', 'SLC17A7', 'RAB3A', 'SLC1A1', 'SLC1A6', 'SLC1A7',
           'SNAP25', 'STX1A', 'STXBP1', 'VAMP2', 'SYT1']
# 'GABRA6' and 'SRC' are separate entries: without the comma between them
# Python concatenates adjacent string literals into the single bogus symbol
# 'GABRA6SRC'.
GS_GABA = ['GPHN', 'GABARAP', 'DNM1', 'GABRA1', 'GABRA2', 'GABRA3', 'GABRA4', 'GABRA5', 'GABRA6', 'SRC']
GS_GABA_GLU = GS_GLU + GS_GABA

# Single-gene sets.
GS_TF       = ['TF']
GS_FTH      = ['FTH1']
GS_FTL      = ['FTL']
GS_HRE      = ['HFE']
GS_HRE2     = ['HFE2']
GS_SLC25    = ['SLC25A37']
GS_SLC40    = ['SLC40A1']
GS_BIOBANK  = GS_TF + GS_FTH + GS_HRE + GS_SLC25


In [3]:
# Load the expression table of a single gene set

def return_expression_df(geneset):
    """Read ``AHBA_<geneset>.csv`` from ``ahba_dir`` into a DataFrame.

    Parameters
    ----------
    geneset : str
        Suffix of the CSV file name, e.g. ``'IRON'`` for ``AHBA_IRON.csv``.

    Returns
    -------
    pandas.DataFrame
        The file contents, with the first CSV column used as the index.
    """
    csv_name = 'AHBA_%s.csv' % geneset
    csv_path = os.path.join(ahba_dir, csv_name)
    return pd.read_csv(csv_path, index_col=0)

# Load one expression DataFrame per gene set from `ahba_dir`.
# Each call reads the file 'AHBA_<name>.csv'; the argument is the file-name
# suffix, which does not always match the variable name (see notes below).
IRON             = return_expression_df('IRON')
IRON_D           = return_expression_df('IRON_D')
DA_met           = return_expression_df('DA_metabolism')
DA_jellen        = return_expression_df('DA_jellen')
DA_jellen2       = return_expression_df('DA_jellen2')
# NOTE(review): variable is DA_receptor_bind but the file suffix is
# 'DA_receptor' -- confirm this is the intended file.
DA_receptor_bind = return_expression_df('DA_receptor')
DA_receptor_sig  = return_expression_df('DA_receptor_sig')
# NOTE(review): 'DA_tranmission' (missing "s") looks like a typo; left as-is
# because it may match the actual file name on disk -- confirm.
DA_transmission  = return_expression_df('DA_tranmission')
DA_transport     = return_expression_df('DA_transport')
ANMC             = return_expression_df('ANMC')
GLU              = return_expression_df('GLU')
GABA             = return_expression_df('GABA')
GLU_GABA         = return_expression_df('GLU_GABA')
# Single-gene tables.
TF               = return_expression_df('TF')
FTH              = return_expression_df('FTH')
FTL              = return_expression_df('FTL')
HRE              = return_expression_df('HRE')
HRE2             = return_expression_df('HRE2')
SLC25            = return_expression_df('SLC25')
SLC40            = return_expression_df('SLC40')
BIOBANK          = return_expression_df('BIOBANK')
HOUSEKEEPING     = return_expression_df('HOUSEKEEPING') 


IOError: File /scr/malta3/workspace/project_iron/AHBA/AHBA_IRON.csv does not exist

In [None]:

# Read QSM values per ROI for each well
df_MNI = pd.read_csv(
    os.path.join(ahba_dir, 'MNI_NIFTI_VALUES.csv'),
    index_col=0,
)

In [None]:
def concat_dfs(measure):
    """Combine one summary column from every gene-set table with ``df_MNI``.

    Parameters
    ----------
    measure : str
        Name of the column to pull from each gene-set expression DataFrame
        (this notebook uses ``'PCA'`` and ``'Mean'``).

    Returns
    -------
    pandas.DataFrame
        ``df_MNI`` concatenated column-wise (aligned on the index) with:
        ``top_struct`` and ``struct`` taken from ``IRON``, plus one column per
        gene set holding that set's ``measure`` values.

    Notes
    -----
    * GLU and GABA now honour ``measure`` like every other gene set; they
      were previously hard-wired to the ``PCA`` column, so the 'Mean' frame
      silently contained PCA values for those two columns.
    * The column list previously lacked a comma after ``'BIOBANK'``, which
      created a spurious all-NaN ``'BIOBANKtop_struct'`` column. Columns are
      now created only by the assignments below.
    """
    # Scaling factor applied to every expression column (1 == leave unchanged).
    xval = 1

    # (output column name, source DataFrame) in output order.
    expression_sources = [
        ('IRON',             IRON),
        ('IRON_D',           IRON_D),
        ('DA_met',           DA_met),
        ('DA_jellen',        DA_jellen),
        ('DA_jellen2',       DA_jellen2),
        ('DA_receptor_bind', DA_receptor_bind),
        ('DA_receptor_sig',  DA_receptor_sig),
        ('DA_transmission',  DA_transmission),
        ('DA_transport',     DA_transport),
        ('ANMC',             ANMC),
        ('GLU',              GLU),
        ('GABA',             GABA),
        ('GLU_GABA',         GLU_GABA),
        ('TF',               TF),
        ('FTH',              FTH),
        ('FTL',              FTL),
        ('HRE',              HRE),
        ('HRE2',             HRE2),
        ('SLC25',            SLC25),
        ('SLC40',            SLC40),
        ('BIOBANK',          BIOBANK),
        ('HOUSEKEEPING',     HOUSEKEEPING),
    ]

    # All columns are aligned to the index of the IRON table.
    df = pd.DataFrame(index=IRON.index)
    df['top_struct'] = IRON.top_struct
    df['struct']     = IRON.struct_name
    for column, source in expression_sources:
        df[column] = source[measure] * xval

    return pd.concat([df_MNI, df], axis=1)


# One combined frame per summary measure.
df_PC  = concat_dfs('PCA')
df_MU  = concat_dfs('Mean')

In [None]:
# Correlate each ROI's '<roi>_PC' column of df_MNI with the FTH column of
# df_MU and report the ROIs whose (rounded) p-value is below 0.1.
# NOTE: this rebinds the notebook-level `rois` to these three ROIs.
rois = ['STR3_MOTOR', 'STR3_EXEC', 'STR3_LIMBIC',]

for roi in rois:
    df = pd.DataFrame(columns=['MNI', 'AHBA'])
    df['MNI']  = df_MNI['%s_PC' % roi]
    df['AHBA'] = df_MU.FTH
    df = df.dropna()

    # pearsonr needs at least two paired observations.
    if len(df) < 2:
        continue

    # Compute the correlation once and unpack both results.
    r_raw, p_raw = pearsonr(df.MNI, df.AHBA)
    r_val = np.round(r_raw, 2)
    p_val = np.round(p_raw, 4)

    if p_val < 0.1:
        # Single pre-formatted string: prints identically on Python 2 and 3.
        print('%s %s %s' % (roi, r_val, p_val))
    
    

In [None]:
# ROI label groups (same values as the lists defined in the first cell).
first_rois = ['L_Caud_Puta', 'R_Caud_Puta', 'Caud_Puta', 'L_Pall', 'R_Pall', 'L_BG', 'R_BG', 'BG']
atlas_rois = ['L_BS', 'R_BS', 'BS', 'STR3_MOTOR', 'STR3_EXEC', 'STR3_LIMBIC',
              'L_SUBCORTICAL', 'R_SUBCORTICAL', 'SUBCORTICAL']

# Regression joint plot of the STR3_MOTOR_PC column against the BIOBANK column
# of df_MU. x / y / data are passed by keyword: recent seaborn releases no
# longer accept them positionally, while older releases accept keywords too.
sns.jointplot(x='STR3_MOTOR_PC', y='BIOBANK', data=df_MU, kind='reg')
