In [2]:
import os 
import zipfile
import StringIO
import pandas as pd
import numpy as np
import itertools
pd.set_option('max_colwidth', 800)

datadir = '/Users/kanaaax/Google Drive/TS-EUROTRAIN/RESULTS_QSMv3/dataframes/AHBA/ahba_data/'

# https://human.brain-map.org/
subjects = ['H0351.2001', 'H0351.2002', 'H0351.1009', 'H0351.1012', 'H0351.1015', 'H0351.1016']
donors   = ['donor1'    , 'donor2'    , 'donor3'    , 'donor4'    , 'donor5'    , 'donor6'    ]


# Allen Human Brain Atlas microarray data processing

- Data:
    - Complete normalized microarray datasets of six brains were downloaded from  http://human.brain-map.org/static/download
    - The datasets contain gene expression values normalized across all brains using an improved normalization process implemented in March 2013.
    - 
    - Hawrylycz MJ, et al. An anatomically comprehensive atlas of the adult human brain transcriptome. Nature 2012; 489: 391–399.

- Processing
    - 
    

In [5]:
##########################################################################

# Function to create 3702x20737 (samples x genes) matrix

##########################################################################

def return_donor_geneexpression(donor):
    
    #I/O
    print 'Donor:', donor
    donor_zip  = zipfile.ZipFile(os.path.join(datadir,'%s.zip'%donor))
    
    #read microarray data. take care indexing.... should have 58692 probes
    df_microarray = pd.read_csv(StringIO.StringIO(donor_zip.read('MicroarrayExpression.csv')),
                                header= None,index_col =0 )
    print '- Microarray dataframe shape: ',df_microarray.shape

    # read probe meta-data
    df_probes = pd.read_csv(StringIO.StringIO(donor_zip.read('Probes.csv')), index_col =0 )
    del df_probes.index.name
    
    # drop unannotated genes... should end up with 48170 probes
    drop_strings = ['uncharacterized', 'non-RefSeq']
    drop_strings = ['non-RefSeq']
    drop_probes  = [i for i in df_probes.index for x in drop_strings if x in df_probes.loc[i]['gene_name'] ]
    df_probes = df_probes.drop(drop_probes, axis =0)
    print '- Probes dataframe shape: ',df_probes.shape
    
    # Calc mean probe expression for each gene
    df_gene_exp = pd.concat([df_microarray, df_probes['gene_symbol']],axis=1).groupby('gene_symbol').mean().T.reset_index().drop(['index'],axis=1)
    df_gene_exp.columns.name = None
    print '- Gene-expression original-annotation dataframe shape: ',df_gene_exp.shape

    # Add metadata to dataframe
    df_meta = pd.read_csv(StringIO.StringIO(donor_zip.read('SampleAnnot.csv')))
    print '- Metadata dataframe shape: ',df_meta.shape
    
    df = pd.concat([df_gene_exp,df_meta],axis=1).set_index('well_id')
    print '- Full dataframe shape:', df.shape
    print ''
    del df.index.name
    
    # append donor name
    df['donor'] = donor
    return df


In [6]:
ahba_all = []
for donor in donors:
    df = return_donor_geneexpression(donor)
    ahba_all.append(df)

ahba = pd.concat(ahba_all)

Donor: donor1
- Microarray dataframe shape:  (58692, 946)
- Probes dataframe shape:  (48170, 6)
- Gene-expression original-annotation dataframe shape:  (946, 20737)
- Metadata dataframe shape:  (946, 13)
- Full dataframe shape: (946, 20749)

Donor: donor2
- Microarray dataframe shape:  (58692, 893)
- Probes dataframe shape:  (48170, 6)
- Gene-expression original-annotation dataframe shape:  (893, 20737)
- Metadata dataframe shape:  (893, 13)
- Full dataframe shape: (893, 20749)

Donor: donor3
- Microarray dataframe shape:  (58692, 363)
- Probes dataframe shape:  (48170, 6)
- Gene-expression original-annotation dataframe shape:  (363, 20737)
- Metadata dataframe shape:  (363, 13)
- Full dataframe shape: (363, 20749)

Donor: donor4
- Microarray dataframe shape:  (58692, 529)
- Probes dataframe shape:  (48170, 6)
- Gene-expression original-annotation dataframe shape:  (529, 20737)
- Metadata dataframe shape:  (529, 13)
- Full dataframe shape: (529, 20749)

Donor: donor5
- Microarray dataf

IOError: [Errno 2] No such file or directory: '/Users/kanaaax/Google Drive/TS-EUROTRAIN/RESULTS_QSMv3/dataframes/AHBA/ahba_data/donor6.zip'

In [None]:
#metadata included in the last 13 columns 
print 'AHBA original dataframe shape', ahba.iloc[:,:-13].shape


In [None]:
ahba.head()

## Arnatkevic̆iūtė A, Fulcher BD, Fornito A. A practical guide to linking brain-wide gene expression and neuroimaging data. Neuroimage 2019; 189: 353–367.

3.1. Step 1. Probe-to-gene re-annotation 

In microarray experiments, probe sequences correspond to a unique
portion of DNA and are assigned to genes based on available genome sequencing databases (O'Leary et al., 2016). While the AHBA (and other platforms) provide annotation tables where probes are mapped to genes, this information gets outdated with each update of the sequencing da- tabases. An accurate probe-to-gene mapping is essential for obtaining biologically meaningful findings. It is therefore necessary to re-assign probes to genes using the most current information available. This re-annotation can be done using several methods and toolboxes, some of which are summarized in Table 1. To the best of our knowledge, only three studies using the AHBA have performed probe-to-gene re-an- notation (Richiardi et al., 2015; Eising et al., 2016; Romero-Garcia et al., 2018b).


To investigate how probe-to-gene annotations change over time, we supplied a list of all available 60 bp length AHBA probe sequences (n ¼ 58,692) to the Re-annotator toolkit (Arloth et al., 2015)(Table 1). We found that 45,821 probes (78%) were uniquely annotated to a gene and could be related to an entrez ID - a stable identifier for a gene generated by the Entrez Gene database at the National Center for Biotechnology Information (NCBI). A total of 19% of probes were not mapped to a gene, and just under 3% were mapped to multiple genes and could not be unambiguously annotated. Of the probes that were unambiguously annotated to a gene, 3438 (7:5%) of the annotations differed from those provided by the AHBA: 1287 probes were re-annotated to new genes and 2151 probes that were not previously assigned to any gene in the AHBA could now be annotated. Addi- tionally, 6211 (? 10%) probes in the initial AHBA dataset had an inconsistent gene symbol, ID or gene name information according to the NCBI database (https://www.ncbi.nlm.nih.gov/), as of 5th March 2018. Because of these differences, we recommend obtaining probe-to-gene annotations and retrieving the gene symbol ID and name from the latest version of NCBI (ftp://ftp.ncbi.nlm.nih.gov/ gene/DATA/GENE_INFO/Mammalia/). Hereafter, we present all ana- lyses using this newly re-annotated set of 45,821 probes, correspond- ing to 20,232 unique genes.

In [None]:
##########################################################################

# Function to create 3702x20237 (samples x genes) matrix with reannotation

##########################################################################

# Reannotation based on re-annotator code as done by:
# Arnatkevic̆iūtė A, Fulcher BD, Fornito A. A practical guide to linking brain-wide gene expression and neuroimaging data. 
# Neuroimage 2019; 189: 353–367. https://figshare.com/articles/AHBAdata/6852911
# Arloth J, Bader DM, Röh S, Altmann A. Re-Annotator: Annotation pipeline for microarray probe sequences. PLoS One 2015; 10: 1–13.
        
reannot = pd.read_csv(os.path.join(datadir,'reannotatedProbes.csv'))#.set_index('ID')
reannot['ID'] = reannot['ID'].fillna("0").astype(int)
reannot['allenID'] = reannot['allenID'].fillna("0").astype(int)
reannot = reannot.set_index('probeNames').sort_index()

def return_donor_geneexpression_reannot(donor):
    print 'Donor:', donor
    #I/O
    donor_zip  = zipfile.ZipFile(os.path.join(datadir,'%s.zip'%donor))

    #read microarray data. take care indexing.... should have 58692 probes
    df_microarray = pd.read_csv(StringIO.StringIO(donor_zip.read('MicroarrayExpression.csv')),
                                header= None,index_col =0 )
    print '- Microarray dataframe shape: ',df_microarray.shape

    # Grab probes df 
    df_probes = pd.read_csv(StringIO.StringIO(donor_zip.read('Probes.csv')), index_col =0 )
    del df_probes.index.name
    print '- Probes dataframe shape: ',df_probes.shape

    # grab microarray column and add probe name as index... now u have a matrix of probes x wells
    df_probes = pd.concat([df_microarray, df_probes['probe_name']],axis=1).set_index('probe_name')
    del df_probes.index.name
    print '- Probes original-annotation dataframe shape: ',df_probes.shape

    # drop unannotated AHBA probes based on re-annotater probes 
    df_probes = df_probes.drop([i for i in df_probes.index if i not in reannot.index], axis =0).sort_index()
    df_probes['GeneReannot'] = reannot['geneNames']
    print '- Probes re-annotation dataframe shape: ',df_probes.shape

    df_gene_exp = df_probes.groupby('GeneReannot').mean().T.reset_index().drop(['index'],axis=1)
    df_gene_exp.columns.name = None
    print '- Gene-expression Re-annotation dataframe shape: ',df_gene_exp.shape
    print 
    # Add metadata to dataframe
    df_meta = pd.read_csv(StringIO.StringIO(donor_zip.read('SampleAnnot.csv')))
    df = pd.concat([df_gene_exp,df_meta],axis=1).set_index('well_id')
    del df.index.name

    # append donor name
    df['donor'] = donor
    return df

In [None]:
reannot.head()

In [None]:
ahba_all_reannot = []
for donor in donors:
    df_reannot = return_donor_geneexpression_reannot(donor)
    ahba_all_reannot.append(df_reannot)
    
ahba_reannot = pd.concat(ahba_all_reannot)

In [None]:
# metadata included in the last 13 columns 
print 'AHBA re-annotated dataframe shape', ahba_reannot.iloc[:,:-13].shape

In [None]:
ahba_reannot.head()

# Intensity Based Filtering 

- The background gene list is provided in Dataset S2 and was calculated by excluding probes which did not exceed the background noise in the AHBA dataset (intensity based filtering), as described by [15]. We used code from [15], with options.probeSelections = ‘maxIntensity’, inline with the maximum intensity approach used to derive our regional gene expression values [16].


In [None]:
BACKGROUND_GENES = pd.read_csv(os.path.join(datadir,'background_gene_list.txt'), sep='\t').GeneSymbol.values
ahba_ibf = ahba_reannot.drop([i for i in ahba_reannot.columns if i not in BACKGROUND_GENES], axis =1)

print 'AHBA re-annotated and intensity based filtered brain genes dataframe shape', ahba_ibf.shape

In [None]:
# Save data 
#ahba.to_csv(os.path.join(datadir,'AHBA_20737X.csv'))
#ahba_reannot.to_csv(os.path.join(datadir,'AHBA_20267.csv'))
#ahba_ibf.to_csv(os.path.join(datadir,'AHBA_15745.csv'))

# Extract gene expression for Striatal Coordinates 

In [3]:
######################################################
# Read Sample data  - QSM stat map t-stat values 
# T-stat values calculated via FSL-Randomise 
# Coordinates extracted based on mask 
# Coords are QCd below by checking AHBA ontology to ensure
# that they fall within Striatal subdivisions
######################################################

ahba_dir     = '/Users/kanaaax/Google Drive/TS-EUROTRAIN/RESULTS_QSMv3/dataframes/AHBA/'
MNI = pd.read_csv(os.path.join(ahba_dir,'QSM_TSTATS/MNI_NIFTI_VALUES_permute_10K_OCT2.csv'), index_col = 0 )

In [4]:
######################################################
# Read Feature data - QSM stat map t-stat values 
######################################################

ahba         = pd.read_csv(os.path.join(datadir,'AHBA_20737.csv'), index_col=0)#
ahba_reannot = pd.read_csv(os.path.join(datadir,'AHBA_20267.csv'), index_col=0)#
ahba_ibf     = pd.read_csv(os.path.join(datadir,'AHBA_15745.csv'), index_col=0)#

  interactivity=interactivity, compiler=compiler, result=result)


In [62]:
# wells outside the striatal masks 
drop_wells = [2209, # donor1 claustrum	claustrum, right
              2233, # donor1 claustrum	claustrum, left
              2257, # donor1 claustrum	claustrum, right
              2265, # donor1 claustrum	claustrum, left
              2249, # donor1 claustrum	claustrum, left
              #2370, # donor1 claustrum, left	 E
              2371, # donor1 claustrum	claustrum, right
              2379, # donor1 claustrum	claustrum, left
              2977, # donor1	claustrum	claustrum, left
              5667, # donor2 claustrum	claustrum, right
              5705, # donor2	claustrum	claustrum, left
              5675, # donor2 claustrum	claustrum, left
              5769, # donor2	claustrum	claustrum, left
              5898, # donor2 dorsal thalamus	anterior group of nuclei
              11270, # donor3	corpus callosum	NaN 
              127687121, # donor5	corpus callosum	NaN
              147628210, # donor5	claustrum	claustrum, left
              147628277, # donor5 claustrum	claustrum, left
              156906844, # donor5	claustrum	claustrum, left
              160091500, # donor6 basal forebrain	septal nuclei
              2370,
              126432198, # donor4 parolfactory gyri, left	frontal lobe
              160091566, #   donor6 perifornical nucleus, left	tuberal region
              160091502, #   donor6 ventromedial hypothalamic nucleus, left	tuberal region
              #159226045, # donor6 striatum//caudate nucleus//body of the caudate nucleus//body of caudate nucleus, left'
              #1234 # not in STRM 
             ]

In [63]:
# check for outlier 
chi_STR    = MNI['STR_tstat_CP_1mm'].dropna()
chi_STR3M  = MNI['STR3_MOTOR_tstat_CP_1mm'].dropna()
chi_STR3MLL  = MNI['STR3_MOTOR_tstat_LL_1mm'].dropna()
chi_STR3E  = MNI['STR3_EXEC_tstat_CP_1mm'].dropna()
chi_STR3L  = MNI['STR3_LIMBIC_tstat_CP_1mm'].dropna()

print 'Coords based on masking following non-linear transformation'
print 'Striatal coords = ', len(chi_STR)
print 'Striatal motor coords = ', len(chi_STR3M)
print 'Striatal exec coords = ', len(chi_STR3E)
print 'Striatal limbic coords = ', len(chi_STR3L)
print 'Striatal motor coords = LL', len(chi_STR3MLL)


Coords based on masking following non-linear transformation
Striatal coords =  103
Striatal motor coords =  62
Striatal exec coords =  55
Striatal limbic coords =  45
Striatal motor coords = LL 63


In [64]:
print [i for i in chi_STR3MLL.index if i not in chi_STR3M]

[1234]


In [65]:
ahba_ontology = ['Ontology','Ontology_4', 'donor']

def return_ahba_nucleus_ontology_df(df_ahba, df_chi):
    
    ahba_nuc = df_ahba.drop([i for i in ahba.index if i not in df_chi.index],axis =0)
    ahba_nuc = ahba_nuc.drop([i for i in ahba_nuc.columns if i not in ahba_ontology],axis =1)
    ahba_nuc = ahba_nuc.drop([i for i in ahba_nuc.index if i in drop_wells],axis=0)

    print len(ahba_nuc)
    return ahba_nuc

df_ahba_str = return_ahba_nucleus_ontology_df(ahba, chi_STR)
df_ahba_str3m = return_ahba_nucleus_ontology_df(ahba, chi_STR3M)
print '_____'
df_ahba_str3mLL = return_ahba_nucleus_ontology_df(ahba, chi_STR3MLL)
df_ahba_str3e = return_ahba_nucleus_ontology_df(ahba, chi_STR3E)
df_ahba_str3l = return_ahba_nucleus_ontology_df(ahba, chi_STR3L)
df_ahba_str3l = return_ahba_nucleus_ontology_df(ahba, chi_STR3MLL)
ind_ontlogy_str3m = df_ahba_str3m[df_ahba_str3m['Ontology'].str.contains('striatum')].index
ind_ontlogy_str3m = df_ahba_str3m[df_ahba_str3m['Ontology'].str.contains('striatum')].index
ind_ontlogy_str3l = df_ahba_str3l[df_ahba_str3l['Ontology'].str.contains('striatum')].index
ind_ontlogy_str3e = df_ahba_str3e[df_ahba_str3e['Ontology'].str.contains('striatum')].index
ind_ontology_str  = list(itertools.chain(ind_ontlogy_str3m, ind_ontlogy_str3l, ind_ontlogy_str3e))

#drop_wells = list(itertools.chain(drop_wells, [i for i in ahba.index if i not in ind_ontology_str]))

88
48
_____
48
47
40
48


In [66]:
df_ahba_str3m

Unnamed: 0,donor,Ontology,Ontology_4
1028,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, left",basal ganglia
977,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, left",basal ganglia
941,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, left",basal ganglia
949,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//caudate nucleus//body of the caudate nucleus//body of caudate nucleus, left",basal ganglia
933,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, left",basal ganglia
788,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, left",basal ganglia
827,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, right",basal ganglia
780,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, left",basal ganglia
1170,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, right",basal ganglia
1171,donor1,"brain//gray matter//telencephalon//cerebral nuclei//basal ganglia//striatum//putamen//putamen, left",basal ganglia


In [67]:
def make_sample_feature_dataframe(nucleus): 
    
    print 'Creating n_samples x n_features dataframes for:', nucleus
    # Grab t-stat dataframe.... contains t-stat values for wells within nucleus 
    
    chi = pd.DataFrame(MNI[nucleus]).dropna().sort_index()
    chi = chi.drop([i for i in chi.index if i in drop_wells],axis=0)
    print ' - Nucleus contains this many samples:', len(chi.index)    
    print ''
    
    ahbax = ahba.iloc[:,:-28].sort_index().drop([i for i in ahba.index if i not in chi.index ], axis =0).sort_index()
    ahba_reannotx = ahba_reannot.iloc[:,:-13].sort_index().drop([i for i in ahba.index if i not in chi.index ], axis =0).sort_index()
    ahba_ibfx = ahba_ibf.sort_index().drop([i for i in ahba.index if i not in chi.index ], axis =0).sort_index()
    
    ahbax.to_csv(os.path.join(datadir,'index/AHBA_20737_index_%s.csv'%nucleus))
    ahba_reannotx.to_csv(os.path.join(datadir,'index/AHBA_20267_index_%s.csv'%nucleus))
    ahba_ibfx.to_csv(os.path.join(datadir,'index/AHBA_15745_index_%s.csv'%nucleus))
    
    chi.to_csv(os.path.join(datadir,'index/chi_index_%s.csv'%nucleus))
    
make_sample_feature_dataframe('STR3_MOTOR_tstat_CP_1mm')
make_sample_feature_dataframe('STR3_EXEC_tstat_CP_1mm')
make_sample_feature_dataframe('STR3_LIMBIC_tstat_CP_1mm')
#make_sample_feature_dataframe('STR_tstat_CP_1mm')
make_sample_feature_dataframe('STR3_MOTOR_tstat_LL_1mm')


Creating n_samples x n_features dataframes for: STR3_MOTOR_tstat_CP_1mm
 - Nucleus contains this many samples: 48

Creating n_samples x n_features dataframes for: STR3_EXEC_tstat_CP_1mm
 - Nucleus contains this many samples: 47

Creating n_samples x n_features dataframes for: STR3_LIMBIC_tstat_CP_1mm
 - Nucleus contains this many samples: 40

Creating n_samples x n_features dataframes for: STR3_MOTOR_tstat_LL_1mm
 - Nucleus contains this many samples: 48



In [68]:
#chi.to_csv(os.path.join(datadir,'index/chi_index_%s.csv'%nucleus))

In [69]:
def make_str_combined_sample_feature_dataframe():
    
    print 'Creating n_samples x n_features dataframes for full striatum'
    
    # Grab t-stat dataframe.... contains t-stat values for wells within nucleus 
    chiM = pd.DataFrame(MNI['STR3_MOTOR_tstat_CP_1mm']).dropna()
    chiM = chiM.drop([i for i in chiM.index if i in drop_wells],axis=0)
    print chiM.shape

    chiL = pd.DataFrame(MNI['STR3_LIMBIC_tstat_CP_1mm']).dropna()
    chiL = chiL.drop([i for i in chiL.index if i in drop_wells],axis=0)
    chiL = chiL.drop([i for i in chiL.index if i in chiM.index],axis=0)
    print chiL.shape
    
    chiE = pd.DataFrame(MNI['STR3_EXEC_tstat_CP_1mm']).dropna()
    chiE = chiE.drop([i for i in chiE.index if i in drop_wells],axis=0)
    chiE = chiE.drop([i for i in chiE.index if i in chiM.index],axis=0)
    chiE = chiE.drop([i for i in chiE.index if i in chiL.index],axis=0)
    print chiE.shape

    chiM.rename(columns={'STR3_MOTOR_tstat_CP_1mm':'STR3'}, inplace=True)
    chiE.rename(columns={'STR3_EXEC_tstat_CP_1mm':'STR3'}, inplace=True)
    chiL.rename(columns={'STR3_LIMBIC_tstat_CP_1mm':'STR3'}, inplace=True)
    
    chi = pd.concat([chiM,chiE, chiL],axis = 0)
    print ' - Nucleus contains this many samples:', len(chi.index)    
    print ''
    
    ahbax = ahba.iloc[:,:-28].sort_index().drop([i for i in ahba.index if i not in chi.index ], axis =0).sort_index()
    ahba_reannotx = ahba_reannot.iloc[:,:-13].sort_index().drop([i for i in ahba.index if i not in chi.index ], axis =0).sort_index()
    ahba_ibfx = ahba_ibf.sort_index().drop([i for i in ahba.index if i not in chi.index ], axis =0).sort_index()
    
    nucleus = 'STR3_tstat_CP_1mm'
    
    ahbax.to_csv(os.path.join(datadir,'index/AHBA_20737_index_%s.csv'%nucleus))
    ahba_reannotx.to_csv(os.path.join(datadir,'index/AHBA_20267_index_%s.csv'%nucleus))
    ahba_ibfx.to_csv(os.path.join(datadir,'index/AHBA_15745_index_%s.csv'%nucleus))
    
    chi.to_csv(os.path.join(datadir,'index/chi_index_%s.csv'%nucleus))

    print ahbax.shape
    print ahba_reannotx.shape
    print ahba_ibfx.shape
    
    return chi
x = make_str_combined_sample_feature_dataframe()

Creating n_samples x n_features dataframes for full striatum
(48, 1)
(38, 1)
(36, 1)
 - Nucleus contains this many samples: 122

(122, 20737)
(122, 20267)
(122, 15745)


In [70]:
48+36+38

122

In [71]:
print pd.read_csv(os.path.join(datadir,'index/AHBA_20737_index_STR3_MOTOR_tstat_CP_1mm.csv'), index_col=0).shape 
print pd.read_csv(os.path.join(datadir,'index/AHBA_20267_index_STR3_MOTOR_tstat_CP_1mm.csv'), index_col=0).shape 
print pd.read_csv(os.path.join(datadir,'index/AHBA_15745_index_STR3_MOTOR_tstat_CP_1mm.csv'), index_col=0).shape 
print ''
print pd.read_csv(os.path.join(datadir,'index/AHBA_20737_index_STR3_EXEC_tstat_CP_1mm.csv'), index_col=0).shape 
print pd.read_csv(os.path.join(datadir,'index/AHBA_20267_index_STR3_EXEC_tstat_CP_1mm.csv'), index_col=0).shape 
print pd.read_csv(os.path.join(datadir,'index/AHBA_15745_index_STR3_EXEC_tstat_CP_1mm.csv'), index_col=0).shape 
print ''
print pd.read_csv(os.path.join(datadir,'index/AHBA_20737_index_STR3_LIMBIC_tstat_CP_1mm.csv'), index_col=0).shape 
print pd.read_csv(os.path.join(datadir,'index/AHBA_20267_index_STR3_LIMBIC_tstat_CP_1mm.csv'), index_col=0).shape 
print pd.read_csv(os.path.join(datadir,'index/AHBA_15745_index_STR3_LIMBIC_tstat_CP_1mm.csv'), index_col=0).shape 


(48, 20737)
(48, 20267)
(48, 15745)

(47, 20737)
(47, 20267)
(47, 15745)

(40, 20737)
(40, 20267)
(40, 15745)


In [72]:
print pd.read_csv(os.path.join(datadir,'index/AHBA_20737_index_STR3_tstat_CP_1mm.csv'), index_col=0).shape 
print pd.read_csv(os.path.join(datadir,'index/AHBA_20267_index_STR3_tstat_CP_1mm.csv'), index_col=0).shape 
print pd.read_csv(os.path.join(datadir,'index/AHBA_15745_index_STR3_tstat_CP_1mm.csv'), index_col=0).shape 


(122, 20737)
(122, 20267)
(122, 15745)
