In [1]:
import numpy as np
import pandas as pd
import pickle
import json
import os, sys, re, time
from tqdm import tqdm


In [2]:
# Root folder holding one sub-folder per subject; each sub-folder name must be an
# integer subject id because the folders are sorted numerically below.
storage_dir = '/home/pirc/Documents/billy/school/file_storage'

# A results file is assigned to a phrase when the phrase occurs in its file name.
# 'Volumes' marks the per-structure volume file; every other phrase marks a
# radiomics feature file for that structure.
phrases = ['Deep_grey', 'Brain_stem', 'Left_wm', 'Left_gm',
           'Right_wm', 'Right_gm', 'Left_cerebellum', 'Right_cerebellum', 'Volumes']

# Only feature keys containing one of these substrings are kept from the
# radiomics files; everything else in those JSON files is dropped.
feature_classes = ('original_firstorder', 'original_shape',
                   'original_glcm', 'original_glrlm')

dirs = os.listdir(storage_dir)
dirs = [os.path.join(storage_dir, x) for x in dirs if os.path.isdir(os.path.join(storage_dir, x))]
dirs.sort(key = lambda x: int(os.path.basename(x)))

# subject id -> {row label: value}. Collected first and turned into a frame once,
# instead of joining a one-column frame onto a growing table for every subject.
subject_features = {}

for p in tqdm(dirs):
    subj = os.path.basename(p)
    res_dir = os.path.join(p, 'results')
    mas_dir = os.path.join(p, 'structure_masks')

    # phrase -> path of the results file whose name contains that phrase.
    # Files are visited in sorted order, so if several names contain the same
    # phrase the last one wins. Subjects lacking either folder get no files
    # and therefore end up as an all-NaN column.
    file_paths = {}
    if os.path.isdir(res_dir) and os.path.isdir(mas_dir):
        for fname in sorted(os.listdir(res_dir)):
            for phrase in phrases:
                if phrase in fname:
                    file_paths[phrase] = os.path.join(res_dir, fname)

    clean_res = {}
    for phrase, path in file_paths.items():
        with open(path, 'r') as fh:
            res_dict = json.load(fh)

        if phrase == 'Volumes':
            # Every entry of the volume file is kept, labelled 'Volume_<key>'.
            for key, value in res_dict.items():
                clean_res['Volume_' + key] = value
            continue

        # Radiomics file: keep the selected feature classes, labelled
        # '<structure>_<feature key>'.
        for key, value in res_dict.items():
            if any(feature_class in key for feature_class in feature_classes):
                clean_res[phrase + '_' + key] = value

    subject_features[subj] = clean_res

# Rows = features, columns = subjects. The reindex restores the numeric subject
# order and re-adds subjects that produced no features, which the nested-dict
# constructor would otherwise leave out.
all_results = (
    pd.DataFrame.from_dict(subject_features, orient='index')
    .reindex(index=list(subject_features))
    .T
)
    
# Rows holding the per-structure volumes are selected by their 'Volume_' label
# prefix rather than by position, so the result no longer depends on those rows
# being the last ones in the table.
# NOTE(review): assumes the 'Volume_' rows are exactly the structures that should
# be summed into the total (the original summed the last 8 rows) - confirm.
volume_rows = [row for row in all_results.index if str(row).startswith('Volume_')]
volumes = all_results.loc[volume_rows].astype(float)

# adds TBV row to df
TBV = volumes.sum(axis=0)
all_results.loc['Total_Brain_Volume'] = TBV


# add size labels for structure regions:
#   -1 = more than `multi` standard deviations below that structure's mean
#    1 = more than `multi` standard deviations above that structure's mean
#    0 = anything in between, including values exactly on a threshold
avg = volumes.mean(axis=1)
sd = volumes.std(axis=1)
multi = 1     #can change if desired range is different
high = avg + (multi * sd)
low = avg - (multi * sd)

# Both masks are computed from the untouched volumes, so a label written for one
# case can never be re-read as a volume by a later comparison.
below = volumes.lt(low, axis=0)
above = volumes.gt(high, axis=0)
bin_df = above.astype(float) - below.astype(float)

# A missing volume has no label, and neither does a structure whose spread is
# undefined (for example when only one subject has a value).
bin_df = bin_df.where(volumes.notna())
bin_df.loc[sd.isna()] = np.nan

new_ind = ['small(-1)_large(1)_normal(0)_' + ind for ind in bin_df.index]
bin_df.index = new_ind

# Label rows go last; pd.concat replaces DataFrame.append, which pandas 2.0 removed.
all_results = pd.concat([all_results, bin_df])

100%|█████████████████████████████████████| 1112/1112 [00:12<00:00, 86.36it/s]


In [3]:
# Export the combined table as a comma-separated file inside the storage folder.
# to_csv's defaults already use ',' as separator and write both the header row
# (subject ids) and the index column (feature names).
save_loc = os.path.join(storage_dir, '2021-11-23_home_results.csv')
all_results.to_csv(save_loc)

In [4]:
# Show the assembled table (features as rows, subjects as columns); the rendered
# output is abbreviated with '...' for large frames.
all_results

Unnamed: 0,100206,100307,100408,100610,101006,101107,101309,101410,101915,102008,...,987074,987983,989987,990366,991267,992673,992774,993675,994273,995174
Brain_stem_original_shape_Elongation,0.540240,0.504373,0.517729,0.545503,0.514219,0.488660,0.478099,0.465554,0.461718,0.467452,...,0.499520,0.463512,0.493516,0.473042,0.513634,0.493027,0.526041,0.553072,0.551787,0.480321
Brain_stem_original_shape_Flatness,0.438991,0.424063,0.467850,0.507799,0.428601,0.445808,0.367742,0.446798,0.399312,0.419881,...,0.435413,0.378950,0.469981,0.398059,0.439474,0.448079,0.415814,0.496455,0.456162,0.437853
Brain_stem_original_shape_LeastAxisLength,27.560738,29.844921,31.908535,31.150247,27.861488,28.312736,26.610978,29.345464,28.366818,30.453162,...,27.668875,25.969980,30.156694,29.060565,27.509858,28.535771,26.136545,29.419696,29.599786,30.036207
Brain_stem_original_shape_MajorAxisLength,62.781983,70.378498,68.202430,61.343705,65.005692,63.508825,72.363212,65.679429,71.039314,72.528150,...,63.546262,68.531343,64.165711,73.005685,62.597235,63.684684,62.856384,59.259513,64.888789,68.598891
Brain_stem_original_shape_Maximum2DDiameterColumn,60.216276,61.232997,63.139844,59.285495,63.015553,58.306259,65.099999,60.883905,58.167430,61.615906,...,55.353138,55.353138,60.346333,65.115051,55.370840,55.317718,60.301657,56.039360,64.495037,60.216276
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
small(-1)_large(1)_normal(0)_Volume_Right_wm,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,1.000000,...,-1.000000,0.000000,-1.000000,1.000000,0.000000,0.000000,-1.000000,0.000000,1.000000,0.000000
small(-1)_large(1)_normal(0)_Volume_Right_gm,0.000000,0.000000,0.000000,-1.000000,0.000000,0.000000,1.000000,0.000000,0.000000,1.000000,...,0.000000,0.000000,-1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000
small(-1)_large(1)_normal(0)_Volume_Left_wm,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,...,-1.000000,0.000000,-1.000000,1.000000,0.000000,0.000000,-1.000000,0.000000,0.000000,0.000000
small(-1)_large(1)_normal(0)_Volume_Left_gm,0.000000,0.000000,0.000000,-1.000000,0.000000,0.000000,1.000000,0.000000,0.000000,1.000000,...,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000


In [5]:
# Tally, for each of the last eight rows of the table (the size-label rows), how
# many subjects carry each label value.
counts = all_results.iloc[-8:, :].copy()
for i, s in ((-1, 'Small'), (0, 'Normal'), (1, 'Large')):
    res = counts.eq(i).sum(axis=1)
    print("Number of {}({}) values:\n\n{}\n\n".format(s, i, res))

Number of Small(-1) values:

small(-1)_large(1)_normal(0)_Volume_Right_cerebellum    169
small(-1)_large(1)_normal(0)_Volume_Left_cerebellum     177
small(-1)_large(1)_normal(0)_Volume_Deep_grey           176
small(-1)_large(1)_normal(0)_Volume_Right_wm            179
small(-1)_large(1)_normal(0)_Volume_Right_gm            177
small(-1)_large(1)_normal(0)_Volume_Left_wm             166
small(-1)_large(1)_normal(0)_Volume_Left_gm             181
small(-1)_large(1)_normal(0)_Volume_Brain_stem          182
dtype: int64


Number of Normal(0) values:

small(-1)_large(1)_normal(0)_Volume_Right_cerebellum    752
small(-1)_large(1)_normal(0)_Volume_Left_cerebellum     741
small(-1)_large(1)_normal(0)_Volume_Deep_grey           756
small(-1)_large(1)_normal(0)_Volume_Right_wm            740
small(-1)_large(1)_normal(0)_Volume_Right_gm            749
small(-1)_large(1)_normal(0)_Volume_Left_wm             766
small(-1)_large(1)_normal(0)_Volume_Left_gm             743
small(-1)_large(1)_normal(0