In [None]:
import numpy as np
import pandas as pd
import pickle
import json
import os, sys, re, time
from tqdm import tqdm

# Locate the project's `main_code` directory, which sits next to the directory
# this notebook is run from, and make it importable.
# os.path.dirname is used instead of splitting on '/' so the parent directory
# is resolved correctly regardless of the platform's path separator.
code_dir = os.path.dirname(os.getcwd())
main_code = os.path.join(code_dir, 'main_code')

# Guarded so that re-running this cell does not pile up duplicate entries.
if main_code not in sys.path:
    sys.path.append(main_code)
import locations


In [None]:
storage_dir = '' #Storage dir from #2 EXtract_and_radiomics

# Every sub-directory of storage_dir is treated as one subject; the directory
# name is parsed as an integer so subjects are processed in numeric order.
dirs = os.listdir(storage_dir)
dirs = [os.path.join(storage_dir, x) for x in dirs if os.path.isdir(os.path.join(storage_dir, x))]
dirs.sort(key = lambda x: int(os.path.basename(x)))

# A result file is assigned to a phrase when the phrase occurs in its file name.
phrases = ['Deep_grey', 'Brain_stem', 'Left_wm', 'Left_gm',
           'Right_wm', 'Right_gm', 'Left_cerebellum', 'Right_cerebellum', 'Volumes']
# Only feature keys containing one of these substrings are kept.
feature_groups = ('original_firstorder', 'original_shape',
                  'original_glcm', 'original_glrlm')
volume_prefix = 'Volume_'
label_prefix = 'small(-1)_large(1)_normal(0)_'

# One Series per subject (index = feature name). They are combined once after
# the loop instead of joining onto a growing frame on every iteration.
subject_columns = []

for p in tqdm(dirs):
    subj = os.path.basename(p)
    res_dir = os.path.join(p,'results')
    mas_dir = os.path.join(p, 'structure_masks')

    # Results are only read when both the results and the masks directory exist.
    # If several files match the same phrase, the last one in sorted order wins.
    file_paths = {}
    if os.path.isdir(res_dir) and os.path.isdir(mas_dir):
        for f in sorted(os.listdir(res_dir)):
            for phrase in phrases:
                if phrase in f:
                    file_paths[phrase] = os.path.join(res_dir, f)

    clean_res = {}
    for phrase, path in file_paths.items():
        with open(path, 'r') as f:
            res_dict = json.load(f)

        if phrase == 'Volumes':
            # Every entry of the volumes file is kept, prefixed with 'Volume_'.
            for key, value in res_dict.items():
                clean_res[volume_prefix + key] = value
            continue

        for key, value in res_dict.items():
            if any(group in key for group in feature_groups):
                clean_res[phrase + '_' + key] = value

    # A subject without any results still gets an (all-NaN) column; giving the
    # empty Series a float dtype keeps that column numeric.
    subject_columns.append(
        pd.Series(clean_res, name=subj, dtype=None if clean_res else 'float64')
    )

# Rows = features, columns = subjects; features missing for a subject are NaN.
if subject_columns:
    all_results = pd.concat(subject_columns, axis=1, sort=False)
else:
    all_results = pd.DataFrame()

# adds TBV row to df
# The volume rows are selected by their 'Volume_' label rather than by
# position, so the result does not depend on the row order of the frame.
volume_rows = [label for label in all_results.index if label.startswith(volume_prefix)]
TBV = all_results.loc[volume_rows].sum(axis=0)
all_results.loc['Total_Brain_Volume'] = TBV


# add binary labels for structure regions
# For each volume row, a subject is labelled relative to that row's mean
# across subjects: -1 below (mean - multi*sd), 1 above (mean + multi*sd),
# 0 inside the closed interval between the two.
volumes = all_results.loc[volume_rows].astype(float)
avg = volumes.mean(axis=1)
sd = volumes.std(axis=1)
multi = 1     #can change if desired range is different
high = avg + (multi * sd)
low = avg - (multi * sd)

# All masks are computed from the untouched volumes, so a label written for
# one condition can never be re-compared against the thresholds. Cells with a
# missing volume or an undefined threshold match no mask and stay NaN.
bin_df = pd.DataFrame(np.nan, index=volumes.index, columns=volumes.columns)
bin_df = bin_df.mask(volumes.ge(low, axis=0) & volumes.le(high, axis=0), 0.0)
bin_df = bin_df.mask(volumes.lt(low, axis=0), -1.0)
bin_df = bin_df.mask(volumes.gt(high, axis=0), 1.0)

bin_df.index = [label_prefix + ind for ind in bin_df.index]

# The label rows are appended after all feature rows and the TBV row.
all_results = pd.concat([all_results, bin_df])

In [None]:
# Write the combined results table to a CSV file inside storage_dir.
# The value looked up under 'save_date' is used as the file-name prefix; it
# must be bound to `save_date`, the name the f-string below reads.
save_date = locations.get_locations('save_date')
file_name = f'{save_date}_home_results.csv'
save_loc = os.path.join(storage_dir, file_name)
all_results.to_csv(save_loc, sep = ',', header = True, index=True)

In [None]:
# Tally, for each of the last eight rows of all_results, how many subjects
# carry each label value, and print one summary per label.
counts = all_results.iloc[-8:,:].copy()
label_names = [(-1, 'Small'), (0, 'Normal'), (1, 'Large')]
for label_value, label_name in label_names:
    per_row_total = counts.eq(label_value).sum(axis = 1)
    print("Number of {}({}) values:\n\n{}\n\n".format(label_name, label_value, per_row_total))