In [None]:
from __future__ import print_function
from scipy.io import loadmat
import numpy as np
import feather
import pandas as pd
import os
filesep = os.path.sep

## Save R compatible dataframes for HSF analysis
Please set `datDir` to the absolute path of the `Stats_derivatives` folder. This script reads the modified z-score transformed and uniformly downsampled point clouds from the `FiberStrain_mzscore_reduced` directory and saves R-compatible dataframes (Feather format) in the `HSF_Inputs` directory, which is created inside `datDir`. Only equal parameter settings are used (alpha = sigma, with the 4 values 4, 6, 8 and 10); each output file holds one pair of settings, giving 6 pairwise combinations.

You can download the dataset from here.

In [None]:
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# PLEASE PROVIDE the Stats_derivatives folder path if you are running this notebook locally.
# Both the input .mat files and the HSF_Inputs output folder are located relative to this directory.
datDir = '/tmp/Stats_derivatives'

In [None]:
# The output folder is a sub-directory of the input directory.

outDir = filesep.join([datDir, 'HSF_Inputs'])
# Create the folder on first use; an already existing path is left untouched.
if not os.path.exists(outDir):
    os.mkdir(outDir)

def vectorize_mat(filename,region):
    """Return the ``scalars`` values of one region of a MATLAB file as a flat list.

    Parameters
    ----------
    filename : str
        Path of the ``.mat`` file; handed to ``scipy.io.loadmat``.
    region : str
        Name of the variable to read from the file. The variable must
        expose a ``scalars`` field.

    Returns
    -------
    list
        The squeezed ``scalars`` values. A field holding a single value
        yields a one-element list rather than a bare scalar, so callers
        can always take ``len()`` of the result.

    Raises
    ------
    KeyError
        If ``region`` is not a variable in the file.
    """
    mat = loadmat(filename)
    # Take the first element of the first column of the loaded 'scalars' field.
    scalars = mat[region]['scalars'][:,0][0]
    # np.squeeze turns a one-element array into a 0-d array, whose tolist()
    # is a plain scalar; atleast_1d keeps the result a list in that case
    # and leaves every other shape unchanged.
    vec = np.atleast_1d(np.squeeze(scalars)).tolist()
    return vec

def getHSFTable(sub,idxs):
    """Build a long-format table of the region values of one subject.

    For every parameter value in ``idxs`` the file
    ``sub<sub>_sigma<ii>_alpha<ii>_modzscore_reduced.mat`` inside the
    ``FiberStrain_mzscore_reduced`` folder of ``datDir`` is read, and the
    values of each of the ten regions are appended to the table.

    Parameters
    ----------
    sub : str
        Subject identifier used in the file name.
    idxs : iterable
        Parameter values; each one is used for both sigma and alpha.

    Returns
    -------
    pandas.DataFrame
        One row per value, with columns ``str`` (the value), ``conds``
        (``sigma<ii>_alpha<ii>``) and ``segments`` (the region name).
    """
    regs = ['p4','p3','p2','p1','m1','m2','d1','d2','d3','d4']
    vals = []
    conds = []
    segs = []
    for ii in idxs:
        # The condition label and the file path do not depend on the region,
        # so build them once per parameter value.
        cond = 'sigma' + str(ii) + '_alpha' + str(ii)
        matPath = datDir + filesep + 'FiberStrain_mzscore_reduced' + filesep + 'sub' + sub + '_' + cond + '_modzscore_reduced.mat'
        for reg in regs:
            curVec = vectorize_mat(matPath,reg)
            # Extend flat lists instead of stacking per-region lists into a
            # 2-D array: regions of different lengths would make that array
            # ragged, and flatten() would then no longer give one row per value.
            vals.extend(curVec)
            conds.extend([cond]*len(curVec))
            segs.extend([reg]*len(curVec))

    dat = {'str':vals,'conds':conds,'segments':segs}
    df = pd.DataFrame(data=dat)
    return df

def writeHSFEqual(subID):
    """Write one Feather table per pair of equal-parameter settings for a subject.

    Each table comes from ``getHSFTable`` and is written to ``outDir`` as
    ``sub<subID>_reduced_<label>.file``; the target path is printed before
    each write.
    """
    # The label is the name R will see for the file; the list holds the two
    # parameter values (sigma == alpha) that are combined in that file.
    pairings = [
        ('44_66',[4,6]),
        ('44_88',[4,8]),
        ('44_1010',[4,10]),
        ('66_88',[6,8]),
        ('66_1010',[6,10]),
        ('88_1010',[8,10]),
    ]
    extension = '.file'
    for label, params in pairings:
        table = getHSFTable(subID,params)
        target = outDir + filesep + 'sub' + subID + '_reduced_' + label + extension
        print('Saving: ' + target)
        feather.write_dataframe(table,target)
    print('DONE...')

In [None]:
# Export the tables for every subject, in the same order as before.
for subject in ['A','B','C','D','E']:
    writeHSFEqual(subject)