In [1]:
import os.path as op
import scipy.io as sio
import pandas as pd
import numpy as np

In [2]:
# Input/output configuration for converting an AFQ .mat file into a CSV table.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook anywhere else.
mat_file_name = '/home/jyeatman/git/AFQ/afq.mat'

# Each of these may stay None; the cells below fill in a default when it is.
subject_ids = None
stats = None
out_file = None

In [3]:
# Read the MATLAB file (squeeze_me=True squeezes unit dimensions) and keep
# only its 'afq' structure.
mat_contents = sio.loadmat(mat_file_name, squeeze_me=True)
afq = mat_contents['afq']

# Unwrap the two fields the rest of the notebook works with: the entries used
# as tract identifiers and the structure holding the per-tract statistics.
tract_ids = afq['fgnames'].item()
vals = afq['vals'].item()

In [4]:
# Show the shape of the subject-ID array stored in the AFQ structure.
afq['sub_ids'].item().shape

(6,)

In [5]:
# Derive the table layout and the remaining defaults from the loaded AFQ data.
n_tracts = len(tract_ids)

# With no explicit selection, use every statistic stored in `vals`.
if stats is None:
    stats = list(vals.dtype.fields.keys())

# Three identifier columns followed by one column per statistic.
# list(...) lets `stats` be any sequence (e.g. a tuple), not just a list.
columns = ['subjectID', 'tractID', 'nodeID'] + list(stats)
# Empty frame carrying the final column layout.
df = pd.DataFrame(columns=columns)

# The first tract of the first statistic supplies both dimensions:
# rows are subjects, columns are nodes along the tract.
n_subjects, nodes_per_tract = vals[stats[0]].item()[0].shape

# Check if subject ids is defined in the afq structure
if subject_ids is None:
    if 'sub_ids' in afq.dtype.fields and len(afq['sub_ids'].item()):
        subject_ids = afq['sub_ids'].item()
    else:
        # Fall back to generated ids. The zero padding is at least three
        # digits (so ids match the previous 'subject_%03d' format for fewer
        # than 1000 subjects) and widens when more digits are needed.
        pad_width = max(3, len(str(max(n_subjects - 1, 0))))
        subject_ids = ['subject_%0*d' % (pad_width, i)
                       for i in range(n_subjects)]

In [6]:
# Display the subject ids that the rest of the notebook will use.
subject_ids

array([u'patient_01', u'patient_02', u'patient_03', u'control_01',
       u'control_02', u'control_03'], dtype=object)

In [7]:
# Build the long-format table: one row per (subject, tract, node) and one
# column per statistic, then write it to CSV.
id_columns = ['subjectID', 'tractID', 'nodeID']
node_ids = np.arange(nodes_per_tract)

# Unwrap each statistic's per-tract arrays once instead of on every iteration.
stat_arrays = dict((stat, vals[stat].item()) for stat in stats)

# Collect the per-subject, per-tract frames and concatenate them once at the
# end. DataFrame.append copied the whole table on every call and was removed
# in pandas 2.0. Starting from an empty list also makes this cell safe to
# re-run without duplicating rows.
frames = []
# Loop over subjects
for subject, subject_id in enumerate(subject_ids):
    # Loop over tracts
    for tract in range(n_tracts):
        # Subject- and tract-specific frame. Building it from a dict keeps
        # nodeID as integers; stacking the ids into one NumPy array would
        # coerce every identifier column to strings.
        subj_df = pd.DataFrame(
            {'subjectID': [subject_id] * nodes_per_tract,
             'tractID': [tract_ids[tract]] * nodes_per_tract,
             'nodeID': node_ids},
            columns=id_columns)
        # Add one column per requested statistic (e.g. fa, md)
        for stat in stats:
            scalar = stat_arrays[stat][tract][subject, :]
            subj_df[stat] = scalar
        frames.append(subj_df)

if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty table that
    # still has the expected columns.
    df = pd.DataFrame(columns=id_columns + list(stats))

# Default output location when none was configured
if out_file is None:
    out_file = op.join('.', 'nodes.csv')
# Write to file
df.to_csv(out_file, index=False)
df.shape

(12000, 11)

In [10]:
# Extract the metadata record stored in the AFQ structure and display it
metadata = afq['metadata'].item()
metadata

array(([1, 1, 1, 1, 1, 1], [1, 1, 1, 0, 0, 0], [0.19476428956704928, 0.2259217809723988, 0.1707080471478586, 0.2276642978165535, 0.4356986841038991, 0.31110228665041284]), 
      dtype=[('session', 'O'), ('patient', 'O'), ('score', 'O')])