# setup

In [None]:
import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import pandas as pd
from nilearn import image

In [None]:
# Identifier of this notebook.
nb_name = '0_feature_matrices_and_metadata'

# Directory prefixes shared by the path templates below.
_sketchloop_root = '/jukebox/ntb/users/jwammes/sketchloop/'
_project_root = '/jukebox/ntb/projects/sketchloop02/'
_output_dir = _project_root + 'data/feature_matrices_and_metadata/'

# Every template is completed later with str.format().
# Per-run image loaded as the time series; placeholders: subject, subject, run.
series_path = (
    _sketchloop_root
    + 'subjects/{}_neurosketch/analysis/firstlevel/'
    + 'parameter/{}_neurosketch_3mm_drawing_run_{}_filtfuncHIRES.nii.gz'
)

# Per-category onset files; placeholders: subject, run, category name.
timepoint_path = _project_root + 'subjects/{}_neurosketch/regressor/run_{}/{}.txt'

# Output locations; the single placeholder in each is the subject ID.
feature_path = _output_dir + '{}_featurematrix.npy'
metadata_path = _output_dir + 'metadata_{}_drawing.csv'

# Subject IDs to process, in processing order.
subjects = """
    0110171 0110172 0111171 0112171 0112172 0112173
    0113171 0115174 0117171 0118171 0118172 0119171
    0119172 0119173 0119174 0120171 0120172 0120173
    0123171 0123173 0124171 0125171 0125172 1121161
    1130161 1202161 1203161 1206161 1206162 1206163
    1207162
""".split()

# feature and metadata extraction

In [None]:
# For every subject and every pair of runs ("phase"), collect one volume per
# trial onset together with its label, run and onset TR, then write the
# stacked volumes (.npy) and the matching metadata table (.csv).

N_TRS = 240            # length of the per-run onset-marker vector
TR_SECONDS = 1.5       # onset times are divided by this to get a TR index
FEATURE_TR_OFFSET = 3  # the volume is taken this many TRs after the onset TR
                       # (presumably to allow for hemodynamic lag -- TODO confirm)
CATEGORIES = ['bed', 'bench', 'chair', 'table']


def _read_onset_trs(path):
    """Return the TR index of every onset listed in the regressor file *path*.

    The first whitespace-separated field of each non-blank line is parsed as
    an onset time, divided by ``TR_SECONDS`` and truncated with ``int()``.
    Blank lines are skipped, so the last onset is kept whether or not the
    file ends with a newline.

    Raises:
        ValueError: if the first field of a non-blank line is not a number.
    """
    with open(path) as f:
        return [int(float(line.split()[0]) / TR_SECONDS)
                for line in f if line.strip()]


for subject in subjects:
    for phase in ['12', '34', '56']:

        # One entry per trial; all four lists are appended to in lockstep so
        # the metadata rows always line up with the stacked volumes.
        label = []
        run_num = []
        TR_num = []
        features = []

        for r in phase:
            # Load this run's 4-D image and move the last axis to the front so
            # that timeseries[t] is the volume at index t.
            # np.asanyarray(img.dataobj) is nibabel's documented replacement
            # for the deprecated img.get_data() and keeps the stored dtype.
            timeseries = image.load_img(series_path.format(subject, subject, r))
            timeseries = np.asanyarray(timeseries.dataobj).transpose((3, 0, 1, 2))

            # Mark each onset TR with its category. If two categories land on
            # the same TR, the one later in CATEGORIES wins.
            label_at_tr = [None] * N_TRS
            for cope in CATEGORIES:
                for tr in _read_onset_trs(timepoint_path.format(subject, r, cope)):
                    label_at_tr[tr] = cope

            # Walk the markers in TR order so trials are stored chronologically.
            for tr, cope in enumerate(label_at_tr):
                if cope is None:
                    continue
                features.append(timeseries[tr + FEATURE_TR_OFFSET])
                label.append(cope)
                run_num.append(r)
                TR_num.append(float(tr))  # onset TR, not the offset one

        # NOTE(review): feature_path and metadata_path depend only on the
        # subject, but this save runs once per phase, so each phase overwrites
        # the previous one and only phase '56' remains on disk. Left as-is so
        # downstream file names and shapes do not change -- confirm intent.

        ## feature matrix
        # Trials are stacked on axis 1, not axis 0; consumers must transpose.
        features = np.stack(features, axis=1)
        np.save(file=feature_path.format(subject), arr=features)

        ## metadata
        x = pd.DataFrame(
            {
                'subj': [subject] * len(label),
                'label': label,
                'run_num': run_num,
                'TR_num': TR_num,
            },
            columns=['subj', 'label', 'run_num', 'TR_num'],
        )
        x.to_csv(metadata_path.format(subject))