This notebook can be used to load raw Ultraleap data, 
save cleaned dataframes for each block, 
and generate dataframes of distances for further feature extraction.

Import public packages and functions

In [2]:
import os
import importlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from itertools import compress


import openpyxl
from datetime import datetime
import math
import statistics as stat
import json

In [3]:
def get_repo_path_in_notebook():
    """
    Finds path of repo from Notebook.
    Start running this once to correctly find
    other modules/functions.

    Walks upwards from the current working directory until it reaches a
    path that ends with the repository name.

    Returns
    -------
    str
        The working directory itself, or its closest ancestor, whose path
        ends with 'ultraleap_analysis'.

    Raises
    ------
    FileNotFoundError
        If the filesystem root is reached without finding the repository.
    """
    repo_name = 'ultraleap_analysis'
    start_path = os.getcwd()
    path = start_path

    while not path.endswith(repo_name):
        parent = os.path.dirname(path)

        # os.path.dirname() of the filesystem root returns the root again;
        # without this check the loop would never terminate when the
        # notebook is started outside of the repository.
        if parent == path:
            raise FileNotFoundError(
                f"Could not find '{repo_name}' in any parent of {start_path}"
            )

        path = parent

    return path


In [4]:
# Locate the repository root and make its 'code' folder the working directory,
# so that the project's own packages can be imported in the cells below.
repo_path = get_repo_path_in_notebook()
code_path = os.path.join(repo_path, 'code')
os.chdir(code_path)

Import own functions

In [95]:
import import_data.import_and_convert_data as import_dat
import import_data.find_paths as find_paths
import import_data.preprocessing_meta_info as meta_info
import sig_processing.segment_tasks as tasks
import movement_calc.helpfunctions as hp
import feature_extraction.get_features as get_feat
import feature_extraction.get_files as get_files

### Loading blocks for feature extraction

Reloading own functions

In [358]:
# Reload the project modules so that edits to their source files take effect
# without restarting the kernel.
# The modules are looked up by their dotted names instead of through the
# notebook aliases: the alias `tasks` is rebound to a list of task names in
# the "Define variables of interest" cell, after which
# importlib.reload(tasks) raises a TypeError when this cell is re-run.
# importlib.import_module() returns the already-imported module object, so
# the existing aliases (import_dat, find_paths, ...) see the reloaded code.
for module_name in (
    'import_data.import_and_convert_data',
    'sig_processing.segment_tasks',
    'import_data.find_paths',
    'import_data.preprocessing_meta_info',
    'movement_calc.helpfunctions',
    'feature_extraction.get_features',
    'feature_extraction.get_files',
):
    importlib.reload(importlib.import_module(module_name))

<module 'feature_extraction.get_files' from '/Users/arianm/Documents/GitHub/ultraleap_analysis/code/feature_extraction/get_files.py'>

Define variables of interest

In [387]:
# Data folder handed to find_paths.find_available_subs to list the subjects
folder = 'patientdata'
# Condition and task labels; the cells below use them as directory names
# (data/raw_blocks/<sub>/<task>/<cond>/dist and data/features/feat_dict/<sub>/<task>/<cond>)
conds = ['m1', 'm0s0', 'm0s1', 'm1s0', 'm1s1']
cams = ['dt', 'vr',  'st']
# NOTE(review): this rebinds `tasks`, which was the import alias of
# sig_processing.segment_tasks; after this cell the module is no longer
# reachable under that name — consider renaming one of the two.
tasks = ['ft', 'oc']
sides = ['left', 'right']
# Subjects available in `folder`, sorted in place
subs = find_paths.find_available_subs(folder)
subs.sort()

Saving features per block as json files


In [407]:
# For every subject / condition / task combination, load each distance block,
# compute its features and store them as one JSON file per block.
for sub in subs:
    for cond in conds:
        for task in tasks:
            # Both directories depend only on the current combination,
            # so they are built once here instead of once per file.
            dist_path = os.path.join(repo_path, 'data', 'raw_blocks', sub, task, cond, 'dist')
            feat_path = os.path.join(repo_path, 'data', 'features', 'feat_dict', sub, task, cond)

            try:
                files = os.listdir(dist_path)
            except FileNotFoundError:
                # No data stored for this subject / task / condition
                continue

            # Skip hidden entries such as macOS' '.DS_Store'; they are not block files
            files = [file for file in files if not file.startswith('.')]
            if not files:
                continue

            # exist_ok makes a separate os.path.exists() check unnecessary
            os.makedirs(feat_path, exist_ok=True)

            for file in files:
                # Load block from patients' blocks dir
                block = pd.read_csv(os.path.join(dist_path, file))

                block_features = get_feat.features_across_block(block, task)

                get_files.savedict_as_json(feat_path, file, block_features)

Saving feature blocks per task as csv files

In [405]:
def create_feat_df_per_task(path: str):
    """
    Combine all JSON feature files of one folder into a single dataframe.

    Every non-hidden entry in `path` is expected to be a JSON file that holds
    one dictionary. Each dictionary is turned into a dataframe whose columns
    are the dictionary keys, and the dataframes of all files are stacked in
    alphabetical order of the file names.

    Parameters
    ----------
    path : str
        Folder that contains the JSON files. Hidden entries (names starting
        with '.', e.g. '.DS_Store') are ignored; any other entry must be a
        readable JSON file.

    Returns
    -------
    pandas.DataFrame
        All files stacked row-wise with a fresh integer index; an empty
        dataframe if the folder holds no (non-hidden) files.
    """
    frames = []

    # sorted() makes the row order reproducible; os.listdir() order is arbitrary
    for file in sorted(os.listdir(path)):
        if file.startswith('.'):
            continue

        with open(os.path.join(path, file)) as f:
            data = json.load(f)

        frames.append(pd.DataFrame(list(data.values()), index=list(data.keys())).T)

    # pd.concat() raises on an empty list, so handle the empty folder explicitly
    if not frames:
        return pd.DataFrame()

    # One concat at the end replaces the removed DataFrame.append() and avoids
    # copying the growing dataframe on every iteration
    return pd.concat(frames, ignore_index=True)

In [409]:
# Collect the per-block feature dictionaries of all subjects and conditions,
# one list per task, and save each list as a single CSV file.
ft_val = []
oc_val = []
for task in tasks:
    for sub in subs:
        for cond in conds:
            feat_dir = os.path.join(repo_path, 'data', 'features', 'feat_dict', sub, task, cond)

            try:
                files = os.listdir(feat_dir)
            except FileNotFoundError:
                # No features stored for this subject / task / condition
                continue

            for file in files:
                # Skip hidden entries such as macOS' '.DS_Store'; they are not feature files
                if file.startswith('.'):
                    continue

                # Create new dictionary with filename as first key (used for first column in df later)
                feat_json = {'filename': file}

                # Load dictionary with block features and add them after the filename
                feat_json |= get_files.loadjson_as_dict(os.path.join(feat_dir, file))

                # Make a list of dicts to later create a df based on task
                if task == 'ft':
                    ft_val.append(feat_json)
                else:
                    oc_val.append(feat_json)

# Build the dataframes and write the CSV files once, after everything has been
# collected, instead of rewriting both files for every subject / condition.
# This also defines both dataframes when no feature files were found, and the
# CSV files are then written as well (from empty dataframes).
ft_feat_df = pd.DataFrame(ft_val)
oc_feat_df = pd.DataFrame(oc_val)

feat_df_path = os.path.join(repo_path, 'data', 'features', 'dataframes')
os.makedirs(feat_df_path, exist_ok=True)

ft_feat_df.to_csv(os.path.join(feat_df_path, 'ft_block_features.csv'))
oc_feat_df.to_csv(os.path.join(feat_df_path, 'oc_block_features.csv'))


In [403]:
# Display the dataframe collected for task 'ft' (one row per block feature file)
ft_feat_df

Unnamed: 0,filename,num_events,mean_max_amp,sd_max_amp,coef_var_max_amp,slope_max_amp,mean_max_vel,sd_max_vel,coef_var_max_vel,slope_max_vel,...,mean_tap_dur,sd_tap_dur,coef_var_tap_dur,slope_tap_dur,mean_rms,sd_rms,sum_rms,mean_nrms,sd_nrms,sum_nrms
0,feat_b3_ul001_m1_dt_ft_right.json,22,0.086938,0.018638,0.214378,-0.00149,1.895883,0.490726,0.258838,-0.026876,...,0.381526,0.095752,0.250971,-0.002073,0.056258,0.00818,1.181426,0.160251,0.057567,3.365262
1,feat_b1_ul001_m1_dt_ft_left.json,26,0.126904,0.003875,0.030537,0.000134,2.268218,0.387295,0.170749,-0.014895,...,0.316633,0.040677,0.128467,0.001372,0.077489,0.00583,1.937233,0.250347,0.047922,6.258676
2,feat_b1_ul001_m1_vr_ft_left.json,28,0.132878,0.017873,0.134506,-0.000924,3.185863,0.65872,0.206763,-0.025019,...,0.330214,0.103222,0.31259,0.004987,0.086878,0.011431,2.345702,0.280631,0.061556,7.577044
3,feat_b1_ul001_m1_vr_ft_right.json,32,0.139681,0.005826,0.041713,-0.000427,3.522246,0.625929,0.177707,-0.02769,...,0.281854,0.03606,0.127938,0.00162,0.090552,0.007016,2.807101,0.327962,0.057698,10.166813
4,feat_b1_ul001_m1_dt_ft_right.json,30,0.082066,0.019172,0.23362,-0.001661,2.161654,1.243711,0.575352,-0.080545,...,0.321793,0.067316,0.20919,-0.000184,0.054248,0.009723,1.573187,0.177291,0.054611,5.141437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,feat_b1_ul019_m0s1_dt_ft_left.json,1,,,,,,,,,...,,,,,,,,,,
158,feat_b2_ul019_m0s1_dt_ft_right.json,4,0.015241,0.001204,0.078977,-0.000521,0.29833,0.043425,0.145561,0.052628,...,2.423484,1.57468,0.649759,-0.801336,0.008871,0.001576,0.026614,0.006732,0.005021,0.020197
159,feat_b2_ul019_m0s1_dt_ft_left.json,6,0.041851,0.006241,0.149133,-0.003122,0.290302,0.07904,0.272269,-0.01705,...,1.788183,1.753827,0.980787,0.401338,0.030295,0.003515,0.151474,0.041643,0.035856,0.208217
160,feat_b1_ul019_m0s1_dt_ft_right.json,3,0.032887,0.003017,0.091731,-0.006034,0.32599,0.093892,0.28802,-0.187783,...,2.50501,2.064128,0.824,4.128257,0.023534,0.001917,0.047068,0.031229,0.026497,0.062457
