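This notebook can be used to load raw Ultraleap data,
save cleaned dataframes for each block,
and generate dataframes of distances for further feature extraction.
The cells below load the per-block distance dataframes, extract features for each block (saved as JSON files), and collect them into one feature table per task (saved as CSV files).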

Import public packages and functions

In [1]:
import os
import importlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from itertools import compress


import openpyxl
from datetime import datetime
import math
import statistics as stat
import json

In [2]:
def get_repo_path_in_notebook():
    """
    Finds path of repo from Notebook.
    Start running this once to correctly find
    other modules/functions.

    Walks upwards from the current working directory until a path
    ending in the repository name is reached.

    Returns:
        str: path of the repository root.

    Raises:
        FileNotFoundError: if neither the current working directory nor
            any of its parents ends with the repository name.
    """
    repo_name = 'ultraleap_analysis'
    start_path = os.getcwd()
    path = start_path

    while not path.endswith(repo_name):
        parent = os.path.dirname(path)

        # os.path.dirname() of the filesystem root returns the root itself;
        # without this check the loop would never terminate when the
        # notebook is started outside of the repository.
        if parent == path:
            raise FileNotFoundError(
                f"No parent directory ending in '{repo_name}' found "
                f"above '{start_path}'"
            )

        path = parent

    return path


In [3]:
# Locate the repository root and make <repo>/code the working directory.
# NOTE(review): presumably this is what lets the project packages imported
# in the following cell be resolved - confirm.
repo_path = get_repo_path_in_notebook()
code_path = os.path.join(repo_path, 'code')
os.chdir(code_path)

Import own functions

In [4]:
import import_data.import_and_convert_data as import_dat
import import_data.find_paths as find_paths
import import_data.preprocessing_meta_info as meta_info
import sig_processing.segment_tasks as seg_tasks
import movement_calc.helpfunctions as hp
import feature_extraction.get_features as get_feat
import feature_extraction.get_files as get_files

### Loading blocks for feature extraction

Reloading own functions

In [5]:
# Re-import the project modules so that edits made to their source files
# after the first import are picked up without restarting the kernel.
# The order of the calls is kept as-is.
importlib.reload(import_dat)
importlib.reload(seg_tasks)
importlib.reload(find_paths)
importlib.reload(meta_info)
importlib.reload(hp)
importlib.reload(get_feat)
importlib.reload(get_files)

<module 'feature_extraction.get_files' from '/Users/arianm/Documents/GitHub/ultraleap_analysis/code/feature_extraction/get_files.py'>

Define variables of interest

In [8]:
# Selection used by the cells below.
# `folder` is the sub-directory used under data/distances and data/features.
folder = 'patientdata'
# NOTE(review): conds, cams and sides are not referenced by the cells
# visible in this notebook - confirm whether they are still needed.
conds = ['m1', 'm0s0', 'm0s1', 'm1s0', 'm1s1']
cams = ['dt', 'vr',  'st']
# Task codes; they are used as directory names and passed on to the
# feature extraction.
tasks = ['ft', 'oc']
sides = ['left', 'right']
# Subjects available for `folder`, sorted in place so that they are
# processed in a fixed order.
subs = find_paths.find_available_subs(folder)
subs.sort()

Saving features per block as json files


In [9]:
# Extract the features of every distance block and store them as one JSON
# file per block under data/features/feat_dict/<folder>/<sub>/<task>.
for sub in subs:
    for task in tasks:
        # Both directories are fixed for a subject/task pair, so they are
        # built once here instead of once per file.
        dist_path = os.path.join(repo_path, 'data', 'distances', folder, sub, task)
        feat_path = os.path.join(repo_path, 'data', 'features', 'feat_dict', folder, sub, task)

        try:
            files = os.listdir(dist_path)
        except FileNotFoundError:
            # No distance data for this subject/task combination
            continue

        for file in files:
            # Skip the macOS folder metadata file
            if file == '.DS_Store':
                continue

            # Load block from the patient's distances dir
            block = pd.read_csv(os.path.join(dist_path, file), index_col=0)

            block_features = get_feat.features_across_block(block, task)

            # exist_ok avoids the separate "exists?" check before creating
            os.makedirs(feat_path, exist_ok=True)

            # The feature file keeps the name of the distance file it was
            # computed from
            get_files.savedict_as_json(feat_path, file, block_features)

Saving feature blocks per task as csv files

In [12]:
# Another idea for the csv saving function. Maybe revisit at a later stage!
# The function below assumes all the data is placed in one folder and nothing
# else (apart from a '.DS_Store' file) is placed in this folder.
# Adjust to do something like
# "for i in subject_ids: with open(os.path.join(path, "shows_", i, ".json")) as f:"

def create_feat_df_per_task(path: str) -> pd.DataFrame:
    """
    Combine all JSON feature files of one folder into a single dataframe.

    Every file in `path` is parsed as a JSON object. Its keys become the
    columns of the result and its values are stacked below the values of
    the files read before it.

    Args:
        path: directory that contains only JSON files (a '.DS_Store'
            file is ignored).

    Returns:
        Dataframe with a fresh 0..n-1 index. Files are read in sorted
        filename order. An empty dataframe is returned when the folder
        holds no files.
    """
    frames = []

    # sorted() makes the row order independent of the file system
    for file in sorted(os.listdir(path)):
        # Skip the macOS folder metadata file
        if file == '.DS_Store':
            continue

        with open(os.path.join(path, file)) as f:
            data = json.load(f)

        # One row per key, then transposed so that the keys become columns
        df = pd.DataFrame(list(data.values()), index=list(data.keys())).T
        frames.append(df)

    # pd.concat raises on an empty list
    if not frames:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenating once is also
    # cheaper than growing the frame file by file
    return pd.concat(frames, ignore_index=True)

In [12]:
# Collect the feature dicts of all blocks, one list per task.
ft_val = []
oc_val = []
for task in tasks:
    for sub in subs:
        feat_dir = os.path.join(repo_path, 'data', 'features', 'feat_dict', folder, sub, task)

        try:
            files = os.listdir(feat_dir)

        except FileNotFoundError:
            # No features stored for this subject/task combination
            continue

        for file in files:
            # Skip the macOS folder metadata file, it is not a feature file
            if file == '.DS_Store':
                continue

            # Create new dictionary with filename as first key (used for first column in df later)
            feat_json = {'filename': f'{file}'}

            # Load dictionary with block features
            feat_path = os.path.join(feat_dir, file)
            old_feat_json = get_files.loadjson_as_dict(feat_path)

            # Update the new dictionary with block features
            feat_json |= old_feat_json

            # Make a list of dicts to later create a df based on task
            if task == 'ft':
                ft_val.append(feat_json)
            elif task == 'oc':
                oc_val.append(feat_json)

# Build and write the tables once, after all blocks have been collected,
# instead of rebuilding and rewriting both CSVs for every subject.
# Blocks with a missing value in any feature are dropped; the index of the
# remaining rows is left untouched.
ft_feat_df = pd.DataFrame(ft_val).dropna()
oc_feat_df = pd.DataFrame(oc_val).dropna()

feat_df_path = os.path.join(repo_path, 'data', 'features', 'dataframes', folder)
os.makedirs(feat_df_path, exist_ok=True)

ft_feat_df.to_csv(os.path.join(feat_df_path, 'ft_block_features.csv'))
oc_feat_df.to_csv(os.path.join(feat_df_path, 'oc_block_features.csv'))
