This notebook loads the block-level feature dataframe extracted from the Ultraleap recordings 
together with the matching scores (labels), 
and uses them to train and cross-validate a classifier

Import public packages and functions

In [1]:
import os
import importlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from itertools import compress


import openpyxl
from datetime import datetime
import math
import statistics as stat
import json

In [2]:
def get_repo_path_in_notebook(repo_name='ultraleap_analysis'):
    """
    Finds path of repo from Notebook.
    Start running this once to correctly find
    other modules/functions.

    Starting at the current working directory, parent
    directories are visited until a path ending in
    `repo_name` is reached.

    Parameters
    ----------
    repo_name : str
        Name the repository path has to end with
        (default 'ultraleap_analysis').

    Returns
    -------
    str
        First path, walking upwards from the working
        directory, that ends with `repo_name`.

    Raises
    ------
    FileNotFoundError
        If the filesystem root is reached without finding
        a path that ends with `repo_name`.
    """
    path = os.getcwd()

    while not path.endswith(repo_name):

        parent = os.path.dirname(path)

        # os.path.dirname() of the filesystem root returns the root
        # itself; without this check the loop would never terminate
        # when the notebook is started outside of the repository
        if parent == path:
            raise FileNotFoundError(
                f"No directory ending in '{repo_name}' found "
                f"above '{os.getcwd()}'"
            )

        path = parent

    return path


In [3]:
# Root folder of the repository, found by walking up from the working directory
repo_path = get_repo_path_in_notebook()
# The 'code' folder inside the repository
code_path = os.path.join(repo_path, 'code')
# Switch the working directory to the code folder - presumably so that the
# project packages imported in the next cell can be found (TODO confirm).
# NOTE: this changes the working directory for every following cell.
os.chdir(code_path)

Import own functions

In [4]:
import import_data.import_and_convert_data as import_dat
import import_data.find_paths as find_paths
import import_data.preprocessing_meta_info as meta_info
import sig_processing.segment_tasks as seg_tasks
import movement_calc.helpfunctions as hp
import feature_extraction.get_features as get_feat
import feature_extraction.get_files as get_files

### 1. Loading Features (X) and scores (y)

Load features from csv


In [54]:
# Folder <repo>/data/features/dataframes/patientdata with the feature tables
feat_df_path = os.path.join(
    repo_path, 'data', 'features', 'dataframes', 'patientdata'
)

# Read the feature table; the first column of the csv becomes the index
X_df = pd.read_csv(
    os.path.join(feat_df_path, 'ft_block_features.csv'),
    index_col=0,
)

Load scores / labels

In [33]:
def get_labels_for_feat_df(ft_df):
    """
    Collect the score (label) for every row of a feature dataframe.

    The identifiers needed to look up a score are parsed from the
    'filename' column. An optional leading 'feat_' and the last five
    characters (presumably the '.json' extension - TODO confirm) are
    removed; the remainder is expected to read
    '<block>_<sub>_<cond>_<cam>_<task>_<side>'.
    The 'feat' prefix is checked for every filename individually.

    Parameters
    ----------
    ft_df : pandas.DataFrame
        Feature dataframe containing a 'filename' column.

    Returns
    -------
    list
        One value returned by get_files.get_scores() per row, in row
        order. Empty list if the dataframe has no rows.

    Raises
    ------
    ValueError
        If a filename does not split into exactly six parts
        separated by '_'.
    """
    y = []  # list to store labels

    # read from the dataframe that was passed in (not from a notebook
    # global); iterating over a plain list avoids label-based lookups
    # that break when the index does not contain 0 or the frame is empty
    for filename in ft_df['filename'].tolist():

        # strip the 'feat_' prefix (5 characters) if present
        name = filename[5:] if filename.startswith('feat') else filename
        # strip the trailing 5 characters (file extension)
        name = name[:-5]

        id_list = name.split('_')
        if len(id_list) != 6:
            raise ValueError(
                f"Cannot parse filename '{filename}': expected 6 parts "
                f"separated by '_', found {len(id_list)}"
            )

        block, sub, cond, cam, task, side = id_list
        value = get_files.get_scores(sub, cond, cam, task, side, block)
        y.append(value)

    return y

In [76]:
# Look up one score per row of the feature table and store the labels
# directly as a numpy array (needed for index-based fold selection)
y = np.array(get_labels_for_feat_df(X_df))

### 2. Select which features you want to use !!


In [72]:
# Leave out the json name: the 'filename' column identifies the recording
# and is not a feature. It is dropped by name rather than by position, so
# the selection does not depend on the column order, and the result is
# requested as float so X is a numeric array instead of an object array.
X = X_df.drop(columns='filename').to_numpy(dtype=float)


### 3. Data splitting, create training and test data


In [37]:
# import data splitting functions
from sklearn.model_selection import StratifiedKFold, KFold

In [74]:
# Preview which rows end up in the train and test part of every fold.
# StratifiedKFold splits on the labels y; the imported KFold is the
# alternative that splits on X alone.
skf = StratifiedKFold(n_splits=4)

for fold_no, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(
        f"Fold {fold_no}:\n"
        f"  Train: index={train_index}\n"
        f"  Test:  index={test_index}"
    )

Fold 0:
  Train: index=[ 3  4  7  8  9 10 11 14 15 16 17 18 19 20 21 22 23 24]
  Test:  index=[ 0  1  2  5  6 12 13]
Fold 1:
  Train: index=[ 0  1  2  5  6  8 11 12 13 14 15 16 17 18 20 21 22 23 24]
  Test:  index=[ 3  4  7  9 10 19]
Fold 2:
  Train: index=[ 0  1  2  3  4  5  6  7  9 10 12 13 16 18 19 21 22 23 24]
  Test:  index=[ 8 11 14 15 17 20]
Fold 3:
  Train: index=[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 17 19 20]
  Test:  index=[16 18 21 22 23 24]


### 3. Define classifiers

In [85]:
# import classifiers
from sklearn.svm import LinearSVC

# try multiclass with Random Forest


In [86]:
# import metrics
from sklearn.metrics import accuracy_score

In [75]:
# Fix the seed of the solver's random number generator so that repeated
# runs of the notebook produce the same fitted model and the same scores
clf = LinearSVC(random_state=0)

### 4. Run Cross validation

In [90]:
skf = StratifiedKFold(n_splits=4)

# Optional: to go from multiclass to binary, derive a boolean target
# (e.g. y > 1) and use it in place of y below.

# one iteration per fold
for fold_no, (train_index, test_index) in enumerate(skf.split(X, y)):

    # features and labels of the training part of the current fold
    train_X, train_y = X[train_index], y[train_index]
    # features and labels of the held-out part of the current fold
    test_X, test_y = X[test_index], y[test_index]

    # fit on the training part, then predict the held-out part
    y_true = test_y
    y_pred = clf.fit(train_X, train_y).predict(test_X)

    # accuracy of the current fold
    print(accuracy_score(y_true, y_pred))

0.5714285714285714
[False False False  True  True  True  True]
0.5
[False False False  True  True  True]
0.6666666666666666
[False  True False  True  True  True]
0.6666666666666666
[False  True  True False  True  True]


