In [1]:
import os
from os.path import join, pardir, curdir
import glob
import numpy as np
import pandas as pd
import scipy as sp
from tqdm import tqdm
from matplotlib import pyplot as plt
from joblib import load, dump
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, precision_score, classification_report

# import warnings
# warnings.filterwarnings('ignore')

# Move the working directory one level up and into the sibling "src" folder.
os.chdir(join(os.getcwd(), pardir, "src"))
print(f"Current working directory: {os.getcwd()}")

# Wildcard-import the project helpers. These imports are placed after the
# chdir above on purpose.
# NOTE(review): which names each module provides is not visible from this
# notebook -- presumably random_split_list / get_windowed_data come from here.
from feature_extractor import *
from process import *

Current working directory: /home/fa926284/Documents/Parkinson_FOG_Detection/src


In [2]:
# ---------------------------------------------------------------------------
# Configuration: dataset names, input/output locations and the split seed.
# ---------------------------------------------------------------------------
DATASETS = ['tdcsfog', 'defog']
DATA_PATH = join(pardir, 'data')
PROCESSED_DATA_PATH = join(DATA_PATH, "processed_dl")

RANDOM_SEED = 42

# Create the output directory for the dumped arrays. makedirs also creates
# missing parent directories and is a no-op when the directory already exists.
os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)

# Collect every recording (one CSV per file) of each dataset.
FILE_DIRS = []
TEST_DIRS = []
for dataset in DATASETS:
    # glob returns paths in arbitrary (filesystem-dependent) order; sorting
    # makes the seeded split below reproducible across machines and runs.
    # NOTE(review): the sorted order may assign files to train/validation
    # differently than earlier, unsorted runs did.
    FILE_DIRS.extend(sorted(glob.glob(join(DATA_PATH, "train", dataset, "*.csv"))))
    TEST_DIRS.extend(sorted(glob.glob(join(DATA_PATH, "test", dataset, "*.csv"))))

# Seeded train/validation split of the training files, driven by the
# RANDOM_SEED constant instead of a second hard-coded literal.
TRAIN_DIRS, VAL_DIRS = random_split_list(FILE_DIRS,
                                         random_seed=RANDOM_SEED,
                                         split=0.98)

print(f"Total subjects: {len(FILE_DIRS)}")
print(f"Train subjects: {len(TRAIN_DIRS)}")
print(f"Validation subjects: {len(VAL_DIRS)}")
print(f"Test subjects: {len(TEST_DIRS)}")


Total subjects: 924
Train subjects: 905
Validation subjects: 19
Test subjects: 2


In [3]:
# Build the windowed training arrays and dump them to PROCESSED_DATA_PATH.
# Re-running this cell recomputes everything and overwrites the dumped files.
print("\n\nProcessing Train Dataset ... ", end='\n\n')

X_train = []
y_train = []

# NOTE(review): only the files from index 500 onward are processed and the
# results are saved with a "2" suffix -- presumably the first 500 files were
# handled in a separate run; confirm before using these arrays on their own.
for file_dir in tqdm(TRAIN_DIRS[500:]):
    df = pd.read_csv(file_dir)

    # Files that carry 'Valid'/'Task' flags: keep only rows where both are True.
    if 'Task' in df.columns and 'Valid' in df.columns:
        df = df[(df['Valid'] == True) & (df['Task'] == True)]
        # drop=True: do not keep the old row index as an extra 'index' column.
        df = df.drop(['Valid', 'Task'], axis=1).reset_index(drop=True)

    # Collapse the three event columns into one integer label
    # (StartHesitation -> 1, Turn -> 2, Walking -> 3, none -> 0).
    # NOTE(review): the terms are combined with bitwise OR, so overlapping
    # events would alias (e.g. 1 | 2 == 3); this assumes the event columns are
    # mutually exclusive 0/1 indicators -- confirm against the data.
    df['label'] = (df['StartHesitation'] * 1) | (df['Turn']
                                                 * 2) | (df['Walking'] * 3)
    df = df.astype({'label': 'int'})
    label = df['label'].to_numpy()

    # Window each acceleration axis with identical settings. Only the windowed
    # signal (first return value) is kept; the per-sample labels are used as
    # targets, exactly as before.
    axis_windows = [
        get_windowed_data(df[axis].to_numpy(), label,
                          win_len=100, slide_step=1)[0]
        for axis in ('AccAP', 'AccV', 'AccML')
    ]
    features = np.stack(axis_windows, axis=-1)

    # Using the per-sample labels is only valid when get_windowed_data yields
    # one window per sample; fail loudly instead of saving misaligned arrays.
    if len(features) != len(label):
        raise ValueError(
            f"{file_dir}: {len(features)} windows but {len(label)} labels; "
            "windows and labels are not aligned")

    X_train.append(features)
    y_train.append(label)

X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)
dump(X_train, join(PROCESSED_DATA_PATH, "X_train2.joblib"))
dump(y_train, join(PROCESSED_DATA_PATH, "y_train2.joblib"))




Processing Train Dataset ... 



100%|██████████| 405/405 [00:22<00:00, 18.33it/s]


['../data/processed_dl/y_train2.joblib']

In [5]:
# Build the windowed validation arrays and dump them to PROCESSED_DATA_PATH.
# Re-running this cell recomputes everything and overwrites the dumped files.
print("\n\nProcessing Validation Dataset ... ", end='\n\n')

X_val = []
y_val = []

for file_dir in tqdm(VAL_DIRS):
    df = pd.read_csv(file_dir)

    # Files that carry 'Valid'/'Task' flags: keep only rows where both are True.
    if 'Task' in df.columns and 'Valid' in df.columns:
        df = df[(df['Valid'] == True) & (df['Task'] == True)]
        # drop=True: do not keep the old row index as an extra 'index' column.
        df = df.drop(['Valid', 'Task'], axis=1).reset_index(drop=True)

    # Collapse the three event columns into one integer label
    # (StartHesitation -> 1, Turn -> 2, Walking -> 3, none -> 0).
    # NOTE(review): the terms are combined with bitwise OR, so overlapping
    # events would alias (e.g. 1 | 2 == 3); this assumes the event columns are
    # mutually exclusive 0/1 indicators -- confirm against the data.
    df['label'] = (df['StartHesitation'] * 1) | (df['Turn']
                                                 * 2) | (df['Walking'] * 3)
    df = df.astype({'label': 'int'})
    label = df['label'].to_numpy()

    # Window each acceleration axis with identical settings. Only the windowed
    # signal (first return value) is kept; the per-sample labels are used as
    # targets, exactly as before.
    axis_windows = [
        get_windowed_data(df[axis].to_numpy(), label,
                          win_len=100, slide_step=1)[0]
        for axis in ('AccAP', 'AccV', 'AccML')
    ]
    features = np.stack(axis_windows, axis=-1)

    # Using the per-sample labels is only valid when get_windowed_data yields
    # one window per sample; fail loudly instead of saving misaligned arrays.
    if len(features) != len(label):
        raise ValueError(
            f"{file_dir}: {len(features)} windows but {len(label)} labels; "
            "windows and labels are not aligned")

    X_val.append(features)
    y_val.append(label)

X_val = np.concatenate(X_val)
y_val = np.concatenate(y_val)
dump(X_val, join(PROCESSED_DATA_PATH, "X_val.joblib"))
dump(y_val, join(PROCESSED_DATA_PATH, "y_val.joblib"))




Processing Validation Dataset ... 



100%|██████████| 19/19 [00:00<00:00, 29.20it/s]


['../data/processed_dl/y_val.joblib']

In [6]:
# Build the windowed test array (no labels) and dump it to PROCESSED_DATA_PATH.
# Re-running this cell recomputes everything and overwrites the dumped file.
print("\n\nProcessing Test Dataset ... ", end='\n\n')

X_test = []

for file_dir in tqdm(TEST_DIRS):
    df = pd.read_csv(file_dir)

    # Files that carry 'Valid'/'Task' flags: keep only rows where both are True.
    if 'Task' in df.columns and 'Valid' in df.columns:
        df = df[(df['Valid'] == True) & (df['Task'] == True)]
        # drop=True: do not keep the old row index as an extra 'index' column.
        df = df.drop(['Valid', 'Task'], axis=1).reset_index(drop=True)

    # Window each acceleration axis with identical settings. None is passed in
    # place of labels and only the windowed signal (first return value) is
    # kept, so the notebook-level `label` variable is no longer overwritten.
    axis_windows = [
        get_windowed_data(df[axis].to_numpy(), None,
                          win_len=100, slide_step=1)[0]
        for axis in ('AccAP', 'AccV', 'AccML')
    ]
    X_test.append(np.stack(axis_windows, axis=-1))

X_test = np.concatenate(X_test)
dump(X_test, join(PROCESSED_DATA_PATH, "X_test.joblib"))



Processing Test Dataset ... 



100%|██████████| 2/2 [00:00<00:00,  4.16it/s]


['../data/processed_dl/X_test.joblib']

: 