In [None]:
# import opensim as osim
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from OA_utils.data_utils import *
from OA_utils.OpenSimScripts import get_all_segments
import random
import pickle

Load data from file

In [2]:
# NOTE(review): machine-specific absolute Windows path; every later cell that
# reads or writes data builds its file names from this prefix.
OA_data_dir = "C:\\Users\\bakel\\Desktop\\GRFMuscleModel\\Old_Young_Walking_Data\\"

# The compiled segments are stored as a pickle. pickle.load can execute
# arbitrary code embedded in the file, so only load files you trust.
segments_path = OA_data_dir + 'resampled_compiled_segments'
with open(segments_path, 'rb') as segments_file:
    OA_segs = pickle.load(segments_file)

Print the number of segments available for each subject, and the overall total

In [10]:
# Count segments per subject. "time_resampled" is a non-subject entry of
# OA_segs and is skipped. "grf_y" is used as the representative signal: any
# signal key that exists in every subject would do.
segment_counts = {
    subj: len(data["grf_y"])
    for subj, data in OA_segs.items()
    if subj != "time_resampled"
}

for subj, num_segments in segment_counts.items():
    print(f"{subj}: {num_segments} segments")

overall_segs = sum(segment_counts.values())
print(overall_segs, 'segments overall')

OA1: 42 segments
OA2: 43 segments
OA4: 41 segments
OA5: 39 segments
OA7: 45 segments
OA8: 44 segments
OA9: 42 segments
OA10: 39 segments
OA11: 25 segments
OA12: 38 segments
OA13: 44 segments
OA14: 43 segments
OA17: 37 segments
OA18: 39 segments
OA19: 25 segments
OA20: 22 segments
OA22: 44 segments
OA23: 44 segments
OA24: 38 segments
OA25: 44 segments
778 segments overall


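Split the data by subject: shuffle the subjects with a fixed seed and assign roughly 80% / 10% / 10% of them to train / validation / test, so that no subject contributes segments to more than one split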

In [17]:
# Every key of OA_segs except "time_resampled" is treated as a subject ID.
subjects = [name for name in OA_segs.keys() if name != "time_resampled"]

# Fixed seed so the shuffle (and therefore the split) is reproducible.
random.seed(42)
subjects_shuffled = list(subjects)
random.shuffle(subjects_shuffled)

# Cut points at 80% and 90% of the subject count; int() truncates, so the
# test split absorbs any remainder.
N = len(subjects_shuffled)
train_end, val_end = int(N * 0.8), int(N * (0.9))

train_subjs = subjects_shuffled[:train_end]
val_subjs = subjects_shuffled[train_end:val_end]
test_subjs = subjects_shuffled[val_end:]

# Each split maps subject ID -> that subject's entry in OA_segs. Splitting on
# whole subjects keeps one subject's segments out of multiple splits.
train_data, val_data, test_data = (
    {subj_id: OA_segs[subj_id] for subj_id in subj_group}
    for subj_group in (train_subjs, val_subjs, test_subjs)
)

# The non-subject entry is kept separately from the splits.
time_resampled = OA_segs["time_resampled"]

Display segment counts per split

In [18]:
def count_segments_by_signal(split_dict):
    """Count the segments available per signal across all subjects of a split.

    Parameters
    ----------
    split_dict : dict
        Maps subject ID -> {signal name -> sequence of segments}.

    Returns
    -------
    dict
        Maps signal name -> total number of segments summed over every
        subject in ``split_dict``. The signal names are taken from the first
        subject of the split. An empty split yields an empty dict.

    Raises
    ------
    KeyError
        If a later subject lacks one of the first subject's signal names.
    """
    # A split can be empty when there are few subjects (the 80% and 90% cut
    # points can coincide); report that as "no counts" rather than letting
    # next() raise StopIteration.
    example_subj = next(iter(split_dict), None)
    if example_subj is None:
        return {}
    return {
        key: sum(len(subj_data[key]) for subj_data in split_dict.values())
        for key in split_dict[example_subj].keys()
    }


train_out = count_segments_by_signal(train_data)
val_out = count_segments_by_signal(val_data)
test_out = count_segments_by_signal(test_data)

print("Train:", train_out)
print("Val:",   val_out)
print("Test:",  test_out)

Train: {'grf_x': 633, 'grf_y': 633, 'grf_z': 633, 'tibpost': 633, 'tibant': 633, 'edl': 633, 'ehl': 633, 'fdl': 633, 'fhl': 633, 'gaslat': 633, 'gasmed': 633, 'soleus': 633, 'perbrev': 633, 'perlong': 633, 'achilles': 633}
Val: {'grf_x': 64, 'grf_y': 64, 'grf_z': 64, 'tibpost': 64, 'tibant': 64, 'edl': 64, 'ehl': 64, 'fdl': 64, 'fhl': 64, 'gaslat': 64, 'gasmed': 64, 'soleus': 64, 'perbrev': 64, 'perlong': 64, 'achilles': 64}
Test: {'grf_x': 81, 'grf_y': 81, 'grf_z': 81, 'tibpost': 81, 'tibant': 81, 'edl': 81, 'ehl': 81, 'fdl': 81, 'fhl': 81, 'gaslat': 81, 'gasmed': 81, 'soleus': 81, 'perbrev': 81, 'perlong': 81, 'achilles': 81}


In [None]:
def dict_to_array(split_dict, signal_keys=None):
    """Pack every segment of every subject in a split into one 3-D array.

    Parameters
    ----------
    split_dict : dict
        Maps subject ID -> {signal name -> sequence of segments}.
        Assumes each segment is a 1-D sequence of samples and that all
        segments share one length -- TODO confirm against the resampling step.
    signal_keys : sequence of str, optional
        Signal names to use as columns, in column order. Defaults to the
        three GRF components followed by nine muscle/tendon signals (see
        below). The number of segments per subject is read from the first
        key in this sequence.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_segments, n_samples, n_signals), with subjects
        concatenated in ``split_dict`` iteration order. If the split contains
        no segments, an empty array of shape (0, 0, n_signals) is returned so
        that 3-D slicing by callers still works.

    Raises
    ------
    ValueError
        If ``signal_keys`` is empty.
    """
    if signal_keys is None:
        # Column order is load-bearing: the first three columns (GRF x/y/z)
        # are sliced off as model inputs elsewhere in this notebook and the
        # rest are used as targets.
        # NOTE(review): 'gaslat', 'gasmed' and 'soleus' are present in the
        # data but not packed here -- presumably they are represented by
        # 'achilles'; confirm this omission is intended.
        signal_keys = (
            'grf_x', 'grf_y', 'grf_z',
            'tibpost', 'tibant', 'edl', 'ehl', 'fdl', 'fhl',
            'perbrev', 'perlong', 'achilles',
        )
    signal_keys = tuple(signal_keys)
    if not signal_keys:
        raise ValueError("signal_keys must contain at least one signal name")

    packed_segments = []
    for data in split_dict.values():
        # The first signal determines how many segments this subject has.
        num_segs = len(data[signal_keys[0]])
        for i in range(num_segs):
            # One column per signal -> (n_samples, n_signals) for segment i.
            packed_segments.append(
                np.column_stack([data[key][i] for key in signal_keys])
            )

    if not packed_segments:
        # np.array([]) would be 1-D, which breaks arr[:, :, :k] downstream.
        return np.empty((0, 0, len(signal_keys)))
    return np.array(packed_segments)
    

In [26]:
# Pack each split into a single array with dict_to_array.
train_arr, val_arr, test_arr = (
    dict_to_array(split_dict)
    for split_dict in (train_data, val_data, test_data)
)

# Report the resulting array shapes as a sanity check.
for split_label, split_arr in (
    ("Train:", train_arr),
    ("Val:", val_arr),
    ("Test:", test_arr),
):
    print(split_label, split_arr.shape)

Train: (633, 100, 12)
Val: (64, 100, 12)
Test: (81, 100, 12)


In [27]:
# Column layout follows dict_to_array: the first three columns are the GRF
# components (used as inputs X); every remaining column is a target (y).
input_cols = slice(None, 3)
target_cols = slice(3, None)

X_train, y_train = train_arr[:, :, input_cols], train_arr[:, :, target_cols]
X_val, y_val = val_arr[:, :, input_cols], val_arr[:, :, target_cols]
X_test, y_test = test_arr[:, :, input_cols], test_arr[:, :, target_cols]

# Print every shape so the input/target split can be checked by eye.
for array_name, array_value in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_val", X_val),
    ("y_val", y_val),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{array_name} shape: {array_value.shape}")

X_train shape: (633, 100, 3)
y_train shape: (633, 100, 9)
X_val shape: (64, 100, 3)
y_val shape: (64, 100, 9)
X_test shape: (81, 100, 3)
y_test shape: (81, 100, 9)


In [None]:
# Write each split to its own uncompressed .npz archive next to the source
# data. The keyword names (X_train, y_train, ...) become the array names
# inside each archive.
train_path = OA_data_dir + 'mixed_train_data.npz'
val_path = OA_data_dir + 'mixed_val_data.npz'
test_path = OA_data_dir + 'mixed_test_data.npz'

np.savez(train_path, X_train=X_train, y_train=y_train)
np.savez(val_path, X_val=X_val, y_val=y_val)
np.savez(test_path, X_test=X_test, y_test=y_test)