In [1]:
from scipy.io import loadmat

data = loadmat("Sub7_segMechanics.mat")
print(data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'segMechanics'])


In [2]:
print(type(data['segMechanics']))
print(data['segMechanics'].shape)

<class 'numpy.ndarray'>
(1, 1)


In [3]:
#Extracting the actual struct
seg = data['segMechanics'][0,0]
print(type(seg))

<class 'numpy.void'>


In [12]:
#See what fields exist inside struct
print(seg.dtype.names)

('T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9', 'T10', 'T11', 'T12', 'T13', 'T14', 'T15', 'Pref_Start', 'Pref_End')


In [5]:
#Inspecting the trial
trial = seg['T1']
print(type(trial))
print(trial.shape)

<class 'numpy.ndarray'>
(1, 1)


In [11]:
#Unwrapping
trial = seg['T7'][0,0]
print(trial.dtype.names)

('GaitIndex', 'GRF', 'CoP', 'Angles')


In [13]:
trial = seg['T1'][0,0]

angles = trial['Angles'][0,0]
print(angles.dtype.names)

('Hip_Left', 'Hip_Right', 'Knee_Left', 'Knee_Right', 'Ankle_Left', 'Ankle_Right', 'Pelvis_Saggital', 'Pelvis_Frontal', 'Pelvis_Transverse')


In [78]:
import scipy.io as sio
import h5py
import pandas as pd
import numpy as np

#load data
# 'squeeze_me' simplifies the Matlab nesting dolls
mech = sio.loadmat('Sub7_segMechanics.mat', squeeze_me=True)
ener = h5py.File('Sub7_segEnergetics.mat', 'r')


trial_list = ['T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9', 'T10', 
              'T11', 'T12', 'T13', 'T14', 'T15', 'Pref_Start', 'Pref_End']

all_steps = []
    
for trial_name in trial_list:

    try:
        trial_mech = mech['segMechanics'][trial_name].item()
        angles = trial_mech['Angles'].item()
    except (KeyError, ValueError):
        print(f"Skipping {trial_name} (no Angles)")
        continue

    try:
        fatigue_target = np.mean(ener['segEnergetics'][trial_name]['Metabolic']['MetCost'][()].flatten())
    except KeyError:
        print(f"Skipping {trial_name} (no MetCost)")
        continue

# 3. EXTRACT SPECIFIC SENSORS (The Features)
    # We take the 101 points for Pelvis (Hiking) and Hip (Pulling)
    pelvis_hiking = angles['Pelvis_Frontal'].item() # Hip Hiking indicator
    hip_pull = angles['Hip_Left'].item()           # Power Burst indicator
    knee_shock = angles['Knee_Left'].item()         # Shock Absorption indicator
    

    
    # 4. BUILD THE ROW
    # For every step (row) in this trial, we combine the features and the target
    for i in range(len(pelvis_hiking)):
        # Join the 101 points of each angle into one long 303-point feature row
        feature_row = np.hstack([pelvis_hiking[i], hip_pull[i], knee_shock[i]])
        
        # Add the Fatigue Target (Metabolic Cost) to the end of the row
        full_row = np.append(feature_row, [float(fatigue_target), trial_name]) #else fasle accuracy of model. same avg mc.
        all_steps.append(full_row)

# 5. CREATE THE FINAL ML TABLE
# Generate names for 303 feature columns + 1 target column
col_names = [f'Feat_{i}' for i in range(303)] + ['Fatigue_Score', 'Trial']
df_final = pd.DataFrame(all_steps, columns=col_names)

# Ensure numeric column
df_final['Fatigue_Score'] = df_final['Fatigue_Score'].astype(float)

print(f"Data Prep Complete! You have {df_final.shape} steps ready for ML.")
print(df_final.head())

Skipping Pref_Start (no Angles)
Skipping Pref_End (no Angles)
Data Prep Complete! You have (3916, 305) steps ready for ML.
               Feat_0              Feat_1              Feat_2  \
0  1.1524715291721543  1.1762921822238601  1.2368504859978966   
1  1.2248609398637589  1.3017610981031889  1.3206584001944848   
2  1.1250847728611497  1.1198554492612416  1.1638155338851077   
3  0.9760388579701443  0.8774179626242364   0.934016393524201   
4  1.3680956515316571  1.3677664335172877  1.3413445807354707   

               Feat_3              Feat_4              Feat_5  \
0  1.3090789381507917  1.5065258745639187  1.7077937436592028   
1  1.4968189382603474  1.6405770521066154  1.8159401017934165   
2  1.3189769665574111    1.50070903324463  1.7202313143726902   
3  0.9855865651926333  1.0552155170885162  1.2284270531439934   
4  1.3447925161163607  1.3624457895793096  1.4045976768072395   

               Feat_6              Feat_7              Feat_8  \
0   1.906622864623123   2.1600

In [1]:
import h5py

ener = h5py.File('Sub7_segEnergetics.mat', 'r')

print(list(ener.keys()))


['#refs#', 'segEnergetics']


In [3]:
trial_mech = mech['segMechanics']['T1'].item()
print(trial_mech.dtype.names)

('GaitIndex', 'GRF', 'CoP', 'Angles')


In [6]:
print(angles.dtype.names)

('Hip_Left', 'Hip_Right', 'Knee_Left', 'Knee_Right', 'Ankle_Left', 'Ankle_Right', 'Pelvis_Saggital', 'Pelvis_Frontal', 'Pelvis_Transverse')


In [8]:
print(list(ener.keys()))

['#refs#', 'segEnergetics']


In [9]:
print(list(ener['segEnergetics'].keys()))

['MetabolicResting', 'NormFactors', 'Pref_End', 'Pref_Start', 'T1', 'T10', 'T11', 'T12', 'T13', 'T14', 'T15', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9']


In [11]:
#print(list(ener['segEnergetics']['MC'].keys()))
# MC does not exist

In [12]:
print(list(ener['segEnergetics']['T1'].keys()))

['EMG', 'Metabolic']


In [13]:
trial_data = ener['segEnergetics']['T1']['Metabolic']
print(list(trial_data.keys()))

['CostTrans', 'MetCost', 'RER']


In [16]:
mc = ener['segEnergetics']['T1']['Metabolic']['MetCost'][()]
print(mc)
print(mc.shape)

[[1.96532311 1.9816298  2.22976134 1.98020594 1.38489516 1.75792563
  2.47017404 2.05252811 2.42509045 3.19979459 1.46444506 2.76749535
  2.67796441 2.78165753 2.84112053 4.01294636 4.0042536  2.24240118
  3.41930308 3.51934618 4.08373555 3.54615894 3.48154542 3.58841311
  3.91348459 3.4662005  3.01635287 3.16608094 3.20551782 2.86825186
  3.20916678 2.87786191 2.82935053 2.59644232 3.0376466  2.85736683
  3.14420842 2.34192365 2.6967691  3.11822295 2.80976527 3.31439871
  3.18556969 2.88865191 3.23983949 2.55636719 3.05189165 3.04172464
  2.93392313 3.0282479  3.2122255  3.08959605 2.46940415 2.9092716
  2.78249217 3.03115436 2.99929123 2.17077831 2.72875722 2.61759277
  2.78579886 3.0610116  2.59660425 3.24115425 3.20671441 3.14615222
  3.22556912 2.7709549 ]]
(1, 68)


model time

In [59]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Split features and target
X = df_final.drop(columns=['Fatigue_Score'])
y = df_final['Fatigue_Score']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build model
model = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
    n_jobs=-1
)

# Train
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print("R²:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

#old before updating fullrow and colname to include trial split
#R²: 0.9755098831467827
#RMSE: 0.14654785363697828

ValueError: could not convert string to float: np.str_('T9')

In [79]:
from sklearn.model_selection import train_test_split

# Get unique trials
unique_trials = df_final['Trial'].unique()

# Split trials (not steps!) !!! VERY IMP. AVOID DATA LEAKAGE
train_trials, test_trials = train_test_split(
    unique_trials,
    test_size=0.2,
    random_state=42
)

print("Train Trials:", train_trials)
print("Test Trials:", test_trials)

Train Trials: [np.str_('T14') np.str_('T6') np.str_('T9') np.str_('T3') np.str_('T2')
 np.str_('T15') np.str_('T5') np.str_('T8') np.str_('T11') np.str_('T13')
 np.str_('T4') np.str_('T7')]
Test Trials: [np.str_('T10') np.str_('T12') np.str_('T1')]


In [80]:
train_df = df_final[df_final['Trial'].isin(train_trials)]
test_df  = df_final[df_final['Trial'].isin(test_trials)]

X_train = train_df.drop(columns=['Fatigue_Score', 'Trial'])
y_train = train_df['Fatigue_Score']

X_test = test_df.drop(columns=['Fatigue_Score', 'Trial'])
y_test = test_df['Fatigue_Score']

In [81]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

model = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
    n_jobs=-1
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("R²:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

R²: 0.762532904482562
RMSE: 0.41349780541435366


In [82]:
y_train = pd.to_numeric(y_train)
y_test  = pd.to_numeric(y_test)
print("Mean:", y_test.mean())
print("Std:", y_test.std()) # string! will break!

Mean: 2.441135646848495
Std: 0.8490884824454107


In [83]:
range_y = y_test.max() - y_test.min()
print("NRMSE:", 0.413 / range_y)

NRMSE: 0.2116318233489573


In [84]:
#now, making a faitgue score of 0 to 100 from this 
# z instead of linear
mean_cost = df_final['Fatigue_Score'].mean()
std_cost = df_final['Fatigue_Score'].std()

def fatigue_to_score(predicted_cost):
    z = (predicted_cost - mean_cost) / std_cost
    score = 50 + (z * 15)  # 50 = average, 15 = spread
    return np.clip(score, 0, 100)

In [85]:
def predict_fatigue(new_angle_data):
    
    # 1. Format features exactly like training
    feature_row = build_feature_vector(new_angle_data)
    
    # 2. Scale if you used scaler
    feature_row = scaler.transform([feature_row])
    
    # 3. Predict metabolic cost
    predicted_cost = model.predict(feature_row)[0]
    
    # 4. Convert to 0–100 fatigue score
    fatigue_score = fatigue_to_score(predicted_cost, min_cost, max_cost)
    
    return round(fatigue_score, 1)

In [86]:
#to rank on basis of avg metabolic costs to find speed groups. since not a columns
trial_means = {}

for trial_name in trial_list:
    try:
        met = ener['segEnergetics'][trial_name]['Metabolic']['MetCost'][()]
        trial_means[trial_name] = np.mean(met)
    except KeyError:
        continue

sorted_trials = sorted(trial_means.items(), key=lambda x: x[1])

for t, val in sorted_trials:
    print(t, val)

T12 1.092583220563616
T11 1.164440630347842
T13 1.2221780139473155
T14 1.2317186408228655
T15 1.5436120822669719
T6 2.230593217866978
T8 2.3071786610243055
T7 2.643693669704861
T1 2.8870274217009246
T9 2.9274571691524622
T10 3.044085567727204
T3 3.0813067144590436
T2 3.38920414614899
T4 3.903571247666885
T5 3.924505372068571


In [87]:
def get_speed(trial_name):
    low_speed = ['T12', 'T11', 'T13', 'T14', 'T15']
    mid_speed = ['T6', 'T8', 'T7', 'T1', 'T9']
    high_speed = ['T10', 'T3', 'T2', 'T4', 'T5']
    
    if trial_name in low_speed:
        return 0.4
    elif trial_name in mid_speed:
        return 0.8
    elif trial_name in high_speed:
        return 1.1
    else:
        return 0  # fallback