In [14]:
import pandas as pd
import numpy as np
import time
import os
import glob
import json
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from collections import defaultdict

## Configuration

In [2]:
# Never truncate the column list when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# Root folder of the derived data (experiment folders are searched below it)
DERIVED_DATA_DIR = '../../../data'

# How many force cells the robotic leg carries
N_CELLS = 8

## Prediction of the data from the same experiment 

In [3]:
H3_LEG = 'L' # L|R

# Feature column names are built as <leg><joint><signal>, e.g. 'LHipPos'
features = []
for joint in ['Hip', 'Knee']:
    for signal in ['Pos', 'Torque']:
        features.append(H3_LEG + joint + signal)

# Target column names are built as F<cell number><axis>, cells numbered from 1
targets = []
for cell in range(1, N_CELLS + 1):
    for axis in ['x', 'y', 'z']:
        targets.append('F' + str(cell) + axis)

n_features, n_targets = len(features), len(targets)

print('Number of features: {}'.format(n_features))
print('Selected features: {}'.format(features))
print('\n')
print('Number of targets: {}'.format(n_targets))
print('Selected targets: {}'.format(targets))

Number of features: 4
Selected features: ['LHipPos', 'LHipTorque', 'LKneePos', 'LKneeTorque']


Number of targets: 24
Selected targets: ['F1x', 'F1y', 'F1z', 'F2x', 'F2y', 'F2z', 'F3x', 'F3y', 'F3z', 'F4x', 'F4y', 'F4z', 'F5x', 'F5y', 'F5z', 'F6x', 'F6y', 'F6z', 'F7x', 'F7y', 'F7z', 'F8x', 'F8y', 'F8z']


In [4]:
# Scaler used to normalize the features (StandardScaler is the commented-out alternative)
SCALER = MinMaxScaler() # StandardScaler()

# Fraction (0-1) of the data reserved for the test set
TEST_SIZE = 0.3

In [5]:
# Collect every experiment folder two levels below the data directory.
# glob.glob returns paths in arbitrary (filesystem-dependent) order, so the list is
# sorted to make "the first experiment" and the iteration order reproducible.
experiments_dirs_path = sorted(glob.glob(DERIVED_DATA_DIR + '/*/*'))
if not experiments_dirs_path:
    # Fail with an explicit message instead of an opaque IndexError on [0]
    raise FileNotFoundError('No experiment directories found under ' + DERIVED_DATA_DIR)

# Experiment used by the single-experiment cells below
exp_path = experiments_dirs_path[0]

print('Experiment: ' + exp_path)

Experiment: ../../../data/10032021/1


In [6]:
# Read the targets (force cells) and the features (H3) of the selected experiment.
# NOTE: '/leg_processed.csv' was once considered as an extra feature source to be
# concatenated column-wise with the H3 data; it is currently not loaded.
targets_df = pd.read_csv(exp_path + '/force_cells_processed.csv')
features_df = pd.read_csv(exp_path + '/H3_processed.csv')

# Discard the very first sample of both tables (noisy start of the recording)
targets_df, features_df = targets_df.iloc[1:], features_df.iloc[1:]

# Keep only the rows in which every feature column is present; the same boolean
# mask is applied to the targets so both tables stay row-aligned
idx = features_df.notna().all(axis=1)
n_dropped = (~idx).sum()
features_df = features_df.loc[idx]
targets_df = targets_df.loc[idx]
print('Droping {} data points by null features'.format(n_dropped))

assert len(features_df) == len(targets_df)

# Extract the selected columns as plain arrays for the model
features_arr = features_df[features].values
targets_arr = targets_df[targets].values

print('X: {}, Y: {}'.format(features_arr.shape, targets_arr.shape))

Droping 2 data points by null features
X: (6072, 4), Y: (6072, 24)


In [7]:
# Hold-out split without shuffling: the last TEST_SIZE fraction of the rows is the
# test set and everything before it is the training set.
idx_split = int(targets_arr.shape[0] * TEST_SIZE)

# Slice with an explicit train length instead of a negative index: when idx_split
# is 0, `arr[:-0]` is empty and `arr[-0:]` is the whole array, which would silently
# swap the train and test sets. max() keeps the boundary valid if TEST_SIZE > 1.
n_train = max(targets_arr.shape[0] - idx_split, 0)

X_train = features_arr[:n_train, :]
X_test = features_arr[n_train:, :]

Y_train = targets_arr[:n_train, :]
Y_test = targets_arr[n_train:, :]

print('Train -> X: {}, Y: {}'.format(X_train.shape, Y_train.shape))
print('Test -> X: {}, Y: {}'.format(X_test.shape, Y_test.shape))

Train -> X: (4251, 4), Y: (4251, 24)
Test -> X: (1821, 4), Y: (1821, 24)


In [8]:
# Fit the scaler on the training split only and apply that same transformation
# to both splits, so the test split does not influence the scaling parameters
s = SCALER.fit(X_train)
X_train, X_test = s.transform(X_train), s.transform(X_test)

# Per-feature statistics of the scaled splits (column-wise)
train_stats = (X_train.min(axis=0), X_train.max(axis=0), X_train.mean(axis=0), X_train.std(axis=0))
test_stats = (X_test.min(axis=0), X_test.max(axis=0), X_test.mean(axis=0), X_test.std(axis=0))

print('Train -> \n min: {}, \n max: {}, \n mean: {}, \n std: {}\n'.format(*train_stats))
print('Test -> \n min: {}, \n max: {}, \n mean: {}, \n std: {}\n'.format(*test_stats))

Train -> 
 min: [0. 0. 0. 0.], 
 max: [1. 1. 1. 1.], 
 mean: [0.43460795 0.4028391  0.22404348 0.45510021], 
 std: [0.33853871 0.23908632 0.29838709 0.27007675]

Test -> 
 min: [1.14338908e-03 1.44560816e-02 4.80148601e-06 2.47788203e-03], 
 max: [0.99984536 1.01702033 0.99921221 1.00004503], 
 mean: [0.44575618 0.40768544 0.23334027 0.44694299], 
 std: [0.34346745 0.24434585 0.30538404 0.27623816]



In [11]:
# Random forest with default hyper-parameters, a fixed seed and all CPU cores
# NOTE(review): an earlier comment called these "the best parameters" - confirm
model = RandomForestRegressor(random_state=0, n_jobs=-1, verbose=0)

# Fit the model and measure the wall-clock training time
t_start = time.time()
model.fit(X_train, Y_train)
tr_time = time.time() - t_start
print('Training time: {:.4f}'.format(tr_time))

train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

# One score per target column ('raw_values') for every metric and subset
metric_fns = {'MAE': mean_absolute_error, 'MSE': mean_squared_error, 'R2': r2_score}
subsets = {'Train': (Y_train, train_preds), 'Test': (Y_test, test_preds)}
results = {
    subset: {
        loss: metric(y_true, y_pred, multioutput='raw_values')
        for loss, metric in metric_fns.items()
    }
    for subset, (y_true, y_pred) in subsets.items()
}

results_summary = {'Training time': tr_time}
# Targets are ordered F1x, F1y, F1z, F2x, ...: every third score starting at
# offset f belongs to the same axis. Report mean and std across the cells.
for subset in ['Train', 'Test']:
    for f, force in enumerate(['Fx', 'Fy', 'Fz']):
        for loss in ['MAE', 'MSE', 'R2']:
            scores = results[subset][loss][f:N_CELLS * 3:3]
            label = ' '.join([subset, force, loss])
            s_mean, s_std = np.mean(scores), np.std(scores)
            print(label + ': {:.4f} ± {:.4f}'.format(s_mean, s_std))
            results_summary[label] = '{:.4f} ± {:.4f}'.format(s_mean, s_std)
            


Training time: 0.6243
Train Fx MAE: 0.3248 ± 0.1267
Train Fx MSE: 0.3407 ± 0.2547
Train Fx R2: 0.9982 ± 0.0011
Train Fy MAE: 0.3496 ± 0.2232
Train Fy MSE: 0.5160 ± 0.5889
Train Fy R2: 0.9945 ± 0.0031
Train Fz MAE: 0.3190 ± 0.1284
Train Fz MSE: 0.3234 ± 0.2479
Train Fz R2: 0.9982 ± 0.0010
Test Fx MAE: 1.1975 ± 0.4073
Test Fx MSE: 5.1875 ± 3.1007
Test Fx R2: 0.9681 ± 0.0191
Test Fy MAE: 1.2261 ± 0.7991
Test Fy MSE: 6.0645 ± 6.0490
Test Fy R2: 0.9282 ± 0.0350
Test Fz MAE: 1.2854 ± 0.4892
Test Fz MSE: 5.8195 ± 2.9215
Test Fz R2: 0.9603 ± 0.0242


In [22]:
# Run the single-experiment pipeline (load -> clean -> split -> scale -> fit ->
# score) on every experiment folder. For each "<subset> <axis> <metric>" key,
# results_full collects one value per experiment: the mean score across the cells.
results_full = defaultdict(list)

for exp_path in experiments_dirs_path:
    print(exp_path)
    # Load targets
    targets_df = pd.read_csv(exp_path + '/force_cells_processed.csv')

    # Load features
    features_df = pd.read_csv(exp_path + '/H3_processed.csv')

    # Drop first row to remove noise in the start of the data recording
    targets_df = targets_df.iloc[1:]
    features_df = features_df.iloc[1:]
    # Drop rows with any null feature; the same mask is applied to the targets
    # so both tables stay row-aligned
    idx = features_df.notna().all(axis=1)
    features_df = features_df.loc[idx]
    targets_df = targets_df.loc[idx]
    print('Droping {} data points by null features'.format((~idx).sum()))

    assert len(features_df) == len(targets_df)
    # Store the final array
    targets_arr = targets_df[targets].values
    features_arr = features_df[features].values

    print('X: {}, Y: {}'.format(features_arr.shape, targets_arr.shape))

    # Hold-out split without shuffling: the last TEST_SIZE fraction is the test set
    idx_split = int(targets_arr.shape[0] * TEST_SIZE)
    # Slice with an explicit train length instead of a negative index: when
    # idx_split is 0, `arr[:-0]` is empty and `arr[-0:]` is the whole array, which
    # would silently swap the train and test sets.
    n_train = max(targets_arr.shape[0] - idx_split, 0)

    X_train = features_arr[:n_train, :]
    X_test = features_arr[n_train:, :]

    Y_train = targets_arr[:n_train, :]
    Y_test = targets_arr[n_train:, :]

    print('Train -> X: {}, Y: {}'.format(X_train.shape, Y_train.shape))
    print('Test -> X: {}, Y: {}'.format(X_test.shape, Y_test.shape))

    # Fit the scaler on the training split only, then transform both splits
    s = SCALER.fit(X_train)

    X_train = s.transform(X_train)
    X_test = s.transform(X_test)

    print('Train -> \n min: {}, \n max: {}, \n mean: {}, \n std: {}\n'.format(np.min(X_train, axis=0), np.max(X_train, axis=0), np.mean(X_train, axis=0), np.std(X_train, axis=0)))
    print('Test -> \n min: {}, \n max: {}, \n mean: {}, \n std: {}\n'.format(np.min(X_test, axis=0), np.max(X_test, axis=0), np.mean(X_test, axis=0), np.std(X_test, axis=0)))

    # Random forest with default hyper-parameters, a fixed seed and all CPU cores
    model = RandomForestRegressor(random_state=0, n_jobs=-1, verbose=0)

    t_start = time.time()
    model.fit(X_train, Y_train)
    tr_time = time.time() - t_start
    print('Training time: {:.4f}'.format(tr_time))

    train_preds = model.predict(X_train)
    test_preds = model.predict(X_test)

    # One score per target column ('raw_values') for each metric and subset
    results = {
        'Train': {
            'MAE': mean_absolute_error(Y_train, train_preds, multioutput='raw_values'),
            'MSE': mean_squared_error(Y_train, train_preds, multioutput='raw_values'),
            'R2': r2_score(Y_train, train_preds, multioutput='raw_values')
        },
        'Test': {
            'MAE': mean_absolute_error(Y_test, test_preds, multioutput='raw_values'),
            'MSE': mean_squared_error(Y_test, test_preds, multioutput='raw_values'),
            'R2': r2_score(Y_test, test_preds, multioutput='raw_values')
        }
    }

    results_summary = {}
    results_summary['Training time'] = tr_time
    # Targets are ordered F1x, F1y, F1z, F2x, ...: every third score starting at
    # offset f belongs to the same axis. Report mean and std across the cells.
    for subset in ['Train', 'Test']:
        for f, force in enumerate(['Fx', 'Fy', 'Fz']):
            for loss in ['MAE', 'MSE', 'R2']:
                scores = [results[subset][loss][i + f] for i in range(0, N_CELLS * 3, 3)]
                s_mean = np.mean(scores)
                s_std = np.std(scores)

                print(' '.join([subset, force, loss]) + ': {:.4f} ± {:.4f}'.format(s_mean, s_std))
                results_summary[' '.join([subset, force, loss])] = '{:.4f} ± {:.4f}'.format(s_mean, s_std)

                # Keep only the per-experiment mean for the cross-experiment summary
                results_full[' '.join([subset, force, loss])].append(s_mean)

    print('\n')

../../../data/10032021/1
Droping 2 data points by null features
X: (6072, 4), Y: (6072, 24)
Train -> X: (4251, 4), Y: (4251, 24)
Test -> X: (1821, 4), Y: (1821, 24)
Train -> 
 min: [0. 0. 0. 0.], 
 max: [1. 1. 1. 1.], 
 mean: [0.43460795 0.4028391  0.22404348 0.45510021], 
 std: [0.33853871 0.23908632 0.29838709 0.27007675]

Test -> 
 min: [1.14338908e-03 1.44560816e-02 4.80148601e-06 2.47788203e-03], 
 max: [0.99984536 1.01702033 0.99921221 1.00004503], 
 mean: [0.44575618 0.40768544 0.23334027 0.44694299], 
 std: [0.34346745 0.24434585 0.30538404 0.27623816]

Training time: 0.5445
Train Fx MAE: 0.3248 ± 0.1267
Train Fx MSE: 0.3407 ± 0.2547
Train Fx R2: 0.9982 ± 0.0011
Train Fy MAE: 0.3496 ± 0.2232
Train Fy MSE: 0.5160 ± 0.5889
Train Fy R2: 0.9945 ± 0.0031
Train Fz MAE: 0.3190 ± 0.1284
Train Fz MSE: 0.3234 ± 0.2479
Train Fz R2: 0.9982 ± 0.0010
Test Fx MAE: 1.1975 ± 0.4073
Test Fx MSE: 5.1875 ± 3.1007
Test Fx R2: 0.9681 ± 0.0191
Test Fy MAE: 1.2261 ± 0.7991
Test Fy MSE: 6.0645 ± 6.0490

In [23]:
# Summarize across experiments: for each subset / axis / metric, report the mean,
# standard deviation and median of the per-experiment values in results_full
for subset in ['Train', 'Test']:
    for force in ['Fx', 'Fy', 'Fz']:
        for loss in ['MAE', 'MSE', 'R2']:
            label = ' '.join([subset, force, loss])
            scores = results_full[label]

            s_mean, s_std = np.mean(scores), np.std(scores)
            s_median = np.median(scores)

            print(label + ': {:.4f} ± {:.4f}'.format(s_mean, s_std))
            print(' '.join([subset, force, loss, '(median)']) + ': {:.4f}'.format(s_median))

Train Fx MAE: 0.3431 ± 0.0769
Train Fx MAE (median): 0.3249
Train Fx MSE: 0.3899 ± 0.2893
Train Fx MSE (median): 0.3106
Train Fx R2: 0.9960 ± 0.0023
Train Fx R2 (median): 0.9967
Train Fy MAE: 0.3976 ± 0.1025
Train Fy MAE (median): 0.3731
Train Fy MSE: 0.6575 ± 0.5049
Train Fy MSE (median): 0.5187
Train Fy R2: 0.9942 ± 0.0026
Train Fy R2 (median): 0.9945
Train Fz MAE: 0.3990 ± 0.1093
Train Fz MAE (median): 0.3779
Train Fz MSE: 0.5991 ± 0.5781
Train Fz MSE (median): 0.4542
Train Fz R2: 0.9962 ± 0.0020
Train Fz R2 (median): 0.9967
Test Fx MAE: 1.6884 ± 1.2286
Test Fx MAE (median): 1.2379
Test Fx MSE: 15.3234 ± 48.4273
Test Fx MSE (median): 4.4948
Test Fx R2: 0.8632 ± 0.2022
Test Fx R2 (median): 0.9401
Test Fy MAE: 1.7936 ± 0.9042
Test Fy MAE (median): 1.3809
Test Fy MSE: 15.9894 ± 23.7354
Test Fy MSE (median): 6.9496
Test Fy R2: 0.7606 ± 0.4480
Test Fy R2 (median): 0.9121
Test Fz MAE: 2.1861 ± 1.5952
Test Fz MAE (median): 1.5020
Test Fz MSE: 27.0706 ± 72.3742
Test Fz MSE (median): 6.0753
