# Gap sanity check

This notebook runs some tests to check the source of the gap between the position and torque signals in the exoskeleton data.

In [1]:
import pandas as pd
import numpy as np
import time
import os
import glob
import json
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from collections import defaultdict
import matplotlib.pyplot as plt

## Configuration

In [2]:
# Directory where the figures of this notebook will be stored
FIGURES_DIR = './figures/gap_sanity_check'
# exist_ok=True creates the directory (and any missing parents) only when needed,
# without the check-then-create race of a separate os.path.exists() test
os.makedirs(FIGURES_DIR, exist_ok=True)

# Directory where the original data is stored
ORIGINAL_DATA_DIR = '../../../../../EXOSAFE-DATA'
# Directory where the derived data is stored
DERIVED_DATA_DIR = '../../../../data'

# Number of force cells in the robotic leg
N_CELLS = 8

# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# Sanity checks

## Original data

In [3]:
# Collect every Excel file stored one directory level below ORIGINAL_DATA_DIR.
# glob returns its matches in arbitrary (OS-dependent) order; sorting keeps the
# position of each file in the list (used later in the figure names) reproducible.
data_ls = sorted(glob.glob(ORIGINAL_DATA_DIR + '/*/*.xlsx'))

print('Files found ({}):'.format(len(data_ls)))
# os.path.basename handles whatever path separator the platform uses
print([os.path.basename(file) for file in data_ls])

Files found (101):
['01-06022021.xlsx', '02-06022021.xlsx', '03-06022021.xlsx', '07-06022021.xlsx', '08-06022021.xlsx', '10-06022021.xlsx', '03-08022021.xlsx', '04-08022021.xlsx', '02-10022021.xlsx', '03-10022021.xlsx', '04-10022021.xlsx', '05-10022021.xlsx', '01-10032021.xlsx', '01-12022021.xlsx', '02-12022021.xlsx', '03-12022021.xlsx', '04-12022021.xlsx', '05-12022021.xlsx', '06-12022021.xlsx', '07-12022021.xlsx', '02-15022021.xlsx', '03-15022021.xlsx', '04-15022021.xlsx', '01-16022021.xlsx', '02-16022021.xlsx', '03-16022021.xlsx', '04-16022021.xlsx', '05-16022021.xlsx', '06-16022021.xlsx', '02-17022021.xlsx', '03-17022021.xlsx', '04-17022021.xlsx', '01-19022021.xlsx', '010-19022021.xlsx', '011-19022021.xlsx', '012-19022021.xlsx', '013-19022021.xlsx', '014-19022021.xlsx', '015-19022021.xlsx', '016-19022021.xlsx', '017-19022021.xlsx', '018-19022021.xlsx', '02-19022021.xlsx', '03-19022021.xlsx', '04-19022021.xlsx', '05-19022021.xlsx', '06-19022021.xlsx', '07-19022021.xlsx', '08-1902202

In [4]:
# Dictionary to exclude specific experiments (date: [experiment ids])
# These experiments are excluded by lack of data
EXPERIMENTS_TO_EXCLUDE = {
    '06022021': ['01', '02', '03', '07', '08', '10'],
    '08022021': ['03', '04'],
    '10022021': ['02', '03', '04', '05'],
    '12022021': ['01', '02', '03', '04', '05', '06', '07'],
    '15022021': ['02', '03', '04'],
    '26032021': ['01', '02', '03', '04', '06', '07', '09', '011', '012'],
    '21042021': ['02', '03', '04', '05', '06', '07', '08']
}

# Paths of the excluded files, normalised so the comparison below does not
# depend on the path separator returned by glob
excluded_files = {
    os.path.normpath(ORIGINAL_DATA_DIR + '/{}/{}-{}.xlsx'.format(exp_date, exp_id, exp_date))
    for exp_date, exp_ids in EXPERIMENTS_TO_EXCLUDE.items()
    for exp_id in exp_ids
}

# Exclude some experiments from the list of files to process.
# Filtering into a new list (instead of calling list.remove) keeps this cell
# idempotent: it can be re-run without raising ValueError, and an excluded
# file that is not present on disk is simply ignored.
data_ls = [file for file in data_ls if os.path.normpath(file) not in excluded_files]

print('Files to process ({}):'.format(len(data_ls)))
print([os.path.basename(file) for file in data_ls])

Files to process (63):
['01-10032021.xlsx', '01-16022021.xlsx', '02-16022021.xlsx', '03-16022021.xlsx', '04-16022021.xlsx', '05-16022021.xlsx', '06-16022021.xlsx', '02-17022021.xlsx', '03-17022021.xlsx', '04-17022021.xlsx', '01-19022021.xlsx', '010-19022021.xlsx', '011-19022021.xlsx', '012-19022021.xlsx', '013-19022021.xlsx', '014-19022021.xlsx', '015-19022021.xlsx', '016-19022021.xlsx', '017-19022021.xlsx', '018-19022021.xlsx', '02-19022021.xlsx', '03-19022021.xlsx', '04-19022021.xlsx', '05-19022021.xlsx', '06-19022021.xlsx', '07-19022021.xlsx', '08-19022021.xlsx', '09-19022021.xlsx', '01-22022021.xlsx', '010-22022021.xlsx', '011-22022021.xlsx', '012-22022021.xlsx', '013-22022021.xlsx', '014-22022021.xlsx', '015-22022021.xlsx', '016-22022021.xlsx', '017-22022021.xlsx', '018-22022021.xlsx', '019-22022021.xlsx', '02-22022021.xlsx', '03-22022021.xlsx', '04-22022021.xlsx', '05-22022021.xlsx', '06-22022021.xlsx', '07-22022021.xlsx', '08-22022021.xlsx', '09-22022021.xlsx', '01-24022021.xlsx

In [None]:
# Plot the raw hip and knee signals (position, Z forces and torque) of every original file
def _new_three_axis_figure():
    """Create a 30x10 figure with three y-axes that share one x-axis.

    Returns:
        (fig, ax1, ax2, ax3): ax2 and ax3 are twins of ax1. The right spine of
        ax3 is moved outward so that its scale does not overlap the one of ax2.
    """
    fig, ax1 = plt.subplots(figsize=(30,10))
    ax2 = ax1.twinx()
    ax3 = ax1.twinx()
    ax3.spines['right'].set_position(('outward', 60))
    ax3.xaxis.set_ticks([])
    return fig, ax1, ax2, ax3


def _finish_original_figure(fig, legend_ax, lines, fig_name):
    """Draw a single legend for `lines`, save the figure, show it and close it.

    Args:
        fig: figure to save and close.
        legend_ax: axes on which the combined legend is drawn.
        lines: list of plotted lines (from every axis) to include in the legend.
        fig_name: file name of the image written inside FIGURES_DIR.
    """
    legend_ax.legend(lines, [line.get_label() for line in lines], loc='upper left')
    fig.savefig(os.path.join(FIGURES_DIR, fig_name))
    plt.show()
    # Two large figures are created per file: close each one explicitly so that
    # they do not accumulate in memory while iterating over all the files
    plt.close(fig)


for example, file in enumerate(data_ls):
    print(file)
    data_df = pd.read_excel(file, sheet_name=['Parameters', 'H3processed', 'Leg-Replica', 'ForceCells'])
    exo_df = data_df['H3processed']
    leg_df = data_df['Leg-Replica']
    force_df = data_df['ForceCells']

    exp_time_shift = data_df['Parameters'].iloc[0]['TimeShift']
    print('Experimental time shift: {}'.format(exp_time_shift))

    # File name without directory and extension, used in the figure names
    exp_name = os.path.splitext(os.path.basename(file))[0]

    # Plot hip signals
    fig, ax1, ax2, ax3 = _new_three_axis_figure()

    lns = ax1.plot(exo_df.iloc[:, 0], label='Hip pos (exo)', c='c')
    lns += ax2.plot(force_df['F6z'], label='Fz top-front (leg)', alpha=0.4)
    lns += ax2.plot(force_df['F5z'], label='Fz top-back (leg)', alpha=0.4)
    lns += ax3.plot(exo_df.iloc[:, 12], label='Hip torque (exo)', c='m')

    ax1.set_ylabel('Position (º)')
    ax2.set_ylabel('Force Z (N)')
    # NOTE(review): torque is normally expressed in N·m, not N/m - confirm the unit and fix the label
    ax3.set_ylabel('Torque (N/m)')
    plt.title(example)

    _finish_original_figure(fig, ax1, lns, 'original_data_hip_{}_{}.png'.format(example, exp_name))

    # Plot knee signals
    fig, ax1, ax2, ax3 = _new_three_axis_figure()

    lns = ax1.plot(leg_df.iloc[:, 3], label='Knee pos (leg)')
    lns += ax1.plot(exo_df.iloc[:, 1], label='Knee pos (exo)', c='c')
    lns += ax2.plot(force_df['F7z'], label='Fz top-front (leg)', alpha=0.4)
    lns += ax2.plot(force_df['F8z'], label='Fz top-back (leg)', alpha=0.4)
    lns += ax2.plot(force_df['F3z'], label='Fz bottom-front (leg)', alpha=0.8)
    lns += ax2.plot(force_df['F4z'], label='Fz bottom-back (leg)', alpha=0.8)
    lns += ax3.plot(exo_df.iloc[:, 13], label='Knee torque (exo)', c='m')

    ax1.set_ylabel('Position (º)')
    ax2.set_ylabel('Force Z (N)')
    # NOTE(review): torque is normally expressed in N·m, not N/m - confirm the unit and fix the label
    ax3.set_ylabel('Torque (N/m)')
    # Same title as the hip figure so both plots of a file can be matched
    plt.title(example)

    _finish_original_figure(fig, ax1, lns, 'original_data_knee_{}_{}.png'.format(example, exp_name))

    print('\n')

In [None]:
# Compare the left and right exoskeleton position and torque of every joint, for all files.
# Each entry holds the joint name followed by the positions of its columns in
# the 'H3processed' sheet: (joint, left pos, right pos, left torque, right torque)
JOINT_COLUMNS = [
    ('Hip', 0, 9, 12, 15),
    ('Knee', 1, 10, 13, 16),
    ('Ankle', 2, 11, 14, 17),
]

for example, file in enumerate(data_ls):
    print(file)
    data_df = pd.read_excel(file, sheet_name=['H3processed'])
    exo_df = data_df['H3processed']

    # File name without directory and extension, used in the figure names
    exp_name = os.path.splitext(os.path.basename(file))[0]

    for joint, left_pos, right_pos, left_torque, right_torque in JOINT_COLUMNS:
        # Plot the signals of this joint: positions on the left axis, torques on the right one
        fig, ax1 = plt.subplots(figsize=(30,10))
        ax2 = ax1.twinx()

        lns = ax1.plot(exo_df.iloc[:, left_pos], label='Left pos ({})'.format(joint), linestyle='--')
        lns += ax1.plot(exo_df.iloc[:, right_pos], label='Right pos ({})'.format(joint), linestyle='--')
        lns += ax2.plot(exo_df.iloc[:, left_torque], label='Left torque ({})'.format(joint))
        lns += ax2.plot(exo_df.iloc[:, right_torque], label='Right torque ({})'.format(joint))

        ax1.set_ylabel('Position (º)')
        # NOTE(review): torque is normally expressed in N·m, not N/m - confirm the unit and fix the label
        ax2.set_ylabel('Torque (N/m)')

        # Single legend with the lines of both axes
        ax1.legend(lns, [line.get_label() for line in lns], loc='upper left')

        fig.savefig(os.path.join(
            FIGURES_DIR,
            'original_data_left_right_exo_{}_{}_{}.png'.format(joint.lower(), example, exp_name)
        ))
        plt.show()
        # Three large figures are created per file: close each one explicitly so
        # that they do not accumulate in memory while iterating over all the files
        plt.close(fig)

    print('\n')

## Derived data

In [9]:
# Exoskeleton leg whose signals are used as features
H3_LEG = 'L' # L|R

# Feature columns: position and torque of the hip and knee of the selected leg
features = [
    '{}{}{}'.format(H3_LEG, joint, signal)
    for joint in ('Hip', 'Knee')
    for signal in ('Pos', 'Torque')
]
# Target columns: x, y and z components of each of the N_CELLS force cells (numbered from 1)
targets = [
    'F{}{}'.format(cell, axis)
    for cell in range(1, N_CELLS + 1)
    for axis in 'xyz'
]

print('Number of features:', len(features))
print('Selected features:', features)
print('\n')
print('Number of targets:', len(targets))
print('Selected targets:', targets)

Number of features: 4
Selected features: ['LHipPos', 'LHipTorque', 'LKneePos', 'LKneeTorque']


Number of targets: 24
Selected targets: ['F1x', 'F1y', 'F1z', 'F2x', 'F2y', 'F2z', 'F3x', 'F3y', 'F3z', 'F4x', 'F4y', 'F4z', 'F5x', 'F5y', 'F5z', 'F6x', 'F6y', 'F6z', 'F7x', 'F7y', 'F7z', 'F8x', 'F8y', 'F8z']


In [10]:
# Entries found two levels below DERIVED_DATA_DIR.
# glob returns its matches in arbitrary (OS-dependent) order; sorting keeps the
# position of each experiment (used later as dictionary key and in the figure
# names) reproducible between runs and machines.
experiments_dirs_path = sorted(glob.glob(DERIVED_DATA_DIR + '/*/*'))
experiments_dirs_path

['../../../../data/10032021/1',
 '../../../../data/16022021/1',
 '../../../../data/16022021/2',
 '../../../../data/16022021/3',
 '../../../../data/16022021/4',
 '../../../../data/16022021/5',
 '../../../../data/16022021/6',
 '../../../../data/17022021/2',
 '../../../../data/17022021/3',
 '../../../../data/17022021/4',
 '../../../../data/19022021/1',
 '../../../../data/19022021/10',
 '../../../../data/19022021/11',
 '../../../../data/19022021/12',
 '../../../../data/19022021/13',
 '../../../../data/19022021/14',
 '../../../../data/19022021/15',
 '../../../../data/19022021/16',
 '../../../../data/19022021/17',
 '../../../../data/19022021/18',
 '../../../../data/19022021/2',
 '../../../../data/19022021/3',
 '../../../../data/19022021/4',
 '../../../../data/19022021/5',
 '../../../../data/19022021/6',
 '../../../../data/19022021/7',
 '../../../../data/19022021/8',
 '../../../../data/19022021/9',
 '../../../../data/22022021/1',
 '../../../../data/22022021/10',
 '../../../../data/22022021/11

In [11]:
# Read the processed force-cell (targets) and exoskeleton (features) CSV files
# of every experiment directory, keyed by the position of the directory in
# experiments_dirs_path
targets_dict = {}
features_dict = {}
for i, exp_path in enumerate(experiments_dirs_path):
    # The first row of each recording is discarded (noise at the start of the data recording)
    targets_df = pd.read_csv(exp_path + '/force_cells_processed.csv').iloc[1:]
    features_df = pd.read_csv(exp_path + '/H3_processed.csv').iloc[1:]

    # Boolean mask of the rows without any null value in the features file;
    # the same mask is applied to both frames so that they stay row-aligned
    idx = ~features_df.isna().any(axis=1)
    features_df = features_df.loc[idx]
    targets_df = targets_df.loc[idx]

    assert len(features_df) == len(targets_df)

    # Keep only the selected target and feature columns
    targets_dict[i] = targets_df[targets]
    features_dict[i] = features_df[features]

In [None]:
# Plot hip and knee torque and z forces to check the relationship between data.
# Each entry describes one figure:
# (name used in the file name, joint name, [(force column, legend label, line alpha), ...])
DERIVED_JOINT_PLOTS = [
    ('hip', 'Hip', [
        ('F6z', 'Fz top-front (leg)', 0.4),
        ('F5z', 'Fz top-back (leg)', 0.4),
    ]),
    ('knee', 'Knee', [
        ('F7z', 'Fz top-front (leg)', 0.4),
        ('F8z', 'Fz top-back (leg)', 0.4),
        ('F3z', 'Fz bottom-front (leg)', 0.8),
        ('F4z', 'Fz bottom-back (leg)', 0.8),
    ]),
]

for example in range(len(targets_dict)):
    exp_path = experiments_dirs_path[example]
    print(exp_path)

    # Name of the parent directory of the experiment, used in the figure names
    exp_parent = os.path.basename(os.path.dirname(exp_path))

    for file_tag, joint, force_lines in DERIVED_JOINT_PLOTS:
        # Three y-axes sharing the x-axis: position, force and torque
        fig, ax1 = plt.subplots(figsize=(30,10))
        ax2 = ax1.twinx()
        ax3 = ax1.twinx()

        # Move the third scale outward so that it does not overlap the second one
        ax3.spines['right'].set_position(('outward', 60))
        ax3.xaxis.set_ticks([])

        # The column names are built from H3_LEG (like the `features` list) so
        # that this cell also works when the right leg is selected
        lns = ax1.plot(features_dict[example][H3_LEG + joint + 'Pos'],
                       label='{} pos (exo)'.format(joint), c='c')
        for column, label, alpha in force_lines:
            lns += ax2.plot(targets_dict[example][column], label=label, alpha=alpha)
        lns += ax3.plot(features_dict[example][H3_LEG + joint + 'Torque'],
                        label='{} torque (exo)'.format(joint), c='m')

        ax1.set_ylabel('Position (º)')
        ax2.set_ylabel('Force Z (N)')
        # NOTE(review): torque is normally expressed in N·m, not N/m - confirm the unit and fix the label
        ax3.set_ylabel('Torque (N/m)')
        plt.title(example)

        # Single legend with the lines of the three axes
        ax1.legend(lns, [line.get_label() for line in lns], loc='upper left')

        fig.savefig(os.path.join(
            FIGURES_DIR,
            'derived_data_{}_{}_{}.png'.format(file_tag, example, exp_parent)
        ))
        plt.show()
        # Two large figures are created per experiment: close each one explicitly
        # so that they do not accumulate in memory while iterating
        plt.close(fig)

    print('')