In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import os
import gc

## Configuration

In [7]:
# Root directory of the data. The two directory constants below are appended
# to it with plain string concatenation, which is why they start with '/'.
SOURCE_PATH = '../data'
# Directory inside SOURCE_PATH where the original data (Excel workbooks) is stored
ORIGINAL_DATA_DIR = '/EXOSAFE'
# Directory inside SOURCE_PATH where the derived data (CSV / .npy files) is stored
DERIVED_DATA_DIR = '/derived_data'

# ID of the training and test data resulting from this notebook; it is used as
# the suffix of the .npy file names written to DERIVED_DATA_DIR
RESULTS_ID = '0001_23032021'

# Fraction (0-1) of the samples held out for the test set (passed to train_test_split)
TEST_SIZE = 0.3

# Number of force cells in the robotic leg; each cell contributes three
# columns (F<n>x, F<n>y, F<n>z)
N_CELLS = 8

# Experiment params
# DATE_EXPERIMENTS is both the sub-directory name and part of each workbook's file name
DATE_EXPERIMENTS = '24022021'
# Number of experiments to process; experiments are numbered starting at 1
N_EXPERIMENTS = 15


## Preprocessing

In [8]:
# Rotation angles, in degrees, applied to the force vector of every force cell.
# Key: force cell number (1..N_CELLS).
# Value: [angle about X, angle about Y, angle about Z]; the preprocessing loop
# applies the three rotations in that order (axis 0, then 1, then 2).
# NOTE(review): presumably these bring all cells into a common reference
# frame -- confirm against the sensor mounting documentation.
rotations = {
    1: [180, 90, 0],
    2: [180, 90, 0],
    3: [180, 0, -90],
    4: [0, 0, -90],
    5: [0, 0, 0],
    6: [0, 180, 0],
    7: [0, 90, 0],
    8: [0, 0, 90],
}

def rotate_vector(v, axis, angle):
    '''
    Rotate a 3-component vector about one of the coordinate axes.

    The vector is treated as a row vector and right-multiplied by the
    rotation matrix (v.dot(R)). Because of that, a stack of row vectors of
    shape (n, 3) is rotated row by row as well.

    Args:
    - v (np.array): Vector to be rotated (last dimension of size 3)
    - axis (int): Axis along the rotation is performed (0: X, 1: Y, 2: Z)
    - angle (int): Rotation angle, in degrees

    Returns:
    - (np.array): Rotated vector

    Raises:
    - ValueError: If axis is not 0, 1 or 2
    '''
    # Evaluate the trigonometric terms once instead of once per matrix entry
    theta = np.radians(angle)
    c, s = np.cos(theta), np.sin(theta)

    if axis == 0:
        # X -- the off-diagonal sines must have opposite signs, exactly like
        # in the Y and Z matrices; with two positive sines the matrix has
        # determinant c^2 - s^2 and is not a rotation.
        rotation = np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
    elif axis == 1:
        # Y
        rotation = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
    elif axis == 2:
        # Z
        rotation = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
    else:
        raise ValueError('Invalid axis')

    return v.dot(rotation)


In [9]:
# Convert every experiment workbook into CSV files (H3.csv, leg.csv, forces.csv)
# under DERIVED_DATA_DIR/<date>/<experiment number>, rotating the force-cell
# vectors with the angles configured in `rotations` before saving them.
for exp_idx in tqdm(range(N_EXPERIMENTS)):
    exp_id = exp_idx + 1  # experiments are numbered from 1 on disk
    save_dir = os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENTS, str(exp_id))
    os.makedirs(save_dir, exist_ok=True)

    # NOTE(review): the '0{}-' prefix yields e.g. '010-...' for experiments >= 10;
    # confirm this matches the workbook names on disk.
    workbook_path = SOURCE_PATH + ORIGINAL_DATA_DIR + '/' + DATE_EXPERIMENTS + '/0{}-'.format(exp_id) + DATE_EXPERIMENTS + '.xlsx'

    # Open the workbook once and read the three sheets from the same handle
    # instead of re-opening and re-parsing the file for every sheet.
    with pd.ExcelFile(workbook_path) as workbook:
        H3_df = pd.read_excel(workbook, sheet_name='H3processed')
        leg_df = pd.read_excel(workbook, sheet_name='Leg-Replica')
        # Only the first N_CELLS * 3 columns (x, y, z per cell) are force readings
        forces_df = pd.read_excel(workbook, sheet_name='ForceCells', usecols=list(range(N_CELLS * 3)))

    H3_df.to_csv(save_dir + '/H3.csv', index=False)
    leg_df.to_csv(save_dir + '/leg.csv', index=False)

    # A dedicated name for the cell number keeps the experiment index intact
    # (the loop variable used to be reused here).
    for cell in range(1, N_CELLS + 1):
        cols = ['F{}x'.format(cell), 'F{}y'.format(cell), 'F{}z'.format(cell)]
        for ax in range(3):
            # rotate_vector right-multiplies by the rotation matrix, so the whole
            # (n_samples, 3) block is rotated in one call instead of row by row.
            forces_df[cols] = rotate_vector(forces_df[cols].to_numpy(), ax, rotations[cell][ax])

    forces_df.to_csv(save_dir + '/forces.csv', index=False)


# Release the last experiment's frames before the next stage
del H3_df, leg_df, forces_df
gc.collect()

100%|██████████| 15/15 [16:08<00:00, 64.54s/it]


26

## Features and target selection

In [48]:
# Load the per-experiment CSV files produced by the preprocessing step and
# stack them into one targets frame (forces) and one features frame (H3).
targets_df_ls = []
features_df_ls = []
for i in tqdm(range(N_EXPERIMENTS)):
    data_dir = os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENTS, str(i + 1))

    targets_df = pd.read_csv(data_dir + '/forces.csv')
    features_df = pd.read_csv(data_dir + '/H3.csv')

    # Features and targets are paired row by row later on, so every experiment
    # must contribute the same number of rows to both frames.
    if len(targets_df) != len(features_df):
        raise ValueError(
            'Experiment {}: forces.csv has {} rows but H3.csv has {} rows'.format(
                i + 1, len(targets_df), len(features_df)
            )
        )

    targets_df_ls.append(targets_df)
    features_df_ls.append(features_df)

# ignore_index gives both frames the same unique 0..n-1 index; without it each
# experiment restarts at 0 and the stacked frames carry duplicate labels.
targets_df = pd.concat(targets_df_ls, axis=0, ignore_index=True)
features_df = pd.concat(features_df_ls, axis=0, ignore_index=True)

100%|██████████| 7/7 [00:00<00:00,  8.01it/s]


In [49]:
# Some column names are capitalized inconsistently in the source data
# ('Lankle...' / 'Rankle...'); map them to the '<side>Ankle...' spelling.
column_fixes = {
    'LankleTorque': 'LAnkleTorque',
    'RankleTorque': 'RAnkleTorque',
}
features_df = features_df.rename(columns=column_fixes)

In [50]:
# Side of the H3 data used to build the feature names
H3_LEG = 'L'  # L|R

# Feature names are <side><joint><measure>, joint-major
# (e.g. LHipPos, LHipVel, ..., LAnkleTorque)
joints = ('Hip', 'Knee', 'Ankle')
measures = ('Pos', 'Vel', 'Acc', 'Torque')
features = ['{}{}{}'.format(H3_LEG, joint, measure) for joint in joints for measure in measures]

# Target names are F<cell><component>, cell-major (F1x, F1y, F1z, F2x, ...)
targets = ['F{}{}'.format(cell, component) for cell in range(1, N_CELLS + 1) for component in 'xyz']

print('Number of features: {}'.format(len(features)))
print('Number of targets: {}'.format(len(targets)))

Number of features: 12
Number of targets: 24


In [51]:
# Keep only the selected feature columns (X) and force-cell target columns (Y)
X = features_df.loc[:, features]
Y = targets_df.loc[:, targets]

## Null handling, split and normalization

In [52]:
# Drop null values.
# A sample is kept only if all of its features AND all of its targets are
# present; checking the features alone would let rows with missing force
# readings through to the saved target arrays.
# The mask is applied by position, since X and Y are paired row by row.
idx = X.notna().all(axis=1).to_numpy() & Y.notna().all(axis=1).to_numpy()
X = X.to_numpy()[idx]
Y = Y.to_numpy()[idx]

In [53]:
# Shuffle the samples and hold out TEST_SIZE of them for testing; the fixed
# random_state makes the split reproducible.
# NOTE(review): samples from one experiment can land in both sets -- confirm
# that a per-sample random split is intended.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=TEST_SIZE,
    random_state=0,
)

for template, inputs, outputs in (
    ('Train -> X: {}, Y: {}', X_train, Y_train),
    ('Test -> X: {}, Y: {}', X_test, Y_test),
):
    print(template.format(inputs.shape, outputs.shape))

Train -> X: (87574, 12), Y: (87574, 24)
Test -> X: (37533, 12), Y: (37533, 24)


In [54]:
# Fit the scaler on the training features only, then apply that same
# transformation to both splits.
# NOTE(review): the fitted scaler is not written to disk by this notebook --
# confirm whether it needs to be persisted to normalize new data later.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_norm, X_test_norm = (scaler.transform(part) for part in (X_train, X_test))

## Save data

In [55]:
# Write the four arrays to DERIVED_DATA_DIR as <name>_<RESULTS_ID>.npy.
# Features are saved normalized; targets are saved as they are.
save_dir = SOURCE_PATH + DERIVED_DATA_DIR
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

arrays_to_save = (
    ('/X_train_', X_train_norm),
    ('/X_test_', X_test_norm),
    ('/Y_train_', Y_train),
    ('/Y_test_', Y_test),
)
for prefix, array in arrays_to_save:
    np.save(save_dir + prefix + RESULTS_ID + '.npy', array)