In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os
import gc

## Configuration

In [30]:
# Root folder of the data tree; the *_DIR values below are appended to it with `+`,
# which is why each of them starts with a slash.
SOURCE_PATH = '../../data'
# Sub-folder holding the original experiment workbooks
ORIGINAL_DATA_DIR = '/original_data'
# Sub-folder receiving the per-experiment CSV files derived from the workbooks
DERIVED_DATA_DIR = '/derived_data'
# Sub-folder receiving the arrays produced at the end of this notebook
RESULTS_DIR = '/models_data/0001_16032021'

# Share of the samples held out for the test set (passed as `test_size`)
TEST_SIZE = 0.3

# How many force cells the robotic leg carries
N_CELLS = 8

# Experiment identification: date tag used in file/folder names and number of experiments
DATE = '12022021'
N_EXPERIMENTS = 7


## Preprocessing

In [8]:
# Rotation angles, in degrees, applied to the readings of each force cell.
# Keys are 1-based force-cell numbers; each value lists the angle used for
# axis 0 (X), axis 1 (Y) and axis 2 (Z), in that order.
rotations = {
    cell: [x_deg, y_deg, z_deg]
    for cell, (x_deg, y_deg, z_deg) in enumerate(
        [
            (180, 90, 0),   # cell 1
            (180, 90, 0),   # cell 2
            (180, 0, -90),  # cell 3
            (0, 0, -90),    # cell 4
            (0, 0, 0),      # cell 5
            (0, 180, 0),    # cell 6
            (0, 90, 0),     # cell 7
            (0, 0, 90),     # cell 8
        ],
        start=1,
    )
}

def rotate_vector(v, axis, angle):
    '''
    Rotate a 3-component vector about one coordinate axis.

    The vector is used as a row vector and right-multiplied by the standard
    basic rotation matrix R of the chosen axis (``v . R``), which is the same
    as applying the transpose of R to ``v`` taken as a column vector.

    Args:
    - v (np.array): 3-component vector. Any object exposing ``.dot`` works,
      e.g. a NumPy array or a DataFrame row.
    - axis (int): Rotation axis: 0 = X, 1 = Y, 2 = Z.
    - angle (float): Rotation angle in degrees.

    Returns:
    - (pd.Series): The three rotated components, with a default integer index.

    Raises:
    - ValueError: If ``axis`` is not 0, 1 or 2.
    '''
    if axis not in (0, 1, 2):
        raise ValueError('Invalid axis')

    theta = np.radians(angle)
    c, s = np.cos(theta), np.sin(theta)

    if axis == 0:
        # X -- the two off-diagonal sines need opposite signs; with the same
        # sign the matrix is not orthogonal and does not preserve vector length.
        rotation = np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
    elif axis == 1:
        # Y
        rotation = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
    else:
        # Z
        rotation = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])

    return pd.Series(v.dot(rotation))


In [9]:
# Convert every experiment workbook into one CSV per sheet and rotate the
# force-cell readings with the per-cell angles in `rotations`.
for exp_number in range(1, N_EXPERIMENTS + 1):
    save_dir = os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE, str(exp_number))
    # exist_ok avoids the check-then-create race and keeps the cell re-runnable.
    os.makedirs(save_dir, exist_ok=True)

    workbook_path = SOURCE_PATH + ORIGINAL_DATA_DIR + '/0{}-'.format(exp_number) + DATE + '.xlsx'
    # Open the workbook once and reuse it for the three sheets instead of
    # re-opening the same .xlsx file for every read.
    with pd.ExcelFile(workbook_path) as workbook:
        H3_df = pd.read_excel(workbook, sheet_name='H3processed')
        leg_df = pd.read_excel(workbook, sheet_name='Leg-Replica')
        # Only the first N_CELLS * 3 columns (selected by position) are read.
        forces_df = pd.read_excel(workbook, sheet_name='ForceCells', usecols=list(range(N_CELLS * 3)))

    H3_df.to_csv(save_dir + '/H3.csv', index=False)
    leg_df.to_csv(save_dir + '/leg.csv', index=False)

    # The cell counter has its own name so it no longer shadows the
    # experiment counter of the enclosing loop.
    for cell in range(1, N_CELLS + 1):
        cols = ['F{}x'.format(cell), 'F{}y'.format(cell), 'F{}z'.format(cell)]
        cell_angles = rotations[cell]
        # Rotate each row about axis 0 (X), then 1 (Y), then 2 (Z); every pass
        # works on the result of the previous one.
        for ax in range(3):
            forces_df[cols] = forces_df[cols].apply(lambda v: rotate_vector(v, ax, cell_angles[ax]), axis=1)

    forces_df.to_csv(save_dir + '/forces.csv', index=False)


# Drop the frames of the last experiment and ask the collector to reclaim them.
del H3_df, leg_df, forces_df
gc.collect()

0

## Features and target selection

In [18]:
# Load the derived CSV files of every experiment and stack them row-wise.
targets_df_ls = []
features_df_ls = []
for i in range(N_EXPERIMENTS):
    data_dir = os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE, str(i + 1))

    # Per-experiment frames get their own names: the original appended the
    # undefined name `target_df`, which raises NameError on a fresh kernel.
    exp_targets_df = pd.read_csv(data_dir + '/forces.csv')
    targets_df_ls.append(exp_targets_df)

    h3_features_df = pd.read_csv(data_dir + '/H3.csv')
    leg_features_df = pd.read_csv(data_dir + '/leg.csv')
    # Place the H3 and leg columns side by side (aligned on the row index).
    exp_features_df = pd.concat([h3_features_df, leg_features_df], axis=1)
    features_df_ls.append(exp_features_df)

# The per-experiment row indices are kept as read, so index labels repeat
# across experiments in the stacked frames.
targets_df = pd.concat(targets_df_ls, axis=0)
features_df = pd.concat(features_df_ls, axis=0)

In [19]:
# Fix the lower-case "ankle" typo in the torque column names so they follow
# the same capitalisation as the names they are renamed to.
typo_fixes = {'LankleTorque': 'LAnkleTorque', 'RankleTorque': 'RAnkleTorque'}
features_df = features_df.rename(columns=typo_fixes)

In [20]:
H3_LEG = 'L'  # L|R

# Feature names: <leg><joint><measurement>, joints in the outer loop.
features = []
for joint in ['Hip', 'Knee', 'Ankle']:
    for measurement in ['Pos', 'Vel', 'Acc', 'Torque']:
        features.append(H3_LEG + joint + measurement)

# Target names: F<cell><axis>, cells numbered from 1.
targets = []
for cell_idx in range(N_CELLS):
    for axis_name in ['x', 'y', 'z']:
        targets.append('F' + str(cell_idx + 1) + axis_name)

n_features = len(features)
n_targets = len(targets)
print('Number of features: {}'.format(n_features))
print('Number of targets: {}'.format(n_targets))

Number of features: 12
Number of targets: 24


In [21]:
# Keep all rows but only the selected feature / target columns.
X = features_df.loc[:, features]
Y = targets_df.loc[:, targets]

## Null handling, split and normalization

In [22]:
# Drop every row in which at least one selected feature is null, removing the
# same rows from the targets.
# The mask and both arrays are derived from the source frames rather than from
# X / Y themselves: the original read the undefined name `target_df`
# (NameError on a fresh kernel) and could not be re-run once X had been
# replaced by a NumPy array.
idx = features_df[features].notna().all(axis=1)
X = features_df.loc[idx, features].values
Y = targets_df.loc[idx, targets].values

In [23]:
# Hold out TEST_SIZE of the samples for testing; the fixed random_state makes
# the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=TEST_SIZE,
    random_state=0,
)

train_shapes = (X_train.shape, Y_train.shape)
test_shapes = (X_test.shape, Y_test.shape)
print('Train -> X: {}, Y: {}'.format(*train_shapes))
print('Test -> X: {}, Y: {}'.format(*test_shapes))

Train -> X: (88190, 12), Y: (88190, 24)
Test -> X: (37796, 12), Y: (37796, 24)


In [26]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

## Save data

In [29]:
# Write the normalized features and the targets as .npy files in the results folder.
save_dir = SOURCE_PATH + RESULTS_DIR
results_dir_missing = not os.path.exists(save_dir)
if results_dir_missing:
    os.makedirs(save_dir)

outputs = (
    ('/X_train.npy', X_train_norm),
    ('/X_test.npy', X_test_norm),
    ('/Y_train.npy', Y_train),
    ('/Y_test.npy', Y_test),
)
for file_name, array in outputs:
    np.save(save_dir + file_name, array)