# Preprocessing

In [1]:
import os
import sys

import numpy as np
import pandas as pd

# NOTE: "__file__" is a string literal here (not the module attribute), so
# os.path.dirname("__file__") is '' and project_root resolves to the parent of
# the current working directory.
project_root = os.path.abspath(os.path.join(os.path.dirname("__file__"), "../"))
# Put the project root on sys.path (once) so the `src.*` imports below resolve.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.utils.data_loading import load_data_from_npz_file, load_data_as_df_from_np_arrays
# Random seed passed to the episode-wise train/test/validation split.
SEED = 16

## Data Loading

In [2]:
# Each .npz file yields five arrays, in the order
# (observations, next_observations, actions, rewards, dones). They are forwarded
# positionally to the DataFrame builder without being bound to notebook-level
# names, so there is nothing left to delete afterwards.
rb_df = load_data_as_df_from_np_arrays(*load_data_from_npz_file('../data/replay_buffer.npz'))
fp_df = load_data_as_df_from_np_arrays(*load_data_from_npz_file('../data/final_policy.npz'))

## Group the state-action pairs into episodes (assign an episode id to each row)

In [3]:
def add_episode_id_to_df(df: pd.DataFrame) -> pd.DataFrame:
    """Write an integer ``episode`` column into ``df`` and return ``df`` itself.

    The episode id of a row is the number of truthy ``done`` flags found in the
    rows strictly before it. A row with ``done == True`` therefore still carries
    the id of the episode it ends, and the following row starts the next id.
    The first episode has id 0.

    Args:
        df: frame with a ``done`` column; it is modified in place.

    Returns:
        The same ``df`` object, now containing the ``episode`` column.
    """
    # Running count of finished episodes, moved down one row so that the
    # terminal row stays inside the episode it closes.
    finished_before_row = df['done'].cumsum().shift(fill_value=0)
    df['episode'] = finished_before_row.astype(int)
    return df

In [4]:
# add_episode_id_to_df writes the `episode` column into the frame it receives
# and returns that same frame, so these assignments rebind the same objects.
rb_df = add_episode_id_to_df(rb_df)
fp_df = add_episode_id_to_df(fp_df)
# Bare last expression: renders the replay-buffer frame as the cell output.
rb_df

Unnamed: 0,X,Y,lv_X,lv_Y,angle,angular_velocity,leg_1,leg_2,action,reward,done,next_X,next_Y,next_lv_X,next_lv_Y,next_angle,next_angular_velocity,next_leg_1,next_leg_2,episode
0,-0.004820,1.401093,-0.488218,-0.436776,0.005592,0.110589,False,False,0,-1.197077,False,-0.009640,1.390689,-0.487559,-0.462411,0.011056,0.109301,False,False,0
1,-0.009640,1.390689,-0.487559,-0.462411,0.011056,0.109301,False,False,1,-2.399095,False,-0.014555,1.379694,-0.499388,-0.488779,0.018885,0.156586,False,False,0
2,-0.014555,1.379694,-0.499388,-0.488779,0.018885,0.156586,False,False,3,-0.752683,False,-0.019400,1.368104,-0.490665,-0.515199,0.024956,0.121428,False,False,0
3,-0.019400,1.368104,-0.490665,-0.515199,0.024956,0.121428,False,False,2,3.208104,False,-0.024134,1.357258,-0.480146,-0.482138,0.031669,0.134276,False,False,0
4,-0.024134,1.357258,-0.480146,-0.482138,0.031669,0.134276,False,False,3,-0.579223,False,-0.028784,1.345807,-0.469743,-0.509043,0.036294,0.092512,False,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,0.493834,0.679598,-0.104434,-0.052801,-0.010288,-0.005370,False,False,2,1.965186,False,0.492889,0.679355,-0.094666,-0.010779,-0.010133,0.003098,False,False,1725
999996,0.492889,0.679355,-0.094666,-0.010779,-0.010133,0.003098,False,False,0,-0.513342,False,0.491943,0.678513,-0.094666,-0.037446,-0.009978,0.003098,False,False,1725
999997,0.491943,0.678513,-0.094666,-0.037446,-0.009978,0.003098,False,False,0,-1.065271,False,0.490998,0.677070,-0.094666,-0.064113,-0.009823,0.003098,False,False,1725
999998,0.490998,0.677070,-0.094666,-0.064113,-0.009823,0.003098,False,False,2,1.672107,False,0.490164,0.676000,-0.084105,-0.047529,-0.009186,0.012742,False,False,1725


## Create Train, Validation and Test subsets (the split is done over episode ids rather than individual state-action pairs, so every episode ends up in exactly one subset)

In [5]:
from sklearn.model_selection import train_test_split
from typing import Tuple


def train_test_valid_split_df_episode_wise(data: pd.DataFrame,
                                           train_ratio: float = 0.8,
                                           test_ratio: float = 0.1,
                                           valid_ratio: float = 0.1,
                                           seed: int = SEED) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Split ``data`` into train / test / validation frames by episode id.

    The unique values of ``data['episode']`` are partitioned, and each returned
    frame holds all rows of the episodes assigned to it, so no episode is
    shared between subsets. The ratios apply to the number of episodes, not to
    the number of rows.

    Args:
        data: frame containing an ``episode`` column.
        train_ratio: fraction of episodes assigned to the train subset.
        test_ratio: fraction of episodes assigned to the test subset.
        valid_ratio: fraction of episodes assigned to the validation subset.
        seed: ``random_state`` used for both underlying splits.

    Returns:
        ``(train_df, test_df, valid_df)`` -- note the order: test comes before
        validation.

    Raises:
        AssertionError: if the three ratios do not add up to 1 (within 1e-6).
    """
    # abs() is required: the one-sided check `1 - sum <= 1e-6` also accepted
    # ratios that add up to MORE than 1 (e.g. 0.9 + 0.9 + 0.9).
    assert abs(1 - train_ratio - test_ratio - valid_ratio) <= 1e-6, \
        'The train, test and validation proportions have to add up to 1'

    all_episodes = data['episode'].unique()

    # First cut: train episodes vs. everything else.
    train_episodes, test_valid_episodes = train_test_split(all_episodes,
                                                           train_size=train_ratio,
                                                           random_state=seed)
    # Second cut: divide the remainder between test and validation, using the
    # share of test within (test + validation).
    test_to_testvalid_ratio = test_ratio/(test_ratio+valid_ratio)
    test_episodes, valid_episodes = train_test_split(test_valid_episodes,
                                                     train_size=test_to_testvalid_ratio,
                                                     random_state=seed)

    train_df = data[data['episode'].isin(train_episodes)]
    test_df = data[data['episode'].isin(test_episodes)]
    valid_df = data[data['episode'].isin(valid_episodes)]
    return train_df, test_df, valid_df

In [6]:
train_r, test_r, valid_r = 0.8, 0.1, 0.1

# Both datasets are split with the same ratios and seed. Arguments are passed
# positionally in signature order: (data, train_ratio, test_ratio, valid_ratio, seed).
rb_train_df, rb_test_df, rb_valid_df = train_test_valid_split_df_episode_wise(
    rb_df, train_r, test_r, valid_r, SEED
)
fp_train_df, fp_test_df, fp_valid_df = train_test_valid_split_df_episode_wise(
    fp_df, train_r, test_r, valid_r, SEED
)

del train_r, test_r, valid_r

In [7]:
# Every column whose name starts with 'next' is excluded from the saved files.
# The list is derived from the final-policy train frame and reused for all six
# frames (both datasets are built by the same loader function).
columns_to_drop = [c for c in fp_train_df.columns if c.startswith('next')]
print(f'Dropping {len(columns_to_drop)} columns: {columns_to_drop}')

# Output path (relative to data_folder_path) -> frame to write there.
location_data = {
    'final_policy_episodes/fp_train.parquet': fp_train_df,
    'final_policy_episodes/fp_test.parquet': fp_test_df,
    'final_policy_episodes/fp_valid.parquet': fp_valid_df,

    'replay_buffer_episodes/rb_train.parquet': rb_train_df,
    'replay_buffer_episodes/rb_test.parquet': rb_test_df,
    'replay_buffer_episodes/rb_valid.parquet': rb_valid_df
}

data_folder_path = '../data'

for location, df in location_data.items():
    location_path = os.path.join(data_folder_path, location)
    location_dir = os.path.dirname(location_path)
    # Create the target sub-folder if it does not exist yet.
    os.makedirs(location_dir, exist_ok=True)
    # Rebinds only the loop variable; the frames stored in location_data keep
    # their 'next*' columns.
    df = df.drop(columns=columns_to_drop)
    # don't forget to add fastparquet to the requirements.txt
    df.to_parquet(location_path, index=False)

# Only the two train frames are still needed in memory from here on.
del location_data, data_folder_path
del fp_test_df, fp_valid_df, rb_test_df, rb_valid_df

# Feature matrices additionally exclude the episode id and the target column.
# NOTE(review): 'reward' and 'done' are not in columns_to_drop, so they remain
# in the X arrays built here -- confirm this is intended.
# NOTE(review): these four arrays are reassigned from the parquet files in the
# next cell, so the values computed here appear to be unused -- confirm.
columns_to_drop += ['episode', 'action']
rb_X_train = rb_train_df.drop(columns=columns_to_drop).to_numpy(dtype=np.float32)
rb_Y_train = rb_train_df['action'].to_numpy(dtype=np.uint8)
del rb_train_df

fp_X_train = fp_train_df.drop(columns=columns_to_drop).to_numpy(dtype=np.float32)
fp_Y_train = fp_train_df['action'].to_numpy(dtype=np.uint8)
del fp_train_df
del columns_to_drop

Dropping 8 columns: ['next_X', 'next_Y', 'next_lv_X', 'next_lv_Y', 'next_angle', 'next_angular_velocity', 'next_leg_1', 'next_leg_2']


## Feature Importance and Normalization

In [8]:
# Reload the train splits from the parquet files and drop the 'done' flag.
rb_train_df = pd.read_parquet('../data/replay_buffer_episodes/rb_train.parquet').drop(columns=['done'])
fp_train_df = pd.read_parquet('../data/final_policy_episodes/fp_train.parquet').drop(columns=['done'])
# Summary statistics of both train frames, rendered as rich cell outputs.
display(rb_train_df.describe())
display(fp_train_df.describe())

# Feature columns = every remaining column except the target 'action'.
# NOTE(review): this keeps 'reward' and 'episode' among the features (both show
# up in the describe() output), and the normalization modules are fitted on
# these arrays -- confirm this is intended.
features_names = fp_train_df.drop(columns=['action']).columns

# Rebuild the feature / target arrays from the reloaded frames
# (float32 features, uint8 action labels).
rb_X_train = rb_train_df.drop(columns=['action']).to_numpy(dtype=np.float32)
rb_Y_train = rb_train_df['action'].to_numpy(dtype=np.uint8)

fp_X_train = fp_train_df.drop(columns=['action']).to_numpy(dtype=np.float32)
fp_Y_train = fp_train_df['action'].to_numpy(dtype=np.uint8)

Unnamed: 0,X,Y,lv_X,lv_Y,angle,angular_velocity,action,reward,episode
count,801683.0,801683.0,801683.0,801683.0,801683.0,801683.0,801683.0,801683.0,801683.0
mean,0.002973,0.629331,-0.004684,-0.085404,0.003136,0.000475,1.487005,-0.179928,1086.641245
std,0.316991,0.392169,0.258629,0.18738,0.138516,0.097688,0.975303,3.512852,382.1853
min,-0.999973,-0.329198,-2.946522,-2.793877,-3.3628,-6.078868,0.0,-100.0,0.0
25%,-0.17693,0.285636,-0.09355,-0.10092,-0.036053,-0.025748,0.0,-1.173662,814.0
50%,0.014015,0.551599,0.012877,-0.042579,0.004139,-0.000468,2.0,-0.149561,1081.0
75%,0.187958,0.919961,0.103575,-0.004456,0.039714,0.024428,2.0,0.895119,1396.0
max,0.999957,3.649655,3.050443,0.830236,3.603545,6.082865,3.0,135.202332,1724.0


Unnamed: 0,X,Y,lv_X,lv_Y,angle,angular_velocity,action,reward,episode
count,804059.0,804059.0,804059.0,804059.0,804059.0,804059.0,804059.0,804059.0,804059.0
mean,-0.03233,0.457843,-0.021234,-0.073373,-0.000511,-0.0004639259,1.30499,0.097753,625.247095
std,0.280553,0.366351,0.204107,0.088432,0.058326,0.04219376,1.003881,3.410921,365.347428
min,-0.999958,-0.220163,-1.136813,-0.621478,-0.695194,-1.05724,0.0,-100.0,1.0
25%,-0.17683,0.164315,-0.076613,-0.103988,-0.032129,-0.02040241,0.0,-1.133095,303.0
50%,-0.074826,0.363728,0.002986,-0.053789,0.002167,-5.84562e-07,2.0,-0.083675,624.0
75%,0.047157,0.69017,0.064678,-0.016084,0.03435,0.02008669,2.0,1.106756,949.0
max,0.999992,1.528019,0.831265,0.510804,0.423147,0.7421849,3.0,100.0,1253.0


### Normalization. Train and save normalization modules

In [9]:
# Explicit import: `torch` is used in the annotation below and in later cells,
# and should not depend on being re-exported by the star import.
import torch

# NOTE(review): star import kept as-is; it provides NormalizationModule and the
# concrete *NormalizationModule classes used in the following cells.
from src.normalization import *

def train_and_save_normalization_module_scripted(module: NormalizationModule,
                                                 X_train: torch.Tensor,
                                                 save_path: str) -> None:
    """Fit a normalization module on the training features and save it.

    Calls ``module.fit(X_train)``, makes sure the parent directory of
    ``save_path`` exists, then calls ``module.save_as_scripted(save_path)``.

    Args:
        module: normalization module to fit; it is fitted in place.
        X_train: training features passed to ``module.fit``.
        save_path: destination file path handed to ``module.save_as_scripted``.
    """
    module.fit(X_train)
    save_dir = os.path.dirname(save_path)
    # os.makedirs('') raises FileNotFoundError, so skip directory creation when
    # save_path is a bare file name (i.e. it has no directory component).
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    module.save_as_scripted(save_path)

In [10]:
# Convert the replay-buffer training features to a tensor once, up front.
rb_X_train = torch.Tensor(rb_X_train)

# Output file -> fresh (unfitted) normalization module to fit and save there.
rb_modules_to_train_and_script = {
    '../models/replay_buffer/min_max_normalization.pt': MinMaxNormalizationModule(),
    '../models/replay_buffer/max_abs_normalization.pt': MaxAbsNormalizationModule(),
    '../models/replay_buffer/standard_normalization.pt': StandardNormalizationModule(),
    '../models/replay_buffer/robust_normalization.pt': RobustNormalizationModule(),
}

for path, norm_module in rb_modules_to_train_and_script.items():
    # Positional order matches the helper's signature: (module, X_train, save_path).
    train_and_save_normalization_module_scripted(norm_module, rb_X_train, path)

In [11]:
# Convert the final-policy training features to a tensor once, up front.
fp_X_train = torch.Tensor(fp_X_train)

# Output file -> fresh (unfitted) normalization module to fit and save there.
fp_modules_to_train_and_script = {
    '../models/final_policy/min_max_normalization.pt': MinMaxNormalizationModule(),
    '../models/final_policy/max_abs_normalization.pt': MaxAbsNormalizationModule(),
    '../models/final_policy/standard_normalization.pt': StandardNormalizationModule(),
    '../models/final_policy/robust_normalization.pt': RobustNormalizationModule(),
}

for path, norm_module in fp_modules_to_train_and_script.items():
    # Positional order matches the helper's signature: (module, X_train, save_path).
    train_and_save_normalization_module_scripted(norm_module, fp_X_train, path)

In [None]:
# Drop the dicts of normalization modules; each module was already passed to
# train_and_save_normalization_module_scripted in the two cells above.
del rb_modules_to_train_and_script, fp_modules_to_train_and_script