# Behaviour Cloning

In [None]:
import os
import sys

import optuna
import pandas as pd
# Import PyTorch directly. It was previously pulled in through an optuna-internal
# module (optuna.terminator.improvement.emmr), which is not a public API and only
# happens to expose a `torch` name.
import torch
import yaml

# On Google Colab: mount Drive and, if the project's notebooks folder exists
# there, make it the working directory so the relative paths below resolve.
if 'google.colab' in sys.modules:
    from google.colab import drive

    drive.mount("/content/drive")
    if os.path.isdir('drive/MyDrive/Projects/FeelPeek/notebooks'):
        os.chdir('drive/MyDrive/Projects/FeelPeek/notebooks')


# NOTE: "__file__" is a quoted string literal here, so dirname() returns '' and
# project_root is simply the parent of the current working directory.
project_root = os.path.abspath(
    os.path.join(os.path.dirname("__file__"), "../")
)
if project_root not in sys.path:
    sys.path.append(project_root)

# Experiment settings; the 'experiment' and 'runtime' sections are read in this notebook.
with open('../config/bc_experiments_config.yaml', 'r') as f:
    bc_experiments_config = yaml.safe_load(f)

# Imported after sys.path is extended (presumably so the `src` package is found).
from src.tuning import BCObjectiveTorch

# Seed PyTorch's RNG from the configured experiment seed.
torch.manual_seed(bc_experiments_config['experiment']['seed'])

## Data Loading

In [None]:
# Load the replay-buffer train/validation splits, then discard the 'done' and
# 'episode' columns.
rb_train_df = pd.read_parquet('../data/replay_buffer_episodes/rb_train.parquet')
rb_train_df = rb_train_df.drop(columns=['done', 'episode'])
rb_valid_df = pd.read_parquet('../data/replay_buffer_episodes/rb_valid.parquet')
rb_valid_df = rb_valid_df.drop(columns=['done', 'episode'])

# Normalization technique name -> TorchScript module loaded from disk.
# 'raw' maps to None, i.e. no normalization script.
rb_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load('../models/replay_buffer/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load('../models/replay_buffer/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load('../models/replay_buffer/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load('../models/replay_buffer/normalization/standard_normalization.pt'),
}

## Hyperparameter Tuning and Training

In [None]:
from typing import List, Tuple
from src.datasets import BCDataset
from src.normalization import NormalizationModule

def prepare_data(df: pd.DataFrame,
                 selected_features: List[str],
                 norm_script: "NormalizationModule" = None) -> Tuple[torch.Tensor, torch.Tensor]:
    """Turn a transitions DataFrame into ``(features, actions)`` tensors.

    Every column of ``df`` except ``'action'`` is treated as a feature. When a
    normalization script is supplied it is applied to the *full* feature matrix
    first, and only afterwards are the requested columns selected (presumably
    because the script was fitted on all feature columns -- confirm).

    Args:
        df: DataFrame containing an ``'action'`` column plus feature columns.
        selected_features: Names of the feature columns to keep. The returned
            columns follow the DataFrame's column order, not the order of this
            list. Names that are not present in ``df`` cannot be selected; a
            ``UserWarning`` is emitted for them instead of dropping them silently.
        norm_script: Optional object exposing ``normalize(tensor)``; ``None``
            leaves the features untouched.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float32 tensor holding the selected
        feature columns and ``y`` is an int64 tensor built from ``df['action']``.
    """
    import warnings

    features_df = df.drop(columns=['action'])
    feature_names = list(features_df.columns)

    # Set membership is O(1) and lets us report requested-but-absent columns.
    wanted = set(selected_features)
    missing = wanted.difference(feature_names)
    if missing:
        warnings.warn(
            f"Selected features not present in the DataFrame and therefore ignored: {sorted(missing)}",
            stacklevel=2,
        )

    selected_features_ids = [idx for idx, name in enumerate(feature_names) if name in wanted]
    x = torch.tensor(features_df.values.astype('float32'), dtype=torch.float32)

    # apply normalization if exists
    if norm_script is not None:
        x = norm_script.normalize(x)

    # select only desired features (columns)
    x = x[:, selected_features_ids]
    y = torch.tensor(df['action'].values, dtype=torch.long)

    return x, y

In [None]:
from sklearn.metrics import balanced_accuracy_score
from torch.utils.data import DataLoader


def conduct_experiment(dataset_name: str = 'final_policy',
                       norm_technique_name: str = 'raw',
                       norm_technique_script: NormalizationModule = None,
                       selected_features: List[str] = None,
                       train_df: pd.DataFrame = None,
                       valid_df: pd.DataFrame = None,
                       experiments_config: dict = None) -> None:
    """Run an Optuna hyperparameter study for one dataset / normalization pair.

    The train and validation frames are converted to tensors with
    ``prepare_data``, wrapped in ``BCDataset`` / ``DataLoader`` objects and handed
    to a ``BCObjectiveTorch`` objective. The study maximizes the objective's
    value (scored with ``balanced_accuracy_score``), uses a median pruner, and is
    persisted to an SQLite file under ``<log_dir>/<dataset_name>/``. An existing
    study with the same name in that file is resumed (``load_if_exists=True``).

    Args:
        dataset_name: Label used for the log sub-directory, the model
            sub-directory and the study name.
        norm_technique_name: Label of the normalization technique; lower-cased
            with spaces replaced by underscores for file and study names.
        norm_technique_script: Optional normalization object passed through to
            ``prepare_data``; ``None`` means no normalization.
        selected_features: Feature column names passed through to ``prepare_data``.
        train_df: Training DataFrame (must contain an ``'action'`` column).
        valid_df: Validation DataFrame (must contain an ``'action'`` column).
        experiments_config: Configuration mapping. Read here:
            ``['runtime']['log_dir']``, ``['runtime']['best_model_dir']`` and
            ``['experiment']['n_optuna_trials']``; the whole mapping is also
            forwarded to the objective.
    """
    # Normalize the technique label once so the DB file name and the study name
    # cannot drift apart.
    norm_slug = norm_technique_name.lower().replace(" ", "_")

    X_train, y_train = prepare_data(df=train_df, selected_features=selected_features, norm_script=norm_technique_script)
    X_valid, y_valid = prepare_data(df=valid_df, selected_features=selected_features, norm_script=norm_technique_script)

    train_dataset = BCDataset(states=X_train, actions=y_train)
    valid_dataset = BCDataset(states=X_valid, actions=y_valid)

    base_log_dir = os.path.abspath(experiments_config['runtime']['log_dir'])
    log_dir = os.path.join(base_log_dir, dataset_name)
    os.makedirs(log_dir, exist_ok=True)

    # Built in two steps: re-using the same quote character inside a nested
    # f-string is a SyntaxError before Python 3.12.
    db_path = os.path.join(log_dir, f'BC_{norm_slug}.db')
    storage = f"sqlite:///{db_path}"

    # Settings shared by both loaders; only shuffling differs.
    loader_kwargs = dict(batch_size=64,
                         pin_memory=True,
                         num_workers=os.cpu_count(),
                         persistent_workers=True)
    train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, **loader_kwargs)
    valid_dataloader = DataLoader(dataset=valid_dataset, shuffle=False, **loader_kwargs)

    pruner = optuna.pruners.MedianPruner(
        n_startup_trials=5,
        n_warmup_steps=10,
        interval_steps=1
    )

    # Use the config passed by the caller (not the notebook-level global) so the
    # function honours its own `experiments_config` argument everywhere.
    objective = BCObjectiveTorch(
        train_loader=train_dataloader,
        eval_loader=valid_dataloader,
        model_dir=os.path.join(experiments_config['runtime']['best_model_dir'], f'{dataset_name}/'),
        device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
        scoring_fn=balanced_accuracy_score,
        num_features=X_train.shape[1],
        config=experiments_config
    )

    study = optuna.create_study(study_name=f'bc_{dataset_name}_{norm_slug}_data_study',
                                direction='maximize',
                                storage=storage,
                                pruner=pruner,
                                load_if_exists=True)

    study.optimize(objective, n_trials=experiments_config['experiment']['n_optuna_trials'])
    # No explicit `del` of the tensors/datasets/loaders: they are locals (and are
    # still referenced by `objective`), so they are released when the function returns.

### Replay Buffer Model

In [None]:
# Replay-buffer splits for the replay-buffer experiments below. These must come
# from replay_buffer_episodes/rb_*.parquet; this cell previously loaded the
# final-policy files into the rb_* variables. The 'done' and 'episode' columns
# are dropped.
rb_train_df = pd.read_parquet('../data/replay_buffer_episodes/rb_train.parquet').drop(columns=['done', 'episode'])
rb_valid_df = pd.read_parquet('../data/replay_buffer_episodes/rb_valid.parquet').drop(columns=['done', 'episode'])

# Normalization technique name -> TorchScript module loaded from disk.
# 'raw' maps to None, i.e. no normalization script.
rb_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load('../models/replay_buffer/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load('../models/replay_buffer/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load('../models/replay_buffer/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load('../models/replay_buffer/normalization/standard_normalization.pt'),
}

#### Raw Data

In [None]:
# 'replay_buffer' study on raw features (the 'raw' entry is None, so no normalization is applied).
conduct_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='raw',
    norm_technique_script=rb_normalization_techniques['raw'],
    selected_features=['leg_1', 'leg_2', 'angular_velocity', 'lv_Y'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

#### Max Abs Normalization

In [None]:
# 'replay_buffer' study using the Max-Abs normalization script.
conduct_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='Max_Abs',
    norm_technique_script=rb_normalization_techniques['Max_Abs'],
    selected_features=['reward', 'angular_velocity', 'leg_1', 'leg_2'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

#### Min-Max Normalization

In [None]:
# 'replay_buffer' study using the Min-Max normalization script.
conduct_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='Min_Max',
    norm_technique_script=rb_normalization_techniques['Min_Max'],
    selected_features=['reward', 'angular_velocity', 'angle', 'lv_Y'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

#### Robust Normalization

In [None]:
# 'replay_buffer' study using the Robust normalization script.
conduct_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='Robust',
    norm_technique_script=rb_normalization_techniques['Robust'],
    selected_features=['leg_1', 'leg_2', 'reward', 'Y'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

#### Standard (z-score) Normalization

In [None]:
# 'replay_buffer' study using the Standard (z-score) normalization script.
conduct_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='Standard',
    norm_technique_script=rb_normalization_techniques['Standard'],
    selected_features=['leg_1', 'leg_2', 'reward', 'angular_velocity'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

In [None]:
# Unbind the replay-buffer frames and normalization scripts now that their studies are done.
del rb_train_df
del rb_valid_df
del rb_normalization_techniques

### Final Policy Model

In [None]:
# Load the final-policy train/validation splits, then discard the 'done' and
# 'episode' columns.
fp_train_df = pd.read_parquet('../data/final_policy_episodes/fp_train.parquet')
fp_train_df = fp_train_df.drop(columns=['done', 'episode'])
fp_valid_df = pd.read_parquet('../data/final_policy_episodes/fp_valid.parquet')
fp_valid_df = fp_valid_df.drop(columns=['done', 'episode'])

# Normalization technique name -> TorchScript module loaded from disk.
# 'raw' maps to None, i.e. no normalization script.
fp_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load('../models/final_policy/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load('../models/final_policy/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load('../models/final_policy/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load('../models/final_policy/normalization/standard_normalization.pt'),
}

#### Raw Data

In [None]:
# 'final_policy' study on raw features (the 'raw' entry is None, so no normalization is applied).
conduct_experiment(
    dataset_name='final_policy',
    norm_technique_name='raw',
    norm_technique_script=fp_normalization_techniques['raw'],
    selected_features=['leg_1', 'leg_2', 'angular_velocity', 'lv_Y'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

#### Max Abs Normalization

In [None]:
# 'final_policy' study using the Max-Abs normalization script.
conduct_experiment(
    dataset_name='final_policy',
    norm_technique_name='Max_Abs',
    norm_technique_script=fp_normalization_techniques['Max_Abs'],
    selected_features=['reward', 'angular_velocity', 'leg_1', 'leg_2'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

#### Min-Max Normalization

In [None]:
# 'final_policy' study using the Min-Max normalization script.
conduct_experiment(
    dataset_name='final_policy',
    norm_technique_name='Min_Max',
    norm_technique_script=fp_normalization_techniques['Min_Max'],
    selected_features=['reward', 'angular_velocity', 'angle', 'lv_Y'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

#### Robust Normalization

In [None]:
# 'final_policy' study using the Robust normalization script.
conduct_experiment(
    dataset_name='final_policy',
    norm_technique_name='Robust',
    norm_technique_script=fp_normalization_techniques['Robust'],
    selected_features=['leg_1', 'leg_2', 'reward', 'Y'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

#### Standard (z-score) Normalization

In [None]:
# 'final_policy' study using the Standard (z-score) normalization script.
conduct_experiment(
    dataset_name='final_policy',
    norm_technique_name='Standard',
    norm_technique_script=fp_normalization_techniques['Standard'],
    selected_features=['leg_1', 'leg_2', 'reward', 'angular_velocity'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

In [None]:
# Unbind the final-policy frames and normalization scripts now that their studies are done.
del fp_train_df
del fp_valid_df
del fp_normalization_techniques