# Behaviour Cloning

In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.5.0


In [2]:
import os
import sys
import yaml
import pandas as pd
from optuna.terminator.improvement.emmr import torch

if 'google.colab' in sys.modules:
  from google.colab import drive
  drive.mount( "/content/drive")
  if os.path.isdir('drive/MyDrive/Projects/Offline_RL_BSc_Thesis/notebooks'):
    os.chdir('drive/MyDrive/Projects/Offline_RL_BSc_Thesis/notebooks')


project_root = os.path.abspath(os.path.join(os.path.dirname("__file__"), "../"))
if project_root not in sys.path:
    sys.path.append(project_root)

with open('../config/bc_experiments_config.yaml', 'r') as f:
    bc_experiments_config = yaml.safe_load(f)

from src.tuning.bc_objective import BCObjectiveTorch

torch.manual_seed(bc_experiments_config['experiment']['seed'])

selected_features = ['X', 'Y', 'lv_X', 'lv_Y', 'angle', 'angular_velocity', 'leg_1', 'leg_2', 'reward']

Mounted at /content/drive


In [3]:
import warnings
warnings.filterwarnings('ignore')

## Data Loading

In [4]:
rb_train_df = pd.read_parquet('../data/replay_buffer_episodes/rb_train.parquet').drop(columns=['done', 'episode'])
rb_valid_df = pd.read_parquet('../data/replay_buffer_episodes/rb_valid.parquet').drop(columns=['done', 'episode'])

rb_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load(f'../models/replay_buffer/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load(f'../models/replay_buffer/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load(f'../models/replay_buffer/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load(f'../models/replay_buffer/normalization/standard_normalization.pt'),
}

## Hyperparameter Tuning and Training

In [5]:
from src.utils.experiments import conduct_bc_experiment

### Replay Buffer Model

In [6]:
rb_train_df = pd.read_parquet('../data/final_policy_episodes/fp_train.parquet').drop(columns=['done', 'episode'])
rb_valid_df = pd.read_parquet('../data/final_policy_episodes/fp_valid.parquet').drop(columns=['done', 'episode'])

rb_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load(f'../models/replay_buffer/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load(f'../models/replay_buffer/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load(f'../models/replay_buffer/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load(f'../models/replay_buffer/normalization/standard_normalization.pt'),
}

#### Raw Normalization

In [None]:
conduct_bc_experiment(dataset_name='replay_buffer',
                   norm_technique_name='raw',
                   output_model_name='BC_raw',
                   norm_technique_script=rb_normalization_techniques['raw'],
                   selected_features=selected_features,
                   train_df=rb_train_df,
                   valid_df=rb_valid_df,
                   experiments_config=bc_experiments_config)

Phase 1:


[I 2025-11-03 12:32:02,717] Using an existing study with name 'bc_replay_buffer_raw_data_study' instead of creating a new one.


Study 'bc_replay_buffer_raw_data_study' already has 7 trials. Running 13 more...


Trial 7 Epochs:  11%|█         | 11/100 [01:22<11:04,  7.47s/it]
[I 2025-11-03 12:33:29,868] Trial 7 finished with value: 1.5442268079832981 and parameters: {'dropout': 0.25, 'lr': 0.0007782669500047419, 'num_hidden_neurons': 64, 'num_hidden_layers': 2, 'weight_decay': 3.0489980402697803e-05}. Best is trial 5 with value: 0.8958756860933805.
Trial 8 Epochs:  20%|██        | 20/100 [02:33<10:15,  7.70s/it]
[I 2025-11-03 12:36:03,890] Trial 8 pruned. 
Trial 9 Epochs:   7%|▋         | 7/100 [01:06<14:46,  9.53s/it]

#### Max Abs Normalization

In [None]:
conduct_bc_experiment(dataset_name='replay_buffer',
                   norm_technique_name='Max_Abs',
                   output_model_name='BC_max_abs',
                   norm_technique_script=rb_normalization_techniques['Max_Abs'],
                   selected_features=selected_features,
                   train_df=rb_train_df,
                   valid_df=rb_valid_df,
                   experiments_config=bc_experiments_config)

#### Min-Max Normalization

In [None]:
conduct_bc_experiment(dataset_name='replay_buffer',
                   norm_technique_name='Min_Max',
                   output_model_name='BC_min_max',
                   norm_technique_script=rb_normalization_techniques['Min_Max'],
                   selected_features=selected_features,
                   train_df=rb_train_df,
                   valid_df=rb_valid_df,
                   experiments_config=bc_experiments_config)

#### Robust Normalization

In [None]:
conduct_bc_experiment(dataset_name='replay_buffer',
                   norm_technique_name='Robust',
                   output_model_name='BC_robust',
                   norm_technique_script=rb_normalization_techniques['Robust'],
                   selected_features=selected_features,
                   train_df=rb_train_df,
                   valid_df=rb_valid_df,
                   experiments_config=bc_experiments_config)

#### Standard (z-score) Normalization

In [None]:
conduct_bc_experiment(dataset_name='replay_buffer',
                   norm_technique_name='Standard',
                   output_model_name='BC_standard',
                   norm_technique_script=rb_normalization_techniques['Standard'],
                   selected_features=selected_features,
                   train_df=rb_train_df,
                   valid_df=rb_valid_df,
                   experiments_config=bc_experiments_config)

In [None]:
del rb_train_df, rb_valid_df, rb_normalization_techniques

### Final Policy Model

In [None]:
fp_train_df = pd.read_parquet('../data/final_policy_episodes/fp_train.parquet').drop(columns=['done', 'episode'])
fp_valid_df = pd.read_parquet('../data/final_policy_episodes/fp_valid.parquet').drop(columns=['done', 'episode'])

fp_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load(f'../models/final_policy/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load(f'../models/final_policy/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load(f'../models/final_policy/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load(f'../models/final_policy/normalization/standard_normalization.pt'),
}

#### Raw Data

In [None]:
conduct_bc_experiment(dataset_name='final_policy',
                   norm_technique_name='raw',
                   output_model_name='BC_raw',
                   norm_technique_script=fp_normalization_techniques['raw'],
                   selected_features=selected_features,
                   train_df=fp_train_df,
                   valid_df=fp_valid_df,
                   experiments_config=bc_experiments_config)

#### Max Abs Normalization

In [None]:
conduct_bc_experiment(dataset_name='final_policy',
                   norm_technique_name='Max_Abs',
                   output_model_name='BC_max_abs',
                   norm_technique_script=fp_normalization_techniques['Max_Abs'],
                   selected_features=selected_features,
                   train_df=fp_train_df,
                   valid_df=fp_valid_df,
                   experiments_config=bc_experiments_config)

#### Min Max Normalization

In [None]:
conduct_bc_experiment(dataset_name='final_policy',
                   norm_technique_name='Min_Max',
                   output_model_name='BC_min_max',
                   norm_technique_script=fp_normalization_techniques['Min_Max'],
                   selected_features=selected_features,
                   train_df=fp_train_df,
                   valid_df=fp_valid_df,
                   experiments_config=bc_experiments_config)

#### Robust Normalization

In [None]:
conduct_bc_experiment(dataset_name='final_policy',
                   norm_technique_name='Robust',
                   output_model_name='BC_robust',
                   norm_technique_script=fp_normalization_techniques['Robust'],
                   selected_features=['X', 'Y', 'leg_1', 'leg_2', 'reward', 'lv_Y'],
                   train_df=fp_train_df,
                   valid_df=fp_valid_df,
                   experiments_config=bc_experiments_config)

#### Standard (z-score) Normalization

In [None]:
conduct_bc_experiment(dataset_name='final_policy',
                   norm_technique_name='Standard',
                   output_model_name='BC_standard',
                   norm_technique_script=fp_normalization_techniques['Standard'],
                   selected_features=selected_features,
                   train_df=fp_train_df,
                   valid_df=fp_valid_df,
                   experiments_config=bc_experiments_config)

In [None]:
del fp_train_df, fp_valid_df, fp_normalization_techniques

In [None]:
if 'google.colab' in sys.modules:
  import IPython
  import time
  IPython.display.display(IPython.display.Javascript('IPython.notebook.save_checkpoint();'))

  time.sleep(2)
  IPython.display.display(IPython.display.Javascript('google.colab.kernel.disconnect();'))