# Behaviour Cloning

In [1]:
!pip install optuna




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
import sys

import pandas as pd
# Import torch directly. It was previously pulled in through Optuna's private module
# `optuna.terminator.improvement.emmr`, which only exposes `torch` as an
# implementation detail and can change between Optuna releases.
import torch
import yaml

# On Google Colab: mount Drive and, if the project checkout is present there,
# switch into its notebooks directory so the relative paths used below resolve.
if 'google.colab' in sys.modules:
    from google.colab import drive
    drive.mount("/content/drive")
    colab_notebooks_dir = 'drive/MyDrive/Projects/Offline_RL_BSc_Thesis/notebooks'
    if os.path.isdir(colab_notebooks_dir):
        os.chdir(colab_notebooks_dir)

# Notebooks do not define __file__, so the project root is resolved as the parent
# of the current working directory (the notebook is expected to run from notebooks/).
project_root = os.path.abspath(os.pardir)
if project_root not in sys.path:
    sys.path.append(project_root)

# Explicit encoding so the config is decoded the same way on every platform.
with open('../config/bc_experiments_config.yaml', 'r', encoding='utf-8') as f:
    bc_experiments_config = yaml.safe_load(f)

# Project-local import: must come after project_root has been added to sys.path.
from src.tuning.bc_objective import BCObjectiveTorch

# Seed PyTorch's RNG from the experiment configuration.
torch.manual_seed(bc_experiments_config['experiment']['seed'])

# Column names passed as `selected_features` to the experiments below.
selected_features = ['X', 'Y', 'lv_X', 'lv_Y', 'angle', 'angular_velocity', 'leg_1', 'leg_2', 'reward']

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import warnings
warnings.filterwarnings('ignore')

## Hyperparameter Tuning and Training

In [4]:
from src.utils.experiments import conduct_bc_experiment

### Replay Buffer Model

In [5]:
# NOTE(review): this cell sits under "Replay Buffer Model", yet it reads the same
# final_policy_episodes/fp_*.parquet files that the "Final Policy Model" section loads.
# The paths are left unchanged here -- confirm this is intended and not a copy-paste slip.
rb_dropped_columns = ['done', 'episode']
rb_train_df = pd.read_parquet('../data/final_policy_episodes/fp_train.parquet').drop(columns=rb_dropped_columns)
rb_valid_df = pd.read_parquet('../data/final_policy_episodes/fp_valid.parquet').drop(columns=rb_dropped_columns)

# TorchScript normalization modules stored under the replay-buffer BC model directory,
# keyed by technique name; 'raw' maps to None (no script is loaded for it).
rb_normalization_dir = '../models/replay_buffer/BC/normalization'
rb_normalization_techniques = {
    'raw': None,
    **{
        technique: torch.jit.load(f'{rb_normalization_dir}/{file_stem}_normalization.pt')
        for technique, file_stem in (
            ('Max_Abs', 'max_abs'),
            ('Min_Max', 'min_max'),
            ('Robust', 'robust'),
            ('Standard', 'standard'),
        )
    },
}

#### Raw Data (No Normalization)

In [6]:
# Replay-buffer experiment without a normalization script (the 'raw' entry is None).
conduct_bc_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='raw',
    norm_technique_script=rb_normalization_techniques['raw'],
    output_model_name='BC_raw',
    selected_features=selected_features,
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:18,485] Using an existing study with name 'bc_replay_buffer_raw_data_study' instead of creating a new one.


Study 'bc_replay_buffer_raw_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:18,846] Using an existing study with name 'bc_replay_buffer_raw_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #6 (rank 1) with params: {'dropout': 0.3, 'lr': 0.00011267852200518671, 'num_hidden_neurons': 64, 'num_hidden_layers': 5, 'weight_decay': 2.1024510534811567e-05}
Skipping retraining for trial #6 (already in refined study).

> Phase 2 – Retraining trial #7 (rank 2) with params: {'dropout': 0.3, 'lr': 0.00022677205168835266, 'num_hidden_neurons': 128, 'num_hidden_layers': 6, 'weight_decay': 7.126842051592855e-05}
Skipping retraining for trial #7 (already in refined study).

> Phase 2 – Retraining trial #2 (rank 3) with params: {'dropout': 0.15000000000000002, 'lr': 0.00023889177623023005, 'num_hidden_neurons': 96, 'num_hidden_layers': 2, 'weight_decay': 2.4113048072505867e-05}
Skipping retraining for trial #2 (already in refined study).

> Phase 2 – Retraining trial #10 (rank 4) with params: {'dropout': 0.15000000000000002, 'lr': 0.0008670170447458302, 'num_hidden_neurons': 128, 'num_hidden_layers': 1, 'weig

#### Max Abs Normalization

In [7]:
# Replay-buffer experiment using the loaded 'Max_Abs' normalization script.
conduct_bc_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='Max_Abs',
    norm_technique_script=rb_normalization_techniques['Max_Abs'],
    output_model_name='BC_max_abs',
    selected_features=selected_features,
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:19,353] Using an existing study with name 'bc_replay_buffer_max_abs_data_study' instead of creating a new one.


Study 'bc_replay_buffer_max_abs_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:19,742] Using an existing study with name 'bc_replay_buffer_max_abs_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #6 (rank 1) with params: {'dropout': 0.1, 'lr': 0.00044113929441498735, 'num_hidden_neurons': 64, 'num_hidden_layers': 5, 'weight_decay': 1.0200982533874021e-05}
Skipping retraining for trial #6 (already in refined study).

> Phase 2 – Retraining trial #14 (rank 2) with params: {'dropout': 0.2, 'lr': 0.00017920779283510916, 'num_hidden_neurons': 128, 'num_hidden_layers': 4, 'weight_decay': 1.542140109047849e-05}
Skipping retraining for trial #14 (already in refined study).

> Phase 2 – Retraining trial #10 (rank 3) with params: {'dropout': 0.0, 'lr': 0.0009489115175510803, 'num_hidden_neurons': 32, 'num_hidden_layers': 2, 'weight_decay': 1.0542381010753174e-05}
Skipping retraining for trial #10 (already in refined study).

> Phase 2 – Retraining trial #7 (rank 4) with params: {'dropout': 0.0, 'lr': 0.0002533046542800505, 'num_hidden_neurons': 96, 'num_hidden_layers': 3, 'weight_decay': 2.4572655209003912e-

#### Min-Max Normalization

In [8]:
# Replay-buffer experiment using the loaded 'Min_Max' normalization script.
conduct_bc_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='Min_Max',
    norm_technique_script=rb_normalization_techniques['Min_Max'],
    output_model_name='BC_min_max',
    selected_features=selected_features,
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:20,266] Using an existing study with name 'bc_replay_buffer_min_max_data_study' instead of creating a new one.


Study 'bc_replay_buffer_min_max_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:20,602] Using an existing study with name 'bc_replay_buffer_min_max_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #5 (rank 1) with params: {'dropout': 0.2, 'lr': 0.00036056916633268025, 'num_hidden_neurons': 128, 'num_hidden_layers': 5, 'weight_decay': 2.2378990220310006e-05}
Skipping retraining for trial #5 (already in refined study).

> Phase 2 – Retraining trial #14 (rank 2) with params: {'dropout': 0.05, 'lr': 0.0009852263709385822, 'num_hidden_neurons': 32, 'num_hidden_layers': 1, 'weight_decay': 1.3343772109110247e-05}
Skipping retraining for trial #14 (already in refined study).

> Phase 2 – Retraining trial #19 (rank 3) with params: {'dropout': 0.25, 'lr': 0.0009933914859760393, 'num_hidden_neurons': 64, 'num_hidden_layers': 4, 'weight_decay': 1.0179650158725787e-05}
Skipping retraining for trial #19 (already in refined study).

> Phase 2 – Retraining trial #3 (rank 4) with params: {'dropout': 0.2, 'lr': 0.0007141032004701984, 'num_hidden_neurons': 96, 'num_hidden_layers': 3, 'weight_decay': 9.889047076473612e

#### Robust Normalization

In [9]:
# Replay-buffer experiment using the loaded 'Robust' normalization script.
conduct_bc_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='Robust',
    norm_technique_script=rb_normalization_techniques['Robust'],
    output_model_name='BC_robust',
    selected_features=selected_features,
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:21,062] Using an existing study with name 'bc_replay_buffer_robust_data_study' instead of creating a new one.


Study 'bc_replay_buffer_robust_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:21,372] Using an existing study with name 'bc_replay_buffer_robust_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #5 (rank 1) with params: {'dropout': 0.05, 'lr': 0.0006309944109272132, 'num_hidden_neurons': 128, 'num_hidden_layers': 5, 'weight_decay': 2.3364764152182742e-05}
Skipping retraining for trial #5 (already in refined study).

> Phase 2 – Retraining trial #9 (rank 2) with params: {'dropout': 0.05, 'lr': 0.0007036961739246008, 'num_hidden_neurons': 128, 'num_hidden_layers': 4, 'weight_decay': 6.932546385440103e-05}
Skipping retraining for trial #9 (already in refined study).

> Phase 2 – Retraining trial #0 (rank 3) with params: {'dropout': 0.05, 'lr': 0.000616584567246136, 'num_hidden_neurons': 128, 'num_hidden_layers': 6, 'weight_decay': 6.705733546567335e-05}
Skipping retraining for trial #0 (already in refined study).

> Phase 2 – Retraining trial #2 (rank 4) with params: {'dropout': 0.3, 'lr': 0.0003587344751762964, 'num_hidden_neurons': 32, 'num_hidden_layers': 5, 'weight_decay': 5.533605418859961e-05}


#### Standard (z-score) Normalization

In [10]:
# Replay-buffer experiment using the loaded 'Standard' normalization script.
conduct_bc_experiment(
    dataset_name='replay_buffer',
    norm_technique_name='Standard',
    norm_technique_script=rb_normalization_techniques['Standard'],
    output_model_name='BC_standard',
    selected_features=selected_features,
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:21,833] Using an existing study with name 'bc_replay_buffer_standard_data_study' instead of creating a new one.


Study 'bc_replay_buffer_standard_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:22,215] Using an existing study with name 'bc_replay_buffer_standard_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #11 (rank 1) with params: {'dropout': 0.05, 'lr': 0.0004894750295844869, 'num_hidden_neurons': 32, 'num_hidden_layers': 5, 'weight_decay': 4.5266674192896745e-05}
Skipping retraining for trial #11 (already in refined study).

> Phase 2 – Retraining trial #3 (rank 2) with params: {'dropout': 0.0, 'lr': 0.00013340841595960805, 'num_hidden_neurons': 96, 'num_hidden_layers': 4, 'weight_decay': 8.602364142186003e-05}
Skipping retraining for trial #3 (already in refined study).

> Phase 2 – Retraining trial #2 (rank 3) with params: {'dropout': 0.25, 'lr': 0.00014136004609980528, 'num_hidden_neurons': 64, 'num_hidden_layers': 4, 'weight_decay': 2.8466577565896817e-05}
Skipping retraining for trial #2 (already in refined study).

> Phase 2 – Retraining trial #0 (rank 4) with params: {'dropout': 0.2, 'lr': 0.0006828353595640268, 'num_hidden_neurons': 32, 'num_hidden_layers': 3, 'weight_decay': 1.3337379482124548e-0

In [11]:
# Drop the replay-buffer frames and loaded normalization scripts from the kernel
# namespace before the next section starts.
del rb_train_df
del rb_valid_df
del rb_normalization_techniques

### Final Policy Model

In [12]:
# Final-policy train/validation episodes, with the 'done' and 'episode' columns dropped.
fp_dropped_columns = ['done', 'episode']
fp_train_df = pd.read_parquet('../data/final_policy_episodes/fp_train.parquet').drop(columns=fp_dropped_columns)
fp_valid_df = pd.read_parquet('../data/final_policy_episodes/fp_valid.parquet').drop(columns=fp_dropped_columns)

# TorchScript normalization modules stored under the final-policy BC model directory,
# keyed by technique name; 'raw' maps to None (no script is loaded for it).
fp_normalization_dir = '../models/final_policy/BC/normalization'
fp_normalization_techniques = {
    'raw': None,
    **{
        technique: torch.jit.load(f'{fp_normalization_dir}/{file_stem}_normalization.pt')
        for technique, file_stem in (
            ('Max_Abs', 'max_abs'),
            ('Min_Max', 'min_max'),
            ('Robust', 'robust'),
            ('Standard', 'standard'),
        )
    },
}

#### Raw Data

In [13]:
# Final-policy experiment without a normalization script (the 'raw' entry is None).
conduct_bc_experiment(
    dataset_name='final_policy',
    norm_technique_name='raw',
    norm_technique_script=fp_normalization_techniques['raw'],
    output_model_name='BC_raw',
    selected_features=selected_features,
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:22,718] Using an existing study with name 'bc_final_policy_raw_data_study' instead of creating a new one.


Study 'bc_final_policy_raw_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:23,024] Using an existing study with name 'bc_final_policy_raw_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #0 (rank 1) with params: {'dropout': 0.25, 'lr': 0.0006039150663198806, 'num_hidden_neurons': 32, 'num_hidden_layers': 2, 'weight_decay': 7.406886803770007e-05}
Skipping retraining for trial #0 (already in refined study).

> Phase 2 – Retraining trial #10 (rank 2) with params: {'dropout': 0.05, 'lr': 0.0002309909037773971, 'num_hidden_neurons': 96, 'num_hidden_layers': 6, 'weight_decay': 1.4472285797851659e-05}
Skipping retraining for trial #10 (already in refined study).

> Phase 2 – Retraining trial #5 (rank 3) with params: {'dropout': 0.0, 'lr': 0.00012378841127036596, 'num_hidden_neurons': 96, 'num_hidden_layers': 6, 'weight_decay': 2.4158494925907964e-05}
Skipping retraining for trial #5 (already in refined study).

> Phase 2 – Retraining trial #7 (rank 4) with params: {'dropout': 0.05, 'lr': 0.0005850728682124547, 'num_hidden_neurons': 32, 'num_hidden_layers': 5, 'weight_decay': 2.6746509784526703e-0

#### Max Abs Normalization

In [14]:
# Final-policy experiment using the loaded 'Max_Abs' normalization script.
conduct_bc_experiment(
    dataset_name='final_policy',
    norm_technique_name='Max_Abs',
    norm_technique_script=fp_normalization_techniques['Max_Abs'],
    output_model_name='BC_max_abs',
    selected_features=selected_features,
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:23,456] Using an existing study with name 'bc_final_policy_max_abs_data_study' instead of creating a new one.


Study 'bc_final_policy_max_abs_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:23,770] Using an existing study with name 'bc_final_policy_max_abs_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #19 (rank 1) with params: {'dropout': 0.05, 'lr': 0.0006898117676629517, 'num_hidden_neurons': 64, 'num_hidden_layers': 2, 'weight_decay': 3.45776041261418e-05}
Skipping retraining for trial #19 (already in refined study).

> Phase 2 – Retraining trial #16 (rank 2) with params: {'dropout': 0.05, 'lr': 0.000968945485817633, 'num_hidden_neurons': 64, 'num_hidden_layers': 4, 'weight_decay': 2.4334771677460766e-05}
Skipping retraining for trial #16 (already in refined study).

> Phase 2 – Retraining trial #5 (rank 3) with params: {'dropout': 0.2, 'lr': 0.00020604084388691944, 'num_hidden_neurons': 32, 'num_hidden_layers': 6, 'weight_decay': 4.435296908677459e-05}
Skipping retraining for trial #5 (already in refined study).

> Phase 2 – Retraining trial #10 (rank 4) with params: {'dropout': 0.0, 'lr': 0.00010024950883206169, 'num_hidden_neurons': 96, 'num_hidden_layers': 3, 'weight_decay': 1.742712027008527e-05

#### Min-Max Normalization

In [15]:
# Final-policy experiment using the loaded 'Min_Max' normalization script.
conduct_bc_experiment(
    dataset_name='final_policy',
    norm_technique_name='Min_Max',
    norm_technique_script=fp_normalization_techniques['Min_Max'],
    output_model_name='BC_min_max',
    selected_features=selected_features,
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:24,198] Using an existing study with name 'bc_final_policy_min_max_data_study' instead of creating a new one.


Study 'bc_final_policy_min_max_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:24,503] Using an existing study with name 'bc_final_policy_min_max_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #9 (rank 1) with params: {'dropout': 0.05, 'lr': 0.0001471645038888605, 'num_hidden_neurons': 96, 'num_hidden_layers': 4, 'weight_decay': 6.834049726670672e-05}
Skipping retraining for trial #9 (already in refined study).

> Phase 2 – Retraining trial #2 (rank 2) with params: {'dropout': 0.25, 'lr': 0.00039379180276059926, 'num_hidden_neurons': 64, 'num_hidden_layers': 5, 'weight_decay': 1.1779313542599233e-05}
Skipping retraining for trial #2 (already in refined study).

> Phase 2 – Retraining trial #10 (rank 3) with params: {'dropout': 0.0, 'lr': 0.00024069610571808981, 'num_hidden_neurons': 64, 'num_hidden_layers': 6, 'weight_decay': 1.591884334950112e-05}
Skipping retraining for trial #10 (already in refined study).

> Phase 2 – Retraining trial #8 (rank 4) with params: {'dropout': 0.0, 'lr': 0.00023661191424049666, 'num_hidden_neurons': 32, 'num_hidden_layers': 3, 'weight_decay': 1.730152734693113e-05

#### Robust Normalization

In [16]:
# Final-policy experiment using the loaded 'Robust' normalization script.
# This call previously passed a hard-coded six-column subset
# (['X', 'Y', 'leg_1', 'leg_2', 'reward', 'lv_Y']) while every other experiment in
# this notebook -- including the replay-buffer Robust run -- uses the shared
# `selected_features` list. Use the shared list so the normalization techniques are
# compared on the same inputs.
# NOTE(review): the existing 'bc_final_policy_robust_data_study' results were
# presumably produced with the six-column subset; confirm whether that study needs
# to be re-run.
conduct_bc_experiment(dataset_name='final_policy',
                   norm_technique_name='Robust',
                   output_model_name='BC_robust',
                   norm_technique_script=fp_normalization_techniques['Robust'],
                   selected_features=selected_features,
                   train_df=fp_train_df,
                   valid_df=fp_valid_df,
                   experiments_config=bc_experiments_config)

Phase 1:


[I 2025-11-04 19:37:24,930] Using an existing study with name 'bc_final_policy_robust_data_study' instead of creating a new one.


Study 'bc_final_policy_robust_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:25,259] Using an existing study with name 'bc_final_policy_robust_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #9 (rank 1) with params: {'dropout': 0.3, 'lr': 0.0006345431557777041, 'num_hidden_neurons': 96, 'num_hidden_layers': 4, 'weight_decay': 5.391158494408333e-05}
Skipping retraining for trial #9 (already in refined study).

> Phase 2 – Retraining trial #4 (rank 2) with params: {'dropout': 0.2, 'lr': 0.0006863033019930411, 'num_hidden_neurons': 96, 'num_hidden_layers': 3, 'weight_decay': 3.8392788589097195e-05}
Skipping retraining for trial #4 (already in refined study).

> Phase 2 – Retraining trial #0 (rank 3) with params: {'dropout': 0.1, 'lr': 0.0005449560995276552, 'num_hidden_neurons': 32, 'num_hidden_layers': 5, 'weight_decay': 1.5335927159616028e-05}
Skipping retraining for trial #0 (already in refined study).

> Phase 2 – Retraining trial #1 (rank 4) with params: {'dropout': 0.15000000000000002, 'lr': 0.0005322657331860189, 'num_hidden_neurons': 32, 'num_hidden_layers': 3, 'weight_decay': 4.466960357

#### Standard (z-score) Normalization

In [17]:
# Final-policy experiment using the loaded 'Standard' normalization script.
conduct_bc_experiment(
    dataset_name='final_policy',
    norm_technique_name='Standard',
    norm_technique_script=fp_normalization_techniques['Standard'],
    output_model_name='BC_standard',
    selected_features=selected_features,
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    experiments_config=bc_experiments_config,
)

Phase 1:


[I 2025-11-04 19:37:25,700] Using an existing study with name 'bc_final_policy_standard_data_study' instead of creating a new one.


Study 'bc_final_policy_standard_data_study' already has 20/20 trials. Skipping optimization.
Phase 2:


[I 2025-11-04 19:37:26,012] Using an existing study with name 'bc_final_policy_standard_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #0 (rank 1) with params: {'dropout': 0.0, 'lr': 0.0007013252500049149, 'num_hidden_neurons': 64, 'num_hidden_layers': 6, 'weight_decay': 2.1442637561204632e-05}


Trial 0 Epochs:   7%|▋         | 18/250 [02:03<26:31,  6.86s/it]


Validation Accuracy: 54.68%
Finished retraining (rank 1) — Final loss: 2.8121

> Phase 2 – Retraining trial #2 (rank 2) with params: {'dropout': 0.15000000000000002, 'lr': 0.00047874290174029294, 'num_hidden_neurons': 32, 'num_hidden_layers': 6, 'weight_decay': 3.361653156494276e-05}


Trial 0 Epochs:   3%|▎         | 7/250 [00:55<31:53,  7.87s/it]


Validation Accuracy: 54.81%
Finished retraining (rank 2) — Final loss: 1.3492

> Phase 2 – Retraining trial #14 (rank 3) with params: {'dropout': 0.05, 'lr': 0.0009932734781305896, 'num_hidden_neurons': 128, 'num_hidden_layers': 1, 'weight_decay': 5.045739718452868e-05}


Trial 0 Epochs: 100%|██████████| 250/250 [17:38<00:00,  4.23s/it]


Validation Accuracy: 91.65%
Finished retraining (rank 3) — Final loss: 0.1770

> Phase 2 – Retraining trial #3 (rank 4) with params: {'dropout': 0.2, 'lr': 0.0004221677913574138, 'num_hidden_neurons': 96, 'num_hidden_layers': 5, 'weight_decay': 3.4314340374034815e-05}


Trial 0 Epochs:   3%|▎         | 7/250 [00:50<29:07,  7.19s/it]


Validation Accuracy: 54.68%
Finished retraining (rank 4) — Final loss: 1.7440

> Phase 2 – Retraining trial #1 (rank 5) with params: {'dropout': 0.1, 'lr': 0.00012373420868928966, 'num_hidden_neurons': 32, 'num_hidden_layers': 5, 'weight_decay': 2.5873748522017926e-05}


Trial 0 Epochs: 100%|██████████| 250/250 [26:18<00:00,  6.32s/it]


Validation Accuracy: 91.16%
Finished retraining (rank 5) — Final loss: 0.1900
Two-phase optimization complete. Phase 2 results saved to: sqlite:///c:\Users\mshko\Documents\MIRO\Offline_RL_BSc_Thesis\logs\final_policy\BC_standard_refined.db


In [18]:
# Drop the final-policy frames and loaded normalization scripts from the kernel namespace.
del fp_train_df
del fp_valid_df
del fp_normalization_techniques

## Exit the Runtime if using Google Colab

In [19]:
# Only relevant on Google Colab: request a checkpoint save from the front end, then
# disconnect the kernel.
if 'google.colab' in sys.modules:
  import time
  from IPython.display import Javascript, display

  display(Javascript('IPython.notebook.save_checkpoint();'))

  # Short pause before disconnecting -- presumably to let the save request go through.
  time.sleep(2)
  display(Javascript('google.colab.kernel.disconnect();'))