# Behaviour Cloning

In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.5.0


In [2]:
import os
import sys
import yaml
import pandas as pd
import torch  # imported directly rather than re-exported through an Optuna internal module

# On Colab: mount Google Drive and move into the project's notebooks folder so
# that the relative '../' paths used throughout the notebook resolve.
if 'google.colab' in sys.modules:
    from google.colab import drive
    drive.mount("/content/drive")
    # Absolute path (matches the mount point above), so the check does not
    # depend on the current working directory and the cell can be re-run.
    notebooks_dir = '/content/drive/MyDrive/Projects/Offline_RL_BSc_Thesis/notebooks'
    if os.path.isdir(notebooks_dir):
        os.chdir(notebooks_dir)

# The project root is the parent of the current working directory; it is put on
# sys.path so that the `src` package can be imported below.
project_root = os.path.abspath(os.pardir)
if project_root not in sys.path:
    sys.path.append(project_root)

# Experiment configuration shared by every BC experiment in this notebook.
with open('../config/bc_experiments_config.yaml', 'r') as f:
    bc_experiments_config = yaml.safe_load(f)

# Must stay after the sys.path update above.
from src.tuning.bc_objective import BCObjectiveTorch

# Seed PyTorch's global RNG from the experiment configuration.
torch.manual_seed(bc_experiments_config['experiment']['seed'])

Mounted at /content/drive


<torch._C.Generator at 0x7c0770718150>

## Data Loading

In [3]:
# Replay-buffer train/validation splits; the 'done' and 'episode' columns are
# dropped immediately after loading.
rb_train_df = pd.read_parquet('../data/replay_buffer_episodes/rb_train.parquet').drop(columns=['done', 'episode'])
rb_valid_df = pd.read_parquet('../data/replay_buffer_episodes/rb_valid.parquet').drop(columns=['done', 'episode'])

# Serialized TorchScript normalization modules keyed by technique name.
# 'raw' maps to None, i.e. no normalization script is loaded for it.
rb_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load('../models/replay_buffer/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load('../models/replay_buffer/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load('../models/replay_buffer/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load('../models/replay_buffer/normalization/standard_normalization.pt'),
}

## Hyperparameter Tuning and Training

In [4]:
from src.utils.experiments import conduct_bc_experiment

### Replay Buffer Model

In [5]:
# Replay-buffer train/validation splits for the replay-buffer experiments below.
# These must be read from replay_buffer_episodes so that the data matches
# dataset_name='replay_buffer' and the replay-buffer normalization scripts.
# NOTE(review): studies tuned while this cell pointed at the final-policy
# parquet files may have been fit on the wrong data — confirm and re-run if so.
rb_train_df = pd.read_parquet('../data/replay_buffer_episodes/rb_train.parquet').drop(columns=['done', 'episode'])
rb_valid_df = pd.read_parquet('../data/replay_buffer_episodes/rb_valid.parquet').drop(columns=['done', 'episode'])

# Serialized TorchScript normalization modules keyed by technique name.
# 'raw' maps to None, i.e. no normalization script is loaded for it.
rb_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load('../models/replay_buffer/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load('../models/replay_buffer/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load('../models/replay_buffer/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load('../models/replay_buffer/normalization/standard_normalization.pt'),
}

#### Raw Data (No Normalization)

In [6]:
# Replay-buffer BC experiment on raw inputs (the 'raw' entry carries no
# normalization script).
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='replay_buffer',
    output_model_name='BC_raw',
    norm_technique_name='raw',
    norm_technique_script=rb_normalization_techniques['raw'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    selected_features=['X', 'Y', 'leg_1', 'leg_2', 'angular_velocity', 'lv_Y'],
)

Phase 1:


[I 2025-11-01 12:15:44,552] Using an existing study with name 'bc_replay_buffer_raw_data_study' instead of creating a new one.


Study 'bc_replay_buffer_raw_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 12:15:45,667] Using an existing study with name 'bc_replay_buffer_raw_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #53 (rank 1) with params: {'dropout': 0.0, 'lr': 2.0824202862558877e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 10, 'weight_decay': 5.8106058510747134e-06}
Skipping retraining for trial #53 (already in refined study).

> Phase 2 – Retraining trial #11 (rank 2) with params: {'dropout': 0.0, 'lr': 1.138507722024752e-05, 'num_hidden_neurons': 160, 'num_hidden_layers': 9, 'weight_decay': 4.3362213086356403e-05}
Skipping retraining for trial #11 (already in refined study).

> Phase 2 – Retraining trial #1 (rank 3) with params: {'dropout': 0.0, 'lr': 1.9962689890064292e-05, 'num_hidden_neurons': 256, 'num_hidden_layers': 9, 'weight_decay': 0.00012794890719998307}
Skipping retraining for trial #1 (already in refined study).

> Phase 2 – Retraining trial #10 (rank 4) with params: {'dropout': 0.0, 'lr': 1.553996526256845e-05, 'num_hidden_neurons': 160, 'num_hidden_layers': 9, 'weight_decay': 6.6699031259291

#### Max Abs Normalization

In [7]:
# Replay-buffer BC experiment with the Max-Abs normalization script.
# NOTE(review): 'reward' is among the input features — confirm it is available
# to the policy at inference time.
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='replay_buffer',
    output_model_name='BC_max_abs',
    norm_technique_name='Max_Abs',
    norm_technique_script=rb_normalization_techniques['Max_Abs'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    selected_features=['X', 'Y', 'reward', 'angular_velocity', 'leg_1', 'leg_2'],
)

Phase 1:


[I 2025-11-01 12:15:47,340] Using an existing study with name 'bc_replay_buffer_max_abs_data_study' instead of creating a new one.


Study 'bc_replay_buffer_max_abs_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 12:15:48,616] Using an existing study with name 'bc_replay_buffer_max_abs_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #51 (rank 1) with params: {'dropout': 0.0, 'lr': 1.0272766762128013e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 8, 'weight_decay': 4.575838117777828e-06}
Skipping retraining for trial #51 (already in refined study).

> Phase 2 – Retraining trial #62 (rank 2) with params: {'dropout': 0.0, 'lr': 1.1472315616559427e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 8, 'weight_decay': 2.547258184038193e-06}
Skipping retraining for trial #62 (already in refined study).

> Phase 2 – Retraining trial #50 (rank 3) with params: {'dropout': 0.0, 'lr': 1.2091213659649399e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 8, 'weight_decay': 4.183470990715837e-06}
Skipping retraining for trial #50 (already in refined study).

> Phase 2 – Retraining trial #49 (rank 4) with params: {'dropout': 0.0, 'lr': 1.0734841886525684e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 6, 'weight_decay': 4.764465971124

#### Min-Max Normalization

In [8]:
# Replay-buffer BC experiment with the Min-Max normalization script.
# NOTE(review): 'reward' is among the input features — confirm it is available
# to the policy at inference time.
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='replay_buffer',
    output_model_name='BC_min_max',
    norm_technique_name='Min_Max',
    norm_technique_script=rb_normalization_techniques['Min_Max'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    selected_features=['X', 'Y', 'reward', 'angular_velocity', 'angle', 'lv_Y'],
)

Phase 1:


[I 2025-11-01 12:15:49,898] Using an existing study with name 'bc_replay_buffer_min_max_data_study' instead of creating a new one.


Study 'bc_replay_buffer_min_max_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 12:15:51,348] Using an existing study with name 'bc_replay_buffer_min_max_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #89 (rank 1) with params: {'dropout': 0.0, 'lr': 1.8366528771477992e-05, 'num_hidden_neurons': 128, 'num_hidden_layers': 10, 'weight_decay': 5.563077262580885e-06}
Skipping retraining for trial #89 (already in refined study).

> Phase 2 – Retraining trial #80 (rank 2) with params: {'dropout': 0.0, 'lr': 1.3795456392272787e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 10, 'weight_decay': 7.94980982857873e-06}
Skipping retraining for trial #80 (already in refined study).

> Phase 2 – Retraining trial #91 (rank 3) with params: {'dropout': 0.0, 'lr': 1.1176478055999253e-05, 'num_hidden_neurons': 128, 'num_hidden_layers': 10, 'weight_decay': 1.8345242792847033e-06}
Skipping retraining for trial #91 (already in refined study).

> Phase 2 – Retraining trial #84 (rank 4) with params: {'dropout': 0.0, 'lr': 1.4399657041219794e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 9, 'weight_decay': 6.80912818586

#### Robust Normalization

In [9]:
# Replay-buffer BC experiment with the Robust normalization script.
# NOTE(review): 'reward' is among the input features — confirm it is available
# to the policy at inference time.
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='replay_buffer',
    output_model_name='BC_robust',
    norm_technique_name='Robust',
    norm_technique_script=rb_normalization_techniques['Robust'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    selected_features=['X', 'Y', 'leg_1', 'leg_2', 'reward', 'lv_Y'],
)

Phase 1:


[I 2025-11-01 12:15:52,523] Using an existing study with name 'bc_replay_buffer_robust_data_study' instead of creating a new one.


Study 'bc_replay_buffer_robust_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 12:15:53,749] Using an existing study with name 'bc_replay_buffer_robust_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #61 (rank 1) with params: {'dropout': 0.0, 'lr': 1.630669603563643e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 8, 'weight_decay': 0.0008571669826908238}
Skipping retraining for trial #61 (already in refined study).

> Phase 2 – Retraining trial #70 (rank 2) with params: {'dropout': 0.0, 'lr': 1.4719411044909978e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 9, 'weight_decay': 9.12330791740292e-05}
Skipping retraining for trial #70 (already in refined study).

> Phase 2 – Retraining trial #66 (rank 3) with params: {'dropout': 0.0, 'lr': 1.7608730079279216e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 10, 'weight_decay': 0.00015732534098069552}
Skipping retraining for trial #66 (already in refined study).

> Phase 2 – Retraining trial #81 (rank 4) with params: {'dropout': 0.0, 'lr': 1.6615435843855557e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 9, 'weight_decay': 0.0010558640154096

#### Standard (z-score) Normalization

In [10]:
# Replay-buffer BC experiment with the Standard (z-score) normalization script.
# NOTE(review): 'reward' is among the input features — confirm it is available
# to the policy at inference time.
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='replay_buffer',
    output_model_name='BC_standard',
    norm_technique_name='Standard',
    norm_technique_script=rb_normalization_techniques['Standard'],
    train_df=rb_train_df,
    valid_df=rb_valid_df,
    selected_features=['X', 'Y', 'leg_1', 'leg_2', 'reward', 'angular_velocity'],
)

Phase 1:


[I 2025-11-01 12:15:54,798] Using an existing study with name 'bc_replay_buffer_standard_data_study' instead of creating a new one.


Study 'bc_replay_buffer_standard_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 12:15:56,124] Using an existing study with name 'bc_replay_buffer_standard_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #97 (rank 1) with params: {'dropout': 0.0, 'lr': 1.481696920000349e-05, 'num_hidden_neurons': 128, 'num_hidden_layers': 10, 'weight_decay': 9.806263308865382e-05}
Skipping retraining for trial #97 (already in refined study).

> Phase 2 – Retraining trial #81 (rank 2) with params: {'dropout': 0.0, 'lr': 2.066921816997584e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 10, 'weight_decay': 4.689876680468691e-05}
Skipping retraining for trial #81 (already in refined study).

> Phase 2 – Retraining trial #92 (rank 3) with params: {'dropout': 0.0, 'lr': 2.5019156875865792e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 10, 'weight_decay': 0.00012252525600715675}
Skipping retraining for trial #92 (already in refined study).

> Phase 2 – Retraining trial #88 (rank 4) with params: {'dropout': 0.0, 'lr': 1.5240437803690604e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 9, 'weight_decay': 7.6312725048121

In [11]:
# Drop the replay-buffer frames and normalization scripts from the kernel
# namespace before the final-policy section starts.
del rb_train_df
del rb_valid_df
del rb_normalization_techniques

### Final Policy Model

In [12]:
# Final-policy train/validation splits; the 'done' and 'episode' columns are
# dropped immediately after loading.
fp_train_df = pd.read_parquet('../data/final_policy_episodes/fp_train.parquet').drop(columns=['done', 'episode'])
fp_valid_df = pd.read_parquet('../data/final_policy_episodes/fp_valid.parquet').drop(columns=['done', 'episode'])

# Serialized TorchScript normalization modules keyed by technique name.
# 'raw' maps to None, i.e. no normalization script is loaded for it.
fp_normalization_techniques = {
    'raw': None,
    'Max_Abs': torch.jit.load('../models/final_policy/normalization/max_abs_normalization.pt'),
    'Min_Max': torch.jit.load('../models/final_policy/normalization/min_max_normalization.pt'),
    'Robust': torch.jit.load('../models/final_policy/normalization/robust_normalization.pt'),
    'Standard': torch.jit.load('../models/final_policy/normalization/standard_normalization.pt'),
}

#### Raw Data

In [13]:
# Final-policy BC experiment on raw inputs (the 'raw' entry carries no
# normalization script).
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='final_policy',
    output_model_name='BC_raw',
    norm_technique_name='raw',
    norm_technique_script=fp_normalization_techniques['raw'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    selected_features=['X', 'Y', 'leg_1', 'leg_2', 'angular_velocity', 'lv_Y'],
)

Phase 1:


[I 2025-11-01 12:16:00,474] Using an existing study with name 'bc_final_policy_raw_data_study' instead of creating a new one.


Study 'bc_final_policy_raw_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 12:16:01,516] A new study created in RDB with name: bc_final_policy_raw_data_study_refined


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #85 (rank 1) with params: {'dropout': 0.0, 'lr': 1.3699217967010149e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 3, 'weight_decay': 0.0014934465175233489}


Trial 0 Epochs:  60%|██████    | 121/200 [14:01<09:09,  6.96s/it]


Finished retraining (rank 1) — Final balanced accuracy: 0.6913

> Phase 2 – Retraining trial #53 (rank 2) with params: {'dropout': 0.05, 'lr': 2.3989561831792946e-05, 'num_hidden_neurons': 160, 'num_hidden_layers': 5, 'weight_decay': 3.1664298926550693e-06}


Trial 0 Epochs:   6%|▋         | 13/200 [02:12<31:52, 10.23s/it]


Finished retraining (rank 2) — Final balanced accuracy: 0.3957

> Phase 2 – Retraining trial #67 (rank 3) with params: {'dropout': 0.0, 'lr': 2.2885285940778856e-05, 'num_hidden_neurons': 128, 'num_hidden_layers': 4, 'weight_decay': 1.2255689963286532e-06}


Trial 0 Epochs:  32%|███▏      | 64/200 [08:29<18:03,  7.97s/it]


Finished retraining (rank 3) — Final balanced accuracy: 0.6542

> Phase 2 – Retraining trial #69 (rank 4) with params: {'dropout': 0.0, 'lr': 3.264645141628127e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 3, 'weight_decay': 1.3796682150643075e-06}


Trial 0 Epochs:  73%|███████▎  | 146/200 [16:57<06:16,  6.97s/it]


Finished retraining (rank 4) — Final balanced accuracy: 0.6875

> Phase 2 – Retraining trial #86 (rank 5) with params: {'dropout': 0.05, 'lr': 1.3603764092941612e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 4, 'weight_decay': 0.00149378494098276}


Trial 0 Epochs:   6%|▌         | 11/200 [01:43<29:31,  9.37s/it]


Finished retraining (rank 5) — Final balanced accuracy: 0.4147
Two-phase optimization complete. Phase 2 results saved to: sqlite:////content/drive/MyDrive/Projects/Offline_RL_BSc_Thesis/logs/final_policy/BC_raw_refined.db


#### Max Abs Normalization

In [14]:
# Final-policy BC experiment with the Max-Abs normalization script.
# NOTE(review): 'reward' is among the input features — confirm it is available
# to the policy at inference time.
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='final_policy',
    output_model_name='BC_max_abs',
    norm_technique_name='Max_Abs',
    norm_technique_script=fp_normalization_techniques['Max_Abs'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    selected_features=['X', 'Y', 'reward', 'angular_velocity', 'leg_1', 'leg_2'],
)

Phase 1:


[I 2025-11-01 12:59:35,410] Using an existing study with name 'bc_final_policy_max_abs_data_study' instead of creating a new one.


Study 'bc_final_policy_max_abs_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 12:59:36,739] Using an existing study with name 'bc_final_policy_max_abs_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #30 (rank 1) with params: {'dropout': 0.45, 'lr': 0.0007366070170670772, 'num_hidden_neurons': 192, 'num_hidden_layers': 3, 'weight_decay': 7.65358332756472e-06}
Skipping retraining for trial #30 (already in refined study).

> Phase 2 – Retraining trial #25 (rank 2) with params: {'dropout': 0.45, 'lr': 0.0006004172365563787, 'num_hidden_neurons': 192, 'num_hidden_layers': 3, 'weight_decay': 1.1421446130252108e-05}
Skipping retraining for trial #25 (already in refined study).

> Phase 2 – Retraining trial #2 (rank 3) with params: {'dropout': 0.4, 'lr': 0.0004013915608611041, 'num_hidden_neurons': 192, 'num_hidden_layers': 4, 'weight_decay': 1.0594666373170474e-05}
Skipping retraining for trial #2 (already in refined study).

> Phase 2 – Retraining trial #81 (rank 4) with params: {'dropout': 0.35000000000000003, 'lr': 5.252397940179052e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 3, 'weight_decay': 1

#### Min-Max Normalization

In [15]:
# Final-policy BC experiment with the Min-Max normalization script.
# NOTE(review): 'reward' is among the input features — confirm it is available
# to the policy at inference time.
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='final_policy',
    output_model_name='BC_min_max',
    norm_technique_name='Min_Max',
    norm_technique_script=fp_normalization_techniques['Min_Max'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    selected_features=['X', 'Y', 'reward', 'angular_velocity', 'angle', 'lv_Y'],
)

Phase 1:


[I 2025-11-01 12:59:37,810] Using an existing study with name 'bc_final_policy_min_max_data_study' instead of creating a new one.


Study 'bc_final_policy_min_max_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 12:59:39,341] Using an existing study with name 'bc_final_policy_min_max_data_study_refined' instead of creating a new one.


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #4 (rank 1) with params: {'dropout': 0.0, 'lr': 0.0002631685368614807, 'num_hidden_neurons': 64, 'num_hidden_layers': 3, 'weight_decay': 1.7868032249325914e-06}
Skipping retraining for trial #4 (already in refined study).

> Phase 2 – Retraining trial #40 (rank 2) with params: {'dropout': 0.05, 'lr': 0.00011016640498965951, 'num_hidden_neurons': 128, 'num_hidden_layers': 4, 'weight_decay': 0.00015995512878406434}
Skipping retraining for trial #40 (already in refined study).

> Phase 2 – Retraining trial #14 (rank 3) with params: {'dropout': 0.0, 'lr': 0.0001581500452578538, 'num_hidden_neurons': 96, 'num_hidden_layers': 3, 'weight_decay': 8.80754776119105e-05}
Skipping retraining for trial #14 (already in refined study).

> Phase 2 – Retraining trial #3 (rank 4) with params: {'dropout': 0.05, 'lr': 0.00048078289083895536, 'num_hidden_neurons': 128, 'num_hidden_layers': 4, 'weight_decay': 2.18434414198205e-

Trial 0 Epochs:   6%|▌         | 11/200 [02:19<39:49, 12.64s/it]


Finished retraining (rank 5) — Final balanced accuracy: 0.4009
Two-phase optimization complete. Phase 2 results saved to: sqlite:////content/drive/MyDrive/Projects/Offline_RL_BSc_Thesis/logs/final_policy/BC_min_max_refined.db


#### Robust Normalization

In [16]:
# Final-policy BC experiment with the Robust normalization script.
# NOTE(review): 'reward' is among the input features — confirm it is available
# to the policy at inference time.
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='final_policy',
    output_model_name='BC_robust',
    norm_technique_name='Robust',
    norm_technique_script=fp_normalization_techniques['Robust'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    selected_features=['X', 'Y', 'leg_1', 'leg_2', 'reward', 'lv_Y'],
)

Phase 1:


[I 2025-11-01 13:02:01,025] Using an existing study with name 'bc_final_policy_robust_data_study' instead of creating a new one.


Study 'bc_final_policy_robust_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 13:02:02,344] A new study created in RDB with name: bc_final_policy_robust_data_study_refined


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #82 (rank 1) with params: {'dropout': 0.0, 'lr': 1.3241093967295995e-05, 'num_hidden_neurons': 96, 'num_hidden_layers': 8, 'weight_decay': 3.460560265827231e-06}


Trial 0 Epochs:  25%|██▌       | 50/200 [09:28<28:25, 11.37s/it]


Finished retraining (rank 1) — Final balanced accuracy: 0.6448

> Phase 2 – Retraining trial #21 (rank 2) with params: {'dropout': 0.0, 'lr': 1.0062359193020975e-05, 'num_hidden_neurons': 224, 'num_hidden_layers': 5, 'weight_decay': 1.2826066576919312e-06}


Trial 0 Epochs:  29%|██▉       | 58/200 [08:52<21:44,  9.19s/it]


Finished retraining (rank 2) — Final balanced accuracy: 0.6911

> Phase 2 – Retraining trial #97 (rank 3) with params: {'dropout': 0.0, 'lr': 1.0991468589524581e-05, 'num_hidden_neurons': 64, 'num_hidden_layers': 8, 'weight_decay': 1.3956669858329311e-06}


Trial 0 Epochs:  14%|█▍        | 29/200 [05:33<32:45, 11.50s/it]


Finished retraining (rank 3) — Final balanced accuracy: 0.6746

> Phase 2 – Retraining trial #68 (rank 4) with params: {'dropout': 0.0, 'lr': 1.354452159185287e-05, 'num_hidden_neurons': 64, 'num_hidden_layers': 8, 'weight_decay': 2.218028854920155e-06}


Trial 0 Epochs:  22%|██▏       | 43/200 [08:08<29:44, 11.37s/it]


Finished retraining (rank 4) — Final balanced accuracy: 0.6650

> Phase 2 – Retraining trial #87 (rank 5) with params: {'dropout': 0.0, 'lr': 1.1848973595374743e-05, 'num_hidden_neurons': 64, 'num_hidden_layers': 8, 'weight_decay': 1.851314243359433e-06}


Trial 0 Epochs:  26%|██▌       | 51/200 [09:42<28:20, 11.41s/it]


Finished retraining (rank 5) — Final balanced accuracy: 0.7110
Two-phase optimization complete. Phase 2 results saved to: sqlite:////content/drive/MyDrive/Projects/Offline_RL_BSc_Thesis/logs/final_policy/BC_robust_refined.db


#### Standard (z-score) Normalization

In [17]:
# Final-policy BC experiment with the Standard (z-score) normalization script.
# NOTE(review): 'reward' is among the input features — confirm it is available
# to the policy at inference time.
conduct_bc_experiment(
    experiments_config=bc_experiments_config,
    dataset_name='final_policy',
    output_model_name='BC_standard',
    norm_technique_name='Standard',
    norm_technique_script=fp_normalization_techniques['Standard'],
    train_df=fp_train_df,
    valid_df=fp_valid_df,
    selected_features=['X', 'Y', 'leg_1', 'leg_2', 'reward', 'angular_velocity'],
)

Phase 1:


[I 2025-11-01 13:43:51,690] Using an existing study with name 'bc_final_policy_standard_data_study' instead of creating a new one.


Study 'bc_final_policy_standard_data_study' already has 100/100 trials. Skipping optimization.
Phase 2:


[I 2025-11-01 13:43:52,680] A new study created in RDB with name: bc_final_policy_standard_data_study_refined


5
Refining top 5 configurations on full dataset...

> Phase 2 – Retraining trial #85 (rank 1) with params: {'dropout': 0.0, 'lr': 1.4947617862618928e-05, 'num_hidden_neurons': 128, 'num_hidden_layers': 5, 'weight_decay': 0.00015661700316116258}


Trial 0 Epochs:  32%|███▎      | 65/200 [09:52<20:30,  9.12s/it]


Finished retraining (rank 1) — Final balanced accuracy: 0.7784

> Phase 2 – Retraining trial #94 (rank 2) with params: {'dropout': 0.0, 'lr': 1.5858502074184944e-05, 'num_hidden_neurons': 160, 'num_hidden_layers': 5, 'weight_decay': 9.805155439672058e-05}


Trial 0 Epochs:   7%|▋         | 14/200 [02:14<29:48,  9.61s/it]


Finished retraining (rank 2) — Final balanced accuracy: 0.5398

> Phase 2 – Retraining trial #99 (rank 3) with params: {'dropout': 0.0, 'lr': 1.0058977855548607e-05, 'num_hidden_neurons': 192, 'num_hidden_layers': 5, 'weight_decay': 9.50287183737157e-05}


Trial 0 Epochs:  30%|███       | 60/200 [09:09<21:21,  9.15s/it]


Finished retraining (rank 3) — Final balanced accuracy: 0.7644

> Phase 2 – Retraining trial #95 (rank 4) with params: {'dropout': 0.0, 'lr': 1.0822823140784415e-05, 'num_hidden_neurons': 224, 'num_hidden_layers': 5, 'weight_decay': 7.000640953520273e-05}


Trial 0 Epochs:  40%|████      | 81/200 [12:19<18:06,  9.13s/it]


Finished retraining (rank 4) — Final balanced accuracy: 0.7904

> Phase 2 – Retraining trial #92 (rank 5) with params: {'dropout': 0.0, 'lr': 1.4850937178920438e-05, 'num_hidden_neurons': 128, 'num_hidden_layers': 5, 'weight_decay': 0.0001539644245704936}


Trial 0 Epochs:  28%|██▊       | 55/200 [08:21<22:02,  9.12s/it]


Finished retraining (rank 5) — Final balanced accuracy: 0.7606
Two-phase optimization complete. Phase 2 results saved to: sqlite:////content/drive/MyDrive/Projects/Offline_RL_BSc_Thesis/logs/final_policy/BC_standard_refined.db


In [18]:
# Drop the final-policy frames and normalization scripts from the kernel
# namespace now that the experiments are finished.
del fp_train_df
del fp_valid_df
del fp_normalization_techniques

In [19]:
# Colab only: emit JavaScript that asks the front-end to save a checkpoint,
# wait two seconds, then emit JavaScript that disconnects the kernel.
# NOTE(review): `IPython.notebook` looks like the classic-Notebook JS API —
# confirm that it exists in the Colab front-end.
if 'google.colab' in sys.modules:
  import time
  import IPython

  _show = IPython.display.display
  _js = IPython.display.Javascript

  _show(_js('IPython.notebook.save_checkpoint();'))
  time.sleep(2)
  _show(_js('google.colab.kernel.disconnect();'))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>