In [None]:
from src.defi_volume_predictor import DeFiVolumePredictor
import pandas as pd

In [3]:
try:
    df = pd.read_csv('data/external/df_for_learning.csv')
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.sort_values('timestamp').reset_index(drop=True)
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: 'df_for_learning.csv' not found. Please ensure it's in the correct directory.")
    df = None

Data loaded successfully.


In [5]:
if df is not None:
    USE_SEMANTIC_FEATURES = False
    
    print("\n" + "="*50)
    print(f"EXPERIMENT: Running with semantic features = {USE_SEMANTIC_FEATURES}")
    print("="*50)
    
    baseline_predictor = DeFiVolumePredictor(use_semantic_features=USE_SEMANTIC_FEATURES)
    X_train, X_test, y_train, y_test, features = baseline_predictor.prepare_data(df)
    print("Data preparation complete.")
    print(f"Training data shape: {X_train.shape}")
    print(f"Test data shape: {X_test.shape}")


EXPERIMENT: Running with semantic features = False
Data preparation complete.
Training data shape: (6145, 24)
Test data shape: (1537, 24)


In [6]:
if df is not None:
    print("\n" + "="*50)
    print("STEP 1: HYPERPARAMETER OPTIMIZATION WITH OPTUNA")
    print("="*50)
    
    baseline_predictor.optimize_model('rf', n_trials=30)

[I 2025-06-10 23:44:19,088] A new study created in memory with name: no-name-0899f84f-8c41-43fe-beda-2ad80c800f5c



STEP 1: HYPERPARAMETER OPTIMIZATION WITH OPTUNA

Optimizing RF...


[I 2025-06-10 23:44:27,368] Trial 0 finished with value: 0.12302623500587023 and parameters: {'n_estimators': 76, 'max_depth': 29, 'min_samples_split': 30, 'min_samples_leaf': 17, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 0 for RF finished with value: 0.12303


[I 2025-06-10 23:44:42,442] Trial 1 finished with value: 0.12624432262633475 and parameters: {'n_estimators': 138, 'max_depth': 27, 'min_samples_split': 15, 'min_samples_leaf': 12, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 1 for RF finished with value: 0.12624


[I 2025-06-10 23:44:59,356] Trial 2 finished with value: 0.12678544776138187 and parameters: {'n_estimators': 163, 'max_depth': 11, 'min_samples_split': 20, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.12302623500587023.


Trial 2 for RF finished with value: 0.12679


[I 2025-06-10 23:45:32,941] Trial 3 finished with value: 0.12690339053978927 and parameters: {'n_estimators': 315, 'max_depth': 24, 'min_samples_split': 10, 'min_samples_leaf': 9, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 3 for RF finished with value: 0.12690


[I 2025-06-10 23:46:07,127] Trial 4 finished with value: 0.12771273317027648 and parameters: {'n_estimators': 343, 'max_depth': 25, 'min_samples_split': 4, 'min_samples_leaf': 20, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 4 for RF finished with value: 0.12771


[I 2025-06-10 23:46:22,545] Trial 5 finished with value: 0.12450747092652169 and parameters: {'n_estimators': 137, 'max_depth': 18, 'min_samples_split': 10, 'min_samples_leaf': 11, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 5 for RF finished with value: 0.12451


[I 2025-06-10 23:46:43,732] Trial 6 finished with value: 0.12583034659920847 and parameters: {'n_estimators': 250, 'max_depth': 7, 'min_samples_split': 13, 'min_samples_leaf': 17, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 6 for RF finished with value: 0.12583


[I 2025-06-10 23:46:52,150] Trial 7 finished with value: 0.1262769932394919 and parameters: {'n_estimators': 63, 'max_depth': 13, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.12302623500587023.


Trial 7 for RF finished with value: 0.12628


[I 2025-06-10 23:47:19,628] Trial 8 finished with value: 0.12871924358564565 and parameters: {'n_estimators': 331, 'max_depth': 6, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 8 for RF finished with value: 0.12872


[I 2025-06-10 23:48:00,632] Trial 9 finished with value: 0.12817582576079492 and parameters: {'n_estimators': 360, 'max_depth': 30, 'min_samples_split': 25, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.12302623500587023.


Trial 9 for RF finished with value: 0.12818


[I 2025-06-10 23:48:07,881] Trial 10 finished with value: 0.12528660522609078 and parameters: {'n_estimators': 59, 'max_depth': 20, 'min_samples_split': 30, 'min_samples_leaf': 15, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.12302623500587023.


Trial 10 for RF finished with value: 0.12529


[I 2025-06-10 23:48:25,127] Trial 11 finished with value: 0.12539763382814165 and parameters: {'n_estimators': 146, 'max_depth': 19, 'min_samples_split': 22, 'min_samples_leaf': 15, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 11 for RF finished with value: 0.12540


[I 2025-06-10 23:48:37,789] Trial 12 finished with value: 0.12776566262121516 and parameters: {'n_estimators': 107, 'max_depth': 15, 'min_samples_split': 28, 'min_samples_leaf': 12, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 12 for RF finished with value: 0.12777


[I 2025-06-10 23:49:00,084] Trial 13 finished with value: 0.1264819289088621 and parameters: {'n_estimators': 216, 'max_depth': 22, 'min_samples_split': 18, 'min_samples_leaf': 20, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 13 for RF finished with value: 0.12648


[I 2025-06-10 23:49:19,576] Trial 14 finished with value: 0.12754634438337709 and parameters: {'n_estimators': 198, 'max_depth': 29, 'min_samples_split': 11, 'min_samples_leaf': 16, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 14 for RF finished with value: 0.12755


[I 2025-06-10 23:49:31,010] Trial 15 finished with value: 0.12422282745842712 and parameters: {'n_estimators': 96, 'max_depth': 16, 'min_samples_split': 2, 'min_samples_leaf': 6, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 15 for RF finished with value: 0.12422


[I 2025-06-10 23:49:40,556] Trial 16 finished with value: 0.1258351712850768 and parameters: {'n_estimators': 81, 'max_depth': 15, 'min_samples_split': 24, 'min_samples_leaf': 6, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 16 for RF finished with value: 0.12584


[I 2025-06-10 23:50:07,895] Trial 17 finished with value: 0.12622532740215262 and parameters: {'n_estimators': 271, 'max_depth': 10, 'min_samples_split': 16, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 17 for RF finished with value: 0.12623


[I 2025-06-10 23:50:21,221] Trial 18 finished with value: 0.1246046050842894 and parameters: {'n_estimators': 108, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.12302623500587023.


Trial 18 for RF finished with value: 0.12460


[I 2025-06-10 23:50:39,377] Trial 19 finished with value: 0.12626113121990273 and parameters: {'n_estimators': 187, 'max_depth': 16, 'min_samples_split': 26, 'min_samples_leaf': 18, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 19 for RF finished with value: 0.12626


[I 2025-06-10 23:50:48,221] Trial 20 finished with value: 0.1230504822869059 and parameters: {'n_estimators': 96, 'max_depth': 9, 'min_samples_split': 21, 'min_samples_leaf': 14, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 20 for RF finished with value: 0.12305


[I 2025-06-10 23:50:57,306] Trial 21 finished with value: 0.12360082948423629 and parameters: {'n_estimators': 95, 'max_depth': 10, 'min_samples_split': 19, 'min_samples_leaf': 14, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 21 for RF finished with value: 0.12360


[I 2025-06-10 23:51:32,759] Trial 22 finished with value: 0.12590650047875454 and parameters: {'n_estimators': 393, 'max_depth': 9, 'min_samples_split': 21, 'min_samples_leaf': 14, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 22 for RF finished with value: 0.12591


[I 2025-06-10 23:51:41,757] Trial 23 finished with value: 0.13387754473218091 and parameters: {'n_estimators': 116, 'max_depth': 5, 'min_samples_split': 19, 'min_samples_leaf': 13, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 23 for RF finished with value: 0.13388


[I 2025-06-10 23:51:47,591] Trial 24 finished with value: 0.12401620165105287 and parameters: {'n_estimators': 52, 'max_depth': 12, 'min_samples_split': 23, 'min_samples_leaf': 18, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 24 for RF finished with value: 0.12402


[I 2025-06-10 23:52:03,807] Trial 25 finished with value: 0.12682841271792936 and parameters: {'n_estimators': 171, 'max_depth': 9, 'min_samples_split': 30, 'min_samples_leaf': 14, 'max_features': 'log2'}. Best is trial 0 with value: 0.12302623500587023.


Trial 25 for RF finished with value: 0.12683


[I 2025-06-10 23:52:11,395] Trial 26 finished with value: 0.12229929024650148 and parameters: {'n_estimators': 82, 'max_depth': 8, 'min_samples_split': 27, 'min_samples_leaf': 10, 'max_features': 'sqrt'}. Best is trial 26 with value: 0.12229929024650148.


Trial 26 for RF finished with value: 0.12230


[I 2025-06-10 23:52:17,802] Trial 27 finished with value: 0.12430797480749599 and parameters: {'n_estimators': 76, 'max_depth': 7, 'min_samples_split': 27, 'min_samples_leaf': 10, 'max_features': 'sqrt'}. Best is trial 26 with value: 0.12229929024650148.


Trial 27 for RF finished with value: 0.12431


[I 2025-06-10 23:52:30,112] Trial 28 finished with value: 0.12964533899238842 and parameters: {'n_estimators': 125, 'max_depth': 13, 'min_samples_split': 28, 'min_samples_leaf': 11, 'max_features': 'sqrt'}. Best is trial 26 with value: 0.12229929024650148.


Trial 28 for RF finished with value: 0.12965


[I 2025-06-10 23:52:43,070] Trial 29 finished with value: 0.12657661033994663 and parameters: {'n_estimators': 156, 'max_depth': 8, 'min_samples_split': 24, 'min_samples_leaf': 12, 'max_features': 'sqrt'}. Best is trial 26 with value: 0.12229929024650148.


Trial 29 for RF finished with value: 0.12658

Optimization for RF complete.
Best validation MAE: 0.1223
Best params: {'n_estimators': 82, 'max_depth': 8, 'min_samples_split': 27, 'min_samples_leaf': 10, 'max_features': 'sqrt'}


In [7]:
baseline_predictor.optimize_model('lstm', n_trials=15)

[I 2025-06-10 23:53:13,234] A new study created in memory with name: no-name-a4d7b265-71c8-42f7-9220-5b7fc2b28a04



Optimizing LSTM...


[I 2025-06-10 23:54:28,539] Trial 0 finished with value: 0.16053803265094757 and parameters: {'sequence_length': 24, 'learning_rate': 0.000991059225779319, 'dropout_rate': 0.46209930073199057, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 0 with value: 0.16053803265094757.


Trial 0 for LSTM finished with value: 0.16054


[I 2025-06-10 23:55:38,740] Trial 1 finished with value: 0.3032906651496887 and parameters: {'sequence_length': 48, 'learning_rate': 0.00030501078468756653, 'dropout_rate': 0.29518345553307423, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 0 with value: 0.16053803265094757.


Trial 1 for LSTM finished with value: 0.30329


[I 2025-06-10 23:56:29,173] Trial 2 finished with value: 0.24178442358970642 and parameters: {'sequence_length': 48, 'learning_rate': 0.0007655333085585385, 'dropout_rate': 0.49811698412774674, 'n_units_1': 128, 'n_units_2': 128}. Best is trial 0 with value: 0.16053803265094757.


Trial 2 for LSTM finished with value: 0.24178


[I 2025-06-10 23:57:51,540] Trial 3 finished with value: 0.2094832956790924 and parameters: {'sequence_length': 72, 'learning_rate': 0.0004632122607461466, 'dropout_rate': 0.255143189960166, 'n_units_1': 256, 'n_units_2': 32}. Best is trial 0 with value: 0.16053803265094757.


Trial 3 for LSTM finished with value: 0.20948


[I 2025-06-10 23:59:22,535] Trial 4 finished with value: 0.11164223402738571 and parameters: {'sequence_length': 72, 'learning_rate': 0.005001625020440625, 'dropout_rate': 0.162044565806338, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 4 with value: 0.11164223402738571.


Trial 4 for LSTM finished with value: 0.11164


[I 2025-06-11 00:02:05,331] Trial 5 finished with value: 0.1342189610004425 and parameters: {'sequence_length': 72, 'learning_rate': 0.00025859069232677854, 'dropout_rate': 0.28185183868031816, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 4 with value: 0.11164223402738571.


Trial 5 for LSTM finished with value: 0.13422


[I 2025-06-11 00:03:55,540] Trial 6 finished with value: 0.16573895514011383 and parameters: {'sequence_length': 72, 'learning_rate': 0.0014074819366093395, 'dropout_rate': 0.47349765123497656, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 4 with value: 0.11164223402738571.


Trial 6 for LSTM finished with value: 0.16574


[I 2025-06-11 00:05:30,904] Trial 7 finished with value: 0.16084091365337372 and parameters: {'sequence_length': 72, 'learning_rate': 0.009455116437380343, 'dropout_rate': 0.23465704244557456, 'n_units_1': 64, 'n_units_2': 64}. Best is trial 4 with value: 0.11164223402738571.


Trial 7 for LSTM finished with value: 0.16084


[I 2025-06-11 00:06:07,150] Trial 8 finished with value: 0.2089257836341858 and parameters: {'sequence_length': 48, 'learning_rate': 0.004828761915430644, 'dropout_rate': 0.3458834802053339, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 4 with value: 0.11164223402738571.


Trial 8 for LSTM finished with value: 0.20893


[I 2025-06-11 00:06:32,711] Trial 9 finished with value: 0.22537736594676971 and parameters: {'sequence_length': 72, 'learning_rate': 0.00019660352146280214, 'dropout_rate': 0.2807834635430616, 'n_units_1': 64, 'n_units_2': 32}. Best is trial 4 with value: 0.11164223402738571.


Trial 9 for LSTM finished with value: 0.22538


[I 2025-06-11 00:06:51,765] Trial 10 finished with value: 0.17905887961387634 and parameters: {'sequence_length': 24, 'learning_rate': 0.003052848165483485, 'dropout_rate': 0.10270261333182462, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 4 with value: 0.11164223402738571.


Trial 10 for LSTM finished with value: 0.17906


[I 2025-06-11 00:07:58,882] Trial 11 finished with value: 0.29919853806495667 and parameters: {'sequence_length': 72, 'learning_rate': 0.00011681259031899465, 'dropout_rate': 0.14342487756876518, 'n_units_1': 128, 'n_units_2': 128}. Best is trial 4 with value: 0.11164223402738571.


Trial 11 for LSTM finished with value: 0.29920


[I 2025-06-11 00:09:50,388] Trial 12 finished with value: 0.2074279487133026 and parameters: {'sequence_length': 72, 'learning_rate': 0.0018508120318186235, 'dropout_rate': 0.1919916558632368, 'n_units_1': 128, 'n_units_2': 128}. Best is trial 4 with value: 0.11164223402738571.


Trial 12 for LSTM finished with value: 0.20743


[I 2025-06-11 00:10:47,371] Trial 13 finished with value: 0.11097129434347153 and parameters: {'sequence_length': 72, 'learning_rate': 0.009175744999229598, 'dropout_rate': 0.3737043035600437, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 13 with value: 0.11097129434347153.


Trial 13 for LSTM finished with value: 0.11097


[I 2025-06-11 00:11:11,477] Trial 14 finished with value: 0.15547992289066315 and parameters: {'sequence_length': 72, 'learning_rate': 0.006482371449081739, 'dropout_rate': 0.37795690865687687, 'n_units_1': 64, 'n_units_2': 32}. Best is trial 13 with value: 0.11097129434347153.


Trial 14 for LSTM finished with value: 0.15548

Optimization for LSTM complete.
Best validation MAE: 0.1110
Best params: {'sequence_length': 72, 'learning_rate': 0.009175744999229598, 'dropout_rate': 0.3737043035600437, 'n_units_1': 128, 'n_units_2': 64}


{'sequence_length': 72,
 'learning_rate': 0.009175744999229598,
 'dropout_rate': 0.3737043035600437,
 'n_units_1': 128,
 'n_units_2': 64}

In [8]:
baseline_predictor.optimize_model('gru', n_trials=15)

[I 2025-06-11 00:11:30,448] A new study created in memory with name: no-name-d803e43f-bd12-4eb4-9013-7984d7a627a0



Optimizing GRU...


[I 2025-06-11 00:12:09,408] Trial 0 finished with value: 0.19026044011116028 and parameters: {'sequence_length': 72, 'learning_rate': 0.0008945622483090992, 'dropout_rate': 0.15017098481959812, 'n_units_1': 64, 'n_units_2': 32}. Best is trial 0 with value: 0.19026044011116028.


Trial 0 for GRU finished with value: 0.19026


[I 2025-06-11 00:13:07,164] Trial 1 finished with value: 0.1706087291240692 and parameters: {'sequence_length': 24, 'learning_rate': 0.00019736880006167935, 'dropout_rate': 0.14206733800029628, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 1 with value: 0.1706087291240692.


Trial 1 for GRU finished with value: 0.17061


[I 2025-06-11 00:13:23,221] Trial 2 finished with value: 0.2218904346227646 and parameters: {'sequence_length': 24, 'learning_rate': 0.0025374759012399403, 'dropout_rate': 0.43684290541676807, 'n_units_1': 128, 'n_units_2': 32}. Best is trial 1 with value: 0.1706087291240692.


Trial 2 for GRU finished with value: 0.22189


[I 2025-06-11 00:13:40,494] Trial 3 finished with value: 0.20466917753219604 and parameters: {'sequence_length': 24, 'learning_rate': 0.0003455206669370324, 'dropout_rate': 0.36641124542974357, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 1 with value: 0.1706087291240692.


Trial 3 for GRU finished with value: 0.20467


[I 2025-06-11 00:14:57,051] Trial 4 finished with value: 0.13142380118370056 and parameters: {'sequence_length': 72, 'learning_rate': 0.002010537094143424, 'dropout_rate': 0.30068931650681097, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 4 with value: 0.13142380118370056.


Trial 4 for GRU finished with value: 0.13142


[I 2025-06-11 00:15:38,168] Trial 5 finished with value: 0.11869289726018906 and parameters: {'sequence_length': 48, 'learning_rate': 0.0008783087279134427, 'dropout_rate': 0.49143379231925255, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 5 with value: 0.11869289726018906.


Trial 5 for GRU finished with value: 0.11869


[I 2025-06-11 00:17:22,200] Trial 6 finished with value: 0.14058423042297363 and parameters: {'sequence_length': 72, 'learning_rate': 0.0077820038603426235, 'dropout_rate': 0.20960209680257, 'n_units_1': 256, 'n_units_2': 128}. Best is trial 5 with value: 0.11869289726018906.


Trial 6 for GRU finished with value: 0.14058


[I 2025-06-11 00:17:47,496] Trial 7 finished with value: 0.2927401065826416 and parameters: {'sequence_length': 24, 'learning_rate': 0.0001318438351284519, 'dropout_rate': 0.12049616682247044, 'n_units_1': 128, 'n_units_2': 128}. Best is trial 5 with value: 0.11869289726018906.


Trial 7 for GRU finished with value: 0.29274


[I 2025-06-11 00:18:55,425] Trial 8 finished with value: 0.16137738525867462 and parameters: {'sequence_length': 72, 'learning_rate': 0.003335657821767457, 'dropout_rate': 0.20006660940659265, 'n_units_1': 128, 'n_units_2': 128}. Best is trial 5 with value: 0.11869289726018906.


Trial 8 for GRU finished with value: 0.16138


[I 2025-06-11 00:20:19,095] Trial 9 finished with value: 0.21250678598880768 and parameters: {'sequence_length': 48, 'learning_rate': 0.0003372892297829348, 'dropout_rate': 0.2412784433510638, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 5 with value: 0.11869289726018906.


Trial 9 for GRU finished with value: 0.21251


[I 2025-06-11 00:20:51,565] Trial 10 finished with value: 0.1826878935098648 and parameters: {'sequence_length': 48, 'learning_rate': 0.0008217652664477211, 'dropout_rate': 0.4951402651640514, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 5 with value: 0.11869289726018906.


Trial 10 for GRU finished with value: 0.18269


[I 2025-06-11 00:21:46,170] Trial 11 finished with value: 0.15565039217472076 and parameters: {'sequence_length': 48, 'learning_rate': 0.0016584680796374407, 'dropout_rate': 0.3384755788841131, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 5 with value: 0.11869289726018906.


Trial 11 for GRU finished with value: 0.15565


[I 2025-06-11 00:22:17,521] Trial 12 finished with value: 0.2282940000295639 and parameters: {'sequence_length': 72, 'learning_rate': 0.00838460714792424, 'dropout_rate': 0.40595768250483716, 'n_units_1': 64, 'n_units_2': 64}. Best is trial 5 with value: 0.11869289726018906.


Trial 12 for GRU finished with value: 0.22829


[I 2025-06-11 00:22:39,439] Trial 13 finished with value: 0.19222764670848846 and parameters: {'sequence_length': 48, 'learning_rate': 0.0005098491608785031, 'dropout_rate': 0.3029826456778313, 'n_units_1': 64, 'n_units_2': 64}. Best is trial 5 with value: 0.11869289726018906.


Trial 13 for GRU finished with value: 0.19223


[I 2025-06-11 00:25:17,449] Trial 14 finished with value: 0.1494389921426773 and parameters: {'sequence_length': 72, 'learning_rate': 0.0015181480840605857, 'dropout_rate': 0.46788973204600537, 'n_units_1': 128, 'n_units_2': 32}. Best is trial 5 with value: 0.11869289726018906.


Trial 14 for GRU finished with value: 0.14944

Optimization for GRU complete.
Best validation MAE: 0.1187
Best params: {'sequence_length': 48, 'learning_rate': 0.0008783087279134427, 'dropout_rate': 0.49143379231925255, 'n_units_1': 64, 'n_units_2': 128}


{'sequence_length': 48,
 'learning_rate': 0.0008783087279134427,
 'dropout_rate': 0.49143379231925255,
 'n_units_1': 64,
 'n_units_2': 128}

In [9]:
if df is not None:
    USE_SEMANTIC_FEATURES = True
    
    print("\n" + "="*50)
    print(f"EXPERIMENT: Running with semantic features = {USE_SEMANTIC_FEATURES}")
    print("="*50)
    
    semantic_predictor = DeFiVolumePredictor(use_semantic_features=USE_SEMANTIC_FEATURES)
    X_train, X_test, y_train, y_test, features = semantic_predictor.prepare_data(df)
    print("Data preparation complete.")
    print(f"Training data shape: {X_train.shape}")
    print(f"Test data shape: {X_test.shape}")


EXPERIMENT: Running with semantic features = True
Data preparation complete.
Training data shape: (6145, 32)
Test data shape: (1537, 32)


In [10]:
if df is not None:
    print("\n" + "="*50)
    print("STEP 1: HYPERPARAMETER OPTIMIZATION WITH OPTUNA")
    print("="*50)
    
    semantic_predictor.optimize_model('rf', n_trials=30)

[I 2025-06-11 00:25:47,979] A new study created in memory with name: no-name-244337ca-0fc3-4cf7-a15d-40125e62f412



STEP 1: HYPERPARAMETER OPTIMIZATION WITH OPTUNA

Optimizing RF...


[I 2025-06-11 00:26:19,481] Trial 0 finished with value: 0.13214169602157502 and parameters: {'n_estimators': 254, 'max_depth': 20, 'min_samples_split': 25, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 0 with value: 0.13214169602157502.


Trial 0 for RF finished with value: 0.13214


[I 2025-06-11 00:26:26,677] Trial 1 finished with value: 0.13034444215090482 and parameters: {'n_estimators': 58, 'max_depth': 27, 'min_samples_split': 18, 'min_samples_leaf': 16, 'max_features': 'log2'}. Best is trial 1 with value: 0.13034444215090482.


Trial 1 for RF finished with value: 0.13034


[I 2025-06-11 00:26:35,019] Trial 2 finished with value: 0.12921602703942495 and parameters: {'n_estimators': 64, 'max_depth': 16, 'min_samples_split': 30, 'min_samples_leaf': 19, 'max_features': 'log2'}. Best is trial 2 with value: 0.12921602703942495.


Trial 2 for RF finished with value: 0.12922


[I 2025-06-11 00:26:45,411] Trial 3 finished with value: 0.12820992434483044 and parameters: {'n_estimators': 90, 'max_depth': 22, 'min_samples_split': 30, 'min_samples_leaf': 16, 'max_features': 'sqrt'}. Best is trial 3 with value: 0.12820992434483044.


Trial 3 for RF finished with value: 0.12821


[I 2025-06-11 00:27:05,727] Trial 4 finished with value: 0.12727405421652677 and parameters: {'n_estimators': 192, 'max_depth': 22, 'min_samples_split': 11, 'min_samples_leaf': 18, 'max_features': 'log2'}. Best is trial 4 with value: 0.12727405421652677.


Trial 4 for RF finished with value: 0.12727


[I 2025-06-11 00:27:47,735] Trial 5 finished with value: 0.12730674020775265 and parameters: {'n_estimators': 389, 'max_depth': 21, 'min_samples_split': 3, 'min_samples_leaf': 18, 'max_features': 'sqrt'}. Best is trial 4 with value: 0.12727405421652677.


Trial 5 for RF finished with value: 0.12731


[I 2025-06-11 00:28:34,477] Trial 6 finished with value: 0.13094875926544355 and parameters: {'n_estimators': 367, 'max_depth': 17, 'min_samples_split': 13, 'min_samples_leaf': 8, 'max_features': 'log2'}. Best is trial 4 with value: 0.12727405421652677.


Trial 6 for RF finished with value: 0.13095


[I 2025-06-11 00:28:45,820] Trial 7 finished with value: 0.12924782333347942 and parameters: {'n_estimators': 95, 'max_depth': 17, 'min_samples_split': 23, 'min_samples_leaf': 18, 'max_features': 'sqrt'}. Best is trial 4 with value: 0.12727405421652677.


Trial 7 for RF finished with value: 0.12925


[I 2025-06-11 00:29:07,473] Trial 8 finished with value: 0.1317781128972064 and parameters: {'n_estimators': 193, 'max_depth': 13, 'min_samples_split': 22, 'min_samples_leaf': 10, 'max_features': 'log2'}. Best is trial 4 with value: 0.12727405421652677.


Trial 8 for RF finished with value: 0.13178


[I 2025-06-11 00:29:36,762] Trial 9 finished with value: 0.1265368616202878 and parameters: {'n_estimators': 268, 'max_depth': 23, 'min_samples_split': 2, 'min_samples_leaf': 19, 'max_features': 'log2'}. Best is trial 9 with value: 0.1265368616202878.


Trial 9 for RF finished with value: 0.12654


[I 2025-06-11 00:29:58,129] Trial 10 finished with value: 0.11929076911631091 and parameters: {'n_estimators': 297, 'max_depth': 5, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 10 with value: 0.11929076911631091.


Trial 10 for RF finished with value: 0.11929


[I 2025-06-11 00:30:19,663] Trial 11 finished with value: 0.11917950802989037 and parameters: {'n_estimators': 298, 'max_depth': 5, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 11 for RF finished with value: 0.11918


[I 2025-06-11 00:30:44,987] Trial 12 finished with value: 0.12050857567489211 and parameters: {'n_estimators': 328, 'max_depth': 5, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 12 for RF finished with value: 0.12051


[I 2025-06-11 00:31:07,067] Trial 13 finished with value: 0.11941328692490404 and parameters: {'n_estimators': 308, 'max_depth': 5, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 13 for RF finished with value: 0.11941


[I 2025-06-11 00:31:41,870] Trial 14 finished with value: 0.13114952643421576 and parameters: {'n_estimators': 312, 'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 14 for RF finished with value: 0.13115


[I 2025-06-11 00:32:04,015] Trial 15 finished with value: 0.12915804949117457 and parameters: {'n_estimators': 220, 'max_depth': 9, 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 15 for RF finished with value: 0.12916


[I 2025-06-11 00:32:19,289] Trial 16 finished with value: 0.12802035838628653 and parameters: {'n_estimators': 147, 'max_depth': 9, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 16 for RF finished with value: 0.12802


[I 2025-06-11 00:32:48,723] Trial 17 finished with value: 0.13124636607995693 and parameters: {'n_estimators': 271, 'max_depth': 12, 'min_samples_split': 16, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 17 for RF finished with value: 0.13125


[I 2025-06-11 00:33:17,237] Trial 18 finished with value: 0.1233196526537097 and parameters: {'n_estimators': 340, 'max_depth': 7, 'min_samples_split': 10, 'min_samples_leaf': 13, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 18 for RF finished with value: 0.12332


[I 2025-06-11 00:33:49,882] Trial 19 finished with value: 0.13178493100883334 and parameters: {'n_estimators': 286, 'max_depth': 13, 'min_samples_split': 3, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 19 for RF finished with value: 0.13178


[I 2025-06-11 00:34:19,483] Trial 20 finished with value: 0.13270719409626333 and parameters: {'n_estimators': 232, 'max_depth': 30, 'min_samples_split': 15, 'min_samples_leaf': 2, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 20 for RF finished with value: 0.13271


[I 2025-06-11 00:34:42,324] Trial 21 finished with value: 0.11981766366098912 and parameters: {'n_estimators': 321, 'max_depth': 5, 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 21 for RF finished with value: 0.11982


[I 2025-06-11 00:35:07,879] Trial 22 finished with value: 0.1254431585975612 and parameters: {'n_estimators': 291, 'max_depth': 7, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 22 for RF finished with value: 0.12544


[I 2025-06-11 00:35:39,963] Trial 23 finished with value: 0.12577472811146695 and parameters: {'n_estimators': 359, 'max_depth': 7, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 11 with value: 0.11917950802989037.


Trial 23 for RF finished with value: 0.12577


[I 2025-06-11 00:36:01,396] Trial 24 finished with value: 0.11868473309816392 and parameters: {'n_estimators': 301, 'max_depth': 5, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 24 with value: 0.11868473309816392.


Trial 24 for RF finished with value: 0.11868


[I 2025-06-11 00:36:45,200] Trial 25 finished with value: 0.13071476564783116 and parameters: {'n_estimators': 397, 'max_depth': 11, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 24 with value: 0.11868473309816392.


Trial 25 for RF finished with value: 0.13071


[I 2025-06-11 00:37:13,550] Trial 26 finished with value: 0.13048424166299696 and parameters: {'n_estimators': 242, 'max_depth': 15, 'min_samples_split': 9, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 24 with value: 0.11868473309816392.


Trial 26 for RF finished with value: 0.13048


[I 2025-06-11 00:37:47,128] Trial 27 finished with value: 0.1272055294707158 and parameters: {'n_estimators': 354, 'max_depth': 8, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 24 with value: 0.11868473309816392.


Trial 27 for RF finished with value: 0.12721


[I 2025-06-11 00:38:01,817] Trial 28 finished with value: 0.117976248376589 and parameters: {'n_estimators': 207, 'max_depth': 5, 'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'sqrt'}. Best is trial 28 with value: 0.117976248376589.


Trial 28 for RF finished with value: 0.11798


[I 2025-06-11 00:38:16,308] Trial 29 finished with value: 0.12356200917181641 and parameters: {'n_estimators': 168, 'max_depth': 7, 'min_samples_split': 19, 'min_samples_leaf': 11, 'max_features': 'sqrt'}. Best is trial 28 with value: 0.117976248376589.


Trial 29 for RF finished with value: 0.12356

Optimization for RF complete.
Best validation MAE: 0.1180
Best params: {'n_estimators': 207, 'max_depth': 5, 'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'sqrt'}


In [11]:
semantic_predictor.optimize_model('lstm', n_trials=15)

[I 2025-06-11 00:39:14,999] A new study created in memory with name: no-name-5f72062b-acd1-4fdf-9be3-694ed04f15e8



Optimizing LSTM...


[I 2025-06-11 00:39:46,921] Trial 0 finished with value: 0.1747954934835434 and parameters: {'sequence_length': 48, 'learning_rate': 0.0015150154130193266, 'dropout_rate': 0.4944920916377161, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 0 with value: 0.1747954934835434.


Trial 0 for LSTM finished with value: 0.17480


[I 2025-06-11 00:41:09,027] Trial 1 finished with value: 0.18209978938102722 and parameters: {'sequence_length': 48, 'learning_rate': 0.0008036651196358127, 'dropout_rate': 0.2180795199569628, 'n_units_1': 256, 'n_units_2': 32}. Best is trial 0 with value: 0.1747954934835434.


Trial 1 for LSTM finished with value: 0.18210


[I 2025-06-11 00:43:03,837] Trial 2 finished with value: 0.11904815584421158 and parameters: {'sequence_length': 72, 'learning_rate': 0.000783018304584449, 'dropout_rate': 0.31295632947763385, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 2 with value: 0.11904815584421158.


Trial 2 for LSTM finished with value: 0.11905


[I 2025-06-11 00:43:57,065] Trial 3 finished with value: 0.18545489013195038 and parameters: {'sequence_length': 72, 'learning_rate': 0.0012633202986779008, 'dropout_rate': 0.18068185693981975, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 2 with value: 0.11904815584421158.


Trial 3 for LSTM finished with value: 0.18545


[I 2025-06-11 00:44:13,284] Trial 4 finished with value: 0.20682987570762634 and parameters: {'sequence_length': 24, 'learning_rate': 0.00034745785979241946, 'dropout_rate': 0.19006166983782605, 'n_units_1': 128, 'n_units_2': 32}. Best is trial 2 with value: 0.11904815584421158.


Trial 4 for LSTM finished with value: 0.20683


[I 2025-06-11 00:44:36,256] Trial 5 finished with value: 0.3217739462852478 and parameters: {'sequence_length': 24, 'learning_rate': 0.0002266123781264199, 'dropout_rate': 0.39636866704807316, 'n_units_1': 64, 'n_units_2': 64}. Best is trial 2 with value: 0.11904815584421158.


Trial 5 for LSTM finished with value: 0.32177


[I 2025-06-11 00:45:00,184] Trial 6 finished with value: 0.201654314994812 and parameters: {'sequence_length': 24, 'learning_rate': 0.0017983561740776022, 'dropout_rate': 0.3272197359513257, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 2 with value: 0.11904815584421158.


Trial 6 for LSTM finished with value: 0.20165


[I 2025-06-11 00:45:32,471] Trial 7 finished with value: 0.37447452545166016 and parameters: {'sequence_length': 48, 'learning_rate': 0.00010978874259775774, 'dropout_rate': 0.34850821920984953, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 2 with value: 0.11904815584421158.


Trial 7 for LSTM finished with value: 0.37447


[I 2025-06-11 00:48:01,038] Trial 8 finished with value: 0.1158425509929657 and parameters: {'sequence_length': 48, 'learning_rate': 0.004935029119156469, 'dropout_rate': 0.10034459862117764, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 8 with value: 0.1158425509929657.


Trial 8 for LSTM finished with value: 0.11584


[I 2025-06-11 00:48:45,367] Trial 9 finished with value: 0.1997298002243042 and parameters: {'sequence_length': 48, 'learning_rate': 0.0006115538773949876, 'dropout_rate': 0.4733740985703151, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 8 with value: 0.1158425509929657.


Trial 9 for LSTM finished with value: 0.19973


[I 2025-06-11 00:50:30,373] Trial 10 finished with value: 0.1283913254737854 and parameters: {'sequence_length': 48, 'learning_rate': 0.008112293995172309, 'dropout_rate': 0.11405939979802214, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 8 with value: 0.1158425509929657.


Trial 10 for LSTM finished with value: 0.12839


[I 2025-06-11 00:51:51,977] Trial 11 finished with value: 0.1048518493771553 and parameters: {'sequence_length': 72, 'learning_rate': 0.0052689088722282845, 'dropout_rate': 0.272542155247986, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 11 with value: 0.1048518493771553.


Trial 11 for LSTM finished with value: 0.10485


[I 2025-06-11 00:53:50,085] Trial 12 finished with value: 0.15180841088294983 and parameters: {'sequence_length': 72, 'learning_rate': 0.0061845102866126935, 'dropout_rate': 0.10935480268750253, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 11 with value: 0.1048518493771553.


Trial 12 for LSTM finished with value: 0.15181


[I 2025-06-11 00:56:55,861] Trial 13 finished with value: 0.13346756994724274 and parameters: {'sequence_length': 72, 'learning_rate': 0.0038873227378823814, 'dropout_rate': 0.23541996958621125, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 11 with value: 0.1048518493771553.


Trial 13 for LSTM finished with value: 0.13347


[I 2025-06-11 00:58:24,783] Trial 14 finished with value: 0.13855579495429993 and parameters: {'sequence_length': 72, 'learning_rate': 0.0028310110938638217, 'dropout_rate': 0.2700979051681922, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 11 with value: 0.1048518493771553.


Trial 14 for LSTM finished with value: 0.13856

Optimization for LSTM complete.
Best validation MAE: 0.1049
Best params: {'sequence_length': 72, 'learning_rate': 0.0052689088722282845, 'dropout_rate': 0.272542155247986, 'n_units_1': 128, 'n_units_2': 64}


{'sequence_length': 72,
 'learning_rate': 0.0052689088722282845,
 'dropout_rate': 0.272542155247986,
 'n_units_1': 128,
 'n_units_2': 64}

In [12]:
semantic_predictor.optimize_model('gru', n_trials=15)

[I 2025-06-11 00:58:34,782] A new study created in memory with name: no-name-37566d96-17ec-4bd7-88c6-9fb28b90a7a3



Optimizing GRU...


[I 2025-06-11 01:00:00,100] Trial 0 finished with value: 0.14531834423542023 and parameters: {'sequence_length': 48, 'learning_rate': 0.00260306528132184, 'dropout_rate': 0.32826480856119766, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 0 with value: 0.14531834423542023.


Trial 0 for GRU finished with value: 0.14532


[I 2025-06-11 01:00:42,338] Trial 1 finished with value: 0.27003687620162964 and parameters: {'sequence_length': 72, 'learning_rate': 0.00018405130991291154, 'dropout_rate': 0.21386779247373325, 'n_units_1': 128, 'n_units_2': 32}. Best is trial 0 with value: 0.14531834423542023.


Trial 1 for GRU finished with value: 0.27004


[I 2025-06-11 01:02:18,020] Trial 2 finished with value: 0.16353976726531982 and parameters: {'sequence_length': 72, 'learning_rate': 0.008872163705599014, 'dropout_rate': 0.12396289799661489, 'n_units_1': 256, 'n_units_2': 128}. Best is trial 0 with value: 0.14531834423542023.


Trial 2 for GRU finished with value: 0.16354


[I 2025-06-11 01:02:29,905] Trial 3 finished with value: 0.2932281494140625 and parameters: {'sequence_length': 24, 'learning_rate': 0.00011695546994373498, 'dropout_rate': 0.4422249410973528, 'n_units_1': 64, 'n_units_2': 32}. Best is trial 0 with value: 0.14531834423542023.


Trial 3 for GRU finished with value: 0.29323


[I 2025-06-11 01:03:19,374] Trial 4 finished with value: 0.24078814685344696 and parameters: {'sequence_length': 24, 'learning_rate': 0.00023355345092435946, 'dropout_rate': 0.3461986261855448, 'n_units_1': 256, 'n_units_2': 128}. Best is trial 0 with value: 0.14531834423542023.


Trial 4 for GRU finished with value: 0.24079


[I 2025-06-11 01:03:46,151] Trial 5 finished with value: 0.16472874581813812 and parameters: {'sequence_length': 72, 'learning_rate': 0.00845174665818411, 'dropout_rate': 0.28948890089520407, 'n_units_1': 64, 'n_units_2': 32}. Best is trial 0 with value: 0.14531834423542023.


Trial 5 for GRU finished with value: 0.16473


[I 2025-06-11 01:04:34,170] Trial 6 finished with value: 0.23652787506580353 and parameters: {'sequence_length': 72, 'learning_rate': 0.00022368212441483093, 'dropout_rate': 0.10092436101517897, 'n_units_1': 128, 'n_units_2': 64}. Best is trial 0 with value: 0.14531834423542023.


Trial 6 for GRU finished with value: 0.23653


[I 2025-06-11 01:05:03,463] Trial 7 finished with value: 0.17274267971515656 and parameters: {'sequence_length': 24, 'learning_rate': 0.00032455704649848085, 'dropout_rate': 0.46989060559673146, 'n_units_1': 128, 'n_units_2': 128}. Best is trial 0 with value: 0.14531834423542023.


Trial 7 for GRU finished with value: 0.17274


[I 2025-06-11 01:06:49,361] Trial 8 finished with value: 0.22482648491859436 and parameters: {'sequence_length': 48, 'learning_rate': 0.0006397956375012663, 'dropout_rate': 0.17953320815652601, 'n_units_1': 256, 'n_units_2': 128}. Best is trial 0 with value: 0.14531834423542023.


Trial 8 for GRU finished with value: 0.22483


[I 2025-06-11 01:08:14,047] Trial 9 finished with value: 0.21493014693260193 and parameters: {'sequence_length': 72, 'learning_rate': 0.0003791524433632545, 'dropout_rate': 0.21906630852290798, 'n_units_1': 256, 'n_units_2': 64}. Best is trial 0 with value: 0.14531834423542023.


Trial 9 for GRU finished with value: 0.21493


[I 2025-06-11 01:08:47,633] Trial 10 finished with value: 0.21717678010463715 and parameters: {'sequence_length': 48, 'learning_rate': 0.002379168006608498, 'dropout_rate': 0.37290165381985607, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 0 with value: 0.14531834423542023.


Trial 10 for GRU finished with value: 0.21718


[I 2025-06-11 01:09:51,820] Trial 11 finished with value: 0.11475760489702225 and parameters: {'sequence_length': 48, 'learning_rate': 0.009509241728516261, 'dropout_rate': 0.2832139771319348, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 11 with value: 0.11475760489702225.


Trial 11 for GRU finished with value: 0.11476


[I 2025-06-11 01:10:28,611] Trial 12 finished with value: 0.18879978358745575 and parameters: {'sequence_length': 48, 'learning_rate': 0.0026955732486296595, 'dropout_rate': 0.27772605491532715, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 11 with value: 0.11475760489702225.


Trial 12 for GRU finished with value: 0.18880


[I 2025-06-11 01:11:09,933] Trial 13 finished with value: 0.16074293851852417 and parameters: {'sequence_length': 48, 'learning_rate': 0.0036117062862611577, 'dropout_rate': 0.38051840970583206, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 11 with value: 0.11475760489702225.


Trial 13 for GRU finished with value: 0.16074


[I 2025-06-11 01:11:51,385] Trial 14 finished with value: 0.18223527073860168 and parameters: {'sequence_length': 48, 'learning_rate': 0.001307727145664806, 'dropout_rate': 0.3256245111551736, 'n_units_1': 64, 'n_units_2': 128}. Best is trial 11 with value: 0.11475760489702225.


Trial 14 for GRU finished with value: 0.18224

Optimization for GRU complete.
Best validation MAE: 0.1148
Best params: {'sequence_length': 48, 'learning_rate': 0.009509241728516261, 'dropout_rate': 0.2832139771319348, 'n_units_1': 64, 'n_units_2': 128}


{'sequence_length': 48,
 'learning_rate': 0.009509241728516261,
 'dropout_rate': 0.2832139771319348,
 'n_units_1': 64,
 'n_units_2': 128}

In [13]:
if df is not None:
    print("\n" + "="*50)
    print("STEP 2: TRAINING FINAL BASELINE MODELS WITH OPTIMIZED PARAMETERS")
    print("="*50)
    baseline_predictor.train_model('rf', X_train, y_train, X_test, y_test)
    baseline_predictor.train_model('lstm', X_train, y_train, X_test, y_test)
    baseline_predictor.train_model('gru', X_train, y_train, X_test, y_test)


STEP 2: TRAINING FINAL BASELINE MODELS WITH OPTIMIZED PARAMETERS

=== Training RF with best parameters ===
Final Test Metrics -> MAE: 0.0360, RMSE: 0.0577, R²: 0.8563

=== Training LSTM with best parameters ===
Final Test Metrics -> MAE: 0.0725, RMSE: 0.0985, R²: 0.5742

=== Training GRU with best parameters ===
Final Test Metrics -> MAE: 0.0821, RMSE: 0.1036, R²: 0.5295


In [14]:
if df is not None:
    print("\n" + "="*50)
    print("STEP 2: TRAINING FINAL SEMANTIC MODELS WITH OPTIMIZED PARAMETERS")
    print("="*50)
    semantic_predictor.train_model('rf', X_train, y_train, X_test, y_test)
    semantic_predictor.train_model('lstm', X_train, y_train, X_test, y_test)
    semantic_predictor.train_model('gru', X_train, y_train, X_test, y_test)


STEP 2: TRAINING FINAL SEMANTIC MODELS WITH OPTIMIZED PARAMETERS

=== Training RF with best parameters ===
Final Test Metrics -> MAE: 0.0395, RMSE: 0.0591, R²: 0.8480

=== Training LSTM with best parameters ===
Final Test Metrics -> MAE: 0.0744, RMSE: 0.0911, R²: 0.6382

=== Training GRU with best parameters ===
Final Test Metrics -> MAE: 0.1571, RMSE: 0.1981, R²: -0.7745


In [15]:
if df is not None:
    print("\n" + "="*50)
    print("FINAL BASELINE RESULTS SUMMARY")
    print("="*50)
    results_df = pd.DataFrame(baseline_predictor.results).T
    print(results_df.sort_values(by='mae'))


FINAL BASELINE RESULTS SUMMARY
           mae      rmse        r2
rf    0.035995  0.057727  0.856289
lstm  0.072456  0.098458  0.574215
gru   0.082086  0.103564  0.529462


In [16]:
if df is not None:
    print("\n" + "="*50)
    print("FINAL SEMANTIC RESULTS SUMMARY")
    print("="*50)
    results_df = pd.DataFrame(semantic_predictor.results).T
    print(results_df.sort_values(by='mae'))


FINAL SEMANTIC RESULTS SUMMARY
           mae      rmse        r2
rf    0.039456  0.059101  0.847957
lstm  0.074422  0.091080  0.638226
gru   0.157058  0.198104 -0.774504


In [17]:
from src.models_saver import save_predictor_state

if 'baseline_predictor' in locals():
    save_predictor_state(baseline_predictor, "saved_models/low/baseline")

if 'semantic_predictor' in locals():
    save_predictor_state(semantic_predictor, "saved_models/low/semantic")

Saving state to directory: saved_models/baseline
  - Models saved.
  - Scalers saved.
  - Best parameters saved.
Saving state to directory: saved_models/semantic
  - Models saved.
  - Scalers saved.
  - Best parameters saved.
