# Final Model Training and Evaluation

Train the final model using the best hyperparameters found by the Optuna optimization, then evaluate it on the held-out test set.

In [1]:
import sys
import os
from pathlib import Path
import json

# Setup paths.
# NOTE: despite its name, `notebook_dir` is the *parent* of the current working
# directory, and `build_dir` is one level above that.
notebook_dir = Path.cwd().parent
build_dir = notebook_dir.parent

# Make the `src` package imported below resolvable. The insertion is guarded so
# that re-running this cell does not keep appending duplicate sys.path entries.
if str(notebook_dir) not in sys.path:
    sys.path.append(str(notebook_dir))

import pandas as pd
import numpy as np
from src.modeling.architecture import ModelBuilder
from src.modeling.trainer import ModelTrainer
from src.preprocessing.preprocessor import FeaturePreprocessor

2025-01-15 18:49:16.152366: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-15 18:49:16.189943: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-15 18:49:16.190775: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the best hyperparameters written by the Optuna search.
# JSON is UTF-8 by specification, so the encoding is pinned explicitly instead
# of relying on the platform's locale default.
with open(os.path.join(build_dir, 'optuna_results/best_parameters.json'), 'r', encoding='utf-8') as f:
    best_params = json.load(f)

# Echo every hyperparameter so the configuration used for the final model is
# recorded in the notebook output.
print("Best parameters:")
for key, value in best_params.items():
    print(f"{key}: {value}")

Best parameters:
n_layers: 4
activation: leaky_relu
dropout_rate: 0.1693670900721545
apply_batch_norm: True
optimizer: nadam
learning_rate: 0.0005535645533075388
batch_size: 1560
regularization: None
n_units_0: 887
n_units_1: 660
n_units_2: 748
n_units_3: 697


In [3]:
# Column layout of the split files: model inputs and target columns.
feature_cols = [
    'mH2', 'mHD', 'mAD', 'mHDp', 'alpha',
    'L2', 'L8', 'vs', 'm22sq',
]
label_cols = ['valid_BFB', 'valid_Uni', 'valid_STU', 'valid_Higgs']

# Restore the preprocessor persisted under <build_dir>/preprocessor.
preprocessor = FeaturePreprocessor.load_transformers(os.path.join(build_dir, 'preprocessor'))


def _read_split(relative_path):
    """Read one tab-separated split file located under ``build_dir``."""
    return pd.read_csv(os.path.join(build_dir, relative_path), sep='\t')


# Read the three splits before any preprocessing happens.
train_data = _read_split('data_splits/train_set.tsv')
val_data = _read_split('data_splits/val_set.tsv')
test_data = _read_split('data_splits/test_set.tsv')

# For each split: run the feature columns through the loaded preprocessor
# (only `transform` is called here) and keep the label columns as-is.
X_train, y_train = preprocessor.transform(train_data[feature_cols]), train_data[label_cols]
X_val, y_val = preprocessor.transform(val_data[feature_cols]), val_data[label_cols]
X_test, y_test = preprocessor.transform(test_data[feature_cols]), test_data[label_cols]

In [4]:
# Model builder: input width follows the feature list, one output per label column.
n_features, n_labels = len(feature_cols), len(label_cols)
builder = ModelBuilder(input_shape=(n_features,), num_outputs=n_labels)

# Trainer wired to that builder; <build_dir>/final_model is passed as its output directory.
final_model_dir = os.path.join(build_dir, 'final_model')
trainer = ModelTrainer(
    model_builder=builder,
    feature_cols=feature_cols,
    label_cols=label_cols,
    output_dir=final_model_dir,
)

In [5]:
# Fit the final model on the training split with the tuned hyperparameters;
# the validation split is handed to the trainer alongside it.
# NOTE(review): the recorded run shows 10 epochs and `best_params` carries no
# epoch count, so the number of epochs is presumably set inside ModelTrainer -
# confirm it is sufficient for a final model.
model = trainer.train(
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    params=best_params,
)

2025-01-15 18:49:27.188043: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: maienPC
2025-01-15 18:49:27.188069: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: maienPC
2025-01-15 18:49:27.188232: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program
2025-01-15 18:49:27.188268: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 470.239.6


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
# Score the trained model on the test split, which is not passed to
# `trainer.train` in this notebook. Left as a bare expression so that any
# return value is displayed as the cell output.
trainer.evaluate(model=model, X_test=X_test, y_test=y_test)

