# Final Model Training and Evaluation

Train the final model using the best hyperparameters found by the Optuna optimization, then evaluate it on the held-out test set.

In [None]:
import sys
import os
from pathlib import Path
import json

# Resolve locations relative to the current working directory:
# `notebook_dir` is the parent of the working directory, and `build_dir`
# sits one level above that.
notebook_dir = Path.cwd().parent
build_dir = notebook_dir.parent

# Make `notebook_dir` importable (presumably where the `src` package lives —
# confirm against the repository layout).
sys.path.append(str(notebook_dir))

import pandas as pd
import numpy as np
from src.modeling.architecture import ModelBuilder
from src.modeling.trainer import ModelTrainer
from src.preprocessing.preprocessor import FeaturePreprocessor

In [None]:
# Load best parameters from Optuna.
# JSON is UTF-8 by specification, so decode it explicitly rather than relying
# on the platform's default text encoding (which differs across systems).
best_params_path = os.path.join(build_dir, 'optuna_results/best_parameters.json')
with open(best_params_path, 'r', encoding='utf-8') as f:
    best_params = json.load(f)

# Echo every hyperparameter so the notebook output records what was used.
print("Best parameters:")
for key, value in best_params.items():
    print(f"{key}: {value}")

In [None]:
# Input feature columns and target label columns
feature_cols = ['mH2', 'mHD', 'mAD', 'mHDp', 'alpha', 'L2', 'L8', 'vs', 'm22sq']
label_cols = ['valid_BFB', 'valid_Uni', 'valid_STU', 'valid_Higgs']

# Restore the preprocessor from the `preprocessor` directory under build_dir
preprocessor_dir = os.path.join(build_dir, 'preprocessor')
preprocessor = FeaturePreprocessor.load_transformers(preprocessor_dir)

# Read the train / validation / test splits (tab-separated), in that order
train_data, val_data, test_data = (
    pd.read_csv(os.path.join(build_dir, split_file), sep='\t')
    for split_file in (
        'data_splits/train_set.tsv',
        'data_splits/val_set.tsv',
        'data_splits/test_set.tsv',
    )
)

# Features go through the loaded preprocessor; labels are selected as-is
X_train, X_val, X_test = (
    preprocessor.transform(split[feature_cols])
    for split in (train_data, val_data, test_data)
)
y_train, y_val, y_test = (
    split[label_cols] for split in (train_data, val_data, test_data)
)

In [None]:
# Model dimensions follow directly from the column lists
n_features = len(feature_cols)
n_labels = len(label_cols)
builder = ModelBuilder(input_shape=(n_features,), num_outputs=n_labels)

# The trainer receives the builder, the column names, and the `final_model`
# directory under build_dir as its output location
final_model_dir = os.path.join(build_dir, 'final_model')
trainer = ModelTrainer(
    model_builder=builder,
    feature_cols=feature_cols,
    label_cols=label_cols,
    output_dir=final_model_dir,
)

In [None]:
# Fit the final model: training split for fitting, validation split passed
# alongside it, and the loaded Optuna hyperparameters as `params`
fit_splits = {
    'X_train': X_train,
    'y_train': y_train,
    'X_val': X_val,
    'y_val': y_val,
}
model = trainer.train(**fit_splits, params=best_params)

In [None]:
# Evaluate the trained model on the test split.
# Kept as the cell's trailing expression so that any value `evaluate` returns
# is still rendered as the cell output.
trainer.evaluate(model=model, X_test=X_test, y_test=y_test)