# 03 — Classical Models

Train the classical ML classifiers on the combined train and validation splits, then evaluate and compare them on the held-out test split.

In [None]:
import sys
sys.path.insert(0, "../src")
from data_loading import build_dataset, DataSplitter
from feature_extractors import build_feature_pipeline
from classical_models import build_all_classical_models
from evaluation import ModelEvaluator
from utils import load_config, set_global_seed

# Notebook-wide inputs: config file, the two FASTA sources, and the seed.
CONFIG_PATH = "../configs/default.yaml"
CLASS_A_FASTA = "../data/class_a.fasta"
CLASS_B_FASTA = "../data/class_b.fasta"
SEED = 42

# Load the configuration first, fix the global seed, then build the dataset
# from the two FASTA files.
config = load_config(CONFIG_PATH)
set_global_seed(SEED)
dataset = build_dataset(CLASS_A_FASTA, CLASS_B_FASTA, config)

In [None]:
# Split the dataset, fit the feature pipeline, and assemble the model inputs
import numpy as np

# Three-way split with a fixed seed so the partitions are reproducible.
splitter = DataSplitter(test_size=0.2, val_size=0.1, seed=42)
train_ds, val_ds, test_ds = splitter.split(dataset)

# Fit the feature pipeline on the training split only; the test and
# validation splits go through the already-fitted pipeline.
pipeline = build_feature_pipeline(config)
X_train = pipeline.fit_transform(train_ds.sequences)
X_test = pipeline.transform(test_ds.sequences)
X_val = pipeline.transform(val_ds.sequences)

# The final training matrix is train + validation stacked row-wise.
# X_train is reused here instead of transforming the training sequences a
# second time.
# NOTE(review): assumes pipeline.fit_transform(X) yields the same features as
# pipeline.transform(X) after fitting (the scikit-learn convention) — confirm
# for any custom extractor in build_feature_pipeline.
X_all_train = np.vstack([X_train, X_val])
y_all_train = np.concatenate([train_ds.labels, val_ds.labels])
print(f"Training features: {X_all_train.shape}")

In [None]:
# Build every classical model, then fit each one on X_all_train / y_all_train
# NOTE(review): class_counts comes from the full dataset rather than only the
# rows used for fitting — confirm that is intended.
models = build_all_classical_models(
    config,
    seed=42,
    class_counts=dataset.class_counts,
)
for model in models:
    print(f"Training {model.name}...")
    model.fit(X_all_train, y_all_train)

In [None]:
# Score each fitted model on the test features and record the results
evaluator = ModelEvaluator(
    output_dir="../results/plots",
    label_names=dataset.label_names,
)
for model in models:
    # Both the hard predictions and the predicted probabilities are passed
    # to the evaluator.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)
    evaluator.evaluate_model(model.name, test_ds.labels, y_pred, y_proba)

# Print the selected columns of the comparison table without the index.
report_columns = ["Model", "accuracy", "f1_weighted", "roc_auc", "mcc"]
df = evaluator.comparison_table()
print(df[report_columns].to_string(index=False))

In [None]:
# Plot the model comparison on the f1_weighted metric. The call stays as the
# cell's final expression so any returned object is still displayed.
comparison_metric = "f1_weighted"
evaluator.plot_model_comparison(comparison_metric)