# Train Iris Classifier

### Load Dataset

In [1]:
from ml_project_template.data import Dataset

# Read the Iris CSV, using the "species" column as the target
dataset = Dataset.from_csv(
    ".data/iris/iris.csv",
    target_column="species",
)

# Summarise the load: feature names, class names, and the shape of dataset.X
print(
    f"Features: {dataset.feature_names}",
    f"Target classes: {dataset.class_names}",
    f"Shape: {dataset.X.shape}",
    sep="\n",
)

Features: ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
Target classes: ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
Shape: (150, 4)


In [2]:
# Split into train/validation: test_size=0.2 holds out 20% of the rows,
# and random_state is pinned to 42 for the split.
train_data, val_data = dataset.split(
    test_size=0.2,
    random_state=42,
)

# Report the shape of each split's X
print(f"Train: {train_data.X.shape}, Validation: {val_data.X.shape}")

Train: (120, 4), Validation: (30, 4)


### List Models

In [3]:
from ml_project_template.models import ModelRegistry

# Print the names registered with ModelRegistry; these are the keys that
# ModelRegistry.get(...) is called with in the cells below.
print(ModelRegistry.list())

  from .autonotebook import tqdm as notebook_tqdm


['gb_classifier', 'mlp_classifier']


### Train Gradient Boosting Classifier

In [4]:
# Look up the registered "gb_classifier" and instantiate it
# with n_estimators=10 and max_depth=3
model = ModelRegistry.get("gb_classifier")(n_estimators=10, max_depth=3)

# train() handles fitting, evaluation, and saving in a single call;
# it receives both splits, the experiment name, and the model path.
model.train(
    experiment_name="iris-gb-classifier",
    train_data=train_data,
    val_data=val_data,
    model_path=".models/iris_gb_classifier",
)

2026/02/07 14:46:01 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/02/07 14:46:01 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/02/07 14:46:01 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/02/07 14:46:01 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/02/07 14:46:01 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/02/07 14:46:01 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/02/07 14:46:02 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/07 14:46:02 INFO alembic.runtime.migration: Will assume non-transactional DDL.


### Evaluate Gradient Boosting Classifier

In [5]:
from sklearn.metrics import accuracy_score

# Rebuild a model with the same hyperparameters as the training cell
model = ModelRegistry.get("gb_classifier")(n_estimators=10, max_depth=3)

# Load the pretrained model from the path used as model_path during training
model.load(".models/iris_gb_classifier")

# Score the loaded model directly on val_data.
# NOTE(review): the printed label says "Test", but val_data is the validation
# split that was also passed to train(); this notebook has no separate test split.
predictions = model.predict(val_data.X)
accuracy = accuracy_score(y_true=val_data.y, y_pred=predictions)
print(f"Test accuracy: {accuracy:.2%}")

Test accuracy: 100.00%


### Train MLP Classifier

In [9]:
# Create model.
# The input and output sizes are derived from the data rather than hard-coded:
# one input per column of X and one output per target class. For the Iris data
# loaded above this evaluates to input_dim=4 and num_classes=3.
model = ModelRegistry.get("mlp_classifier")(
    input_dim=train_data.X.shape[1],
    hidden_dim=16,
    num_classes=len(dataset.class_names),
)

# Train, evaluate, and save.
# max_epochs is only an upper bound: with patience=10 the run stops early once
# 10 epochs pass without improvement (see the message in the cell output).
# NOTE(review): val_frequency=1 presumably means "validate every epoch" - confirm
# against the model's train() implementation.
model.train(
    experiment_name="iris-mlp-classifier",
    train_data=train_data,
    val_data=val_data,
    lr=1e-3,
    max_epochs=1000,
    batch_size=8,
    val_frequency=1,
    patience=10,
    model_path=".models/iris_mlp_classifier",
)

Epoch: 278/1000 | train_loss: 0.0697 | val_loss: 0.0658 | best_val_loss: 0.0654:  28%|██▊       | 278/1000 [00:07<00:19, 36.99it/s]


10 epochs reached without improvement. Early stopping.


### Evaluate MLP Classifier

In [10]:
from sklearn.metrics import accuracy_score

# Create model with the same architecture as the one that was trained.
# The input and output sizes are derived from the data rather than hard-coded
# (4 feature columns and 3 classes for Iris); hidden_dim must still match the
# value used in the training cell for the saved model to load.
model = ModelRegistry.get("mlp_classifier")(
    input_dim=val_data.X.shape[1],
    hidden_dim=16,
    num_classes=len(dataset.class_names),
)

# Load pretrained model from the path used as model_path during training
model.load(".models/iris_mlp_classifier")

# Evaluate directly.
# predict() output is reduced with argmax over axis 1 to get one predicted
# class index per row (presumably the output is (n_rows, num_classes) - confirm).
output = model.predict(val_data.X)
predictions = output.argmax(axis=1)

# NOTE(review): the printed label says "Test", but val_data is the validation
# split that was also passed to train() and drove early stopping, so this
# number is not a held-out test score.
accuracy = accuracy_score(val_data.y, predictions)
print(f"Test accuracy: {accuracy:.2%}")

Test accuracy: 100.00%
