# Train Iris Classifier

### Load Dataset

In [1]:
from ml_project_template.data import Dataset

# Read the Iris CSV into a Dataset, using the "species" column as the target.
dataset = Dataset.from_csv(
    ".data/iris/iris.csv",
    target_column="species",
)

# Sanity-check what was loaded: feature names, class names, and the
# shape of the feature matrix, one per line.
print(
    f"Features: {dataset.feature_names}",
    f"Target classes: {dataset.class_names}",
    f"Shape: {dataset.X.shape}",
    sep="\n",
)

Features: ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
Target classes: ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
Shape: (150, 4)


In [2]:
# Hold out 20% of the rows as a test set. random_state is pinned,
# presumably so the split is reproducible across runs (confirm in Dataset.split).
train_data, test_data = dataset.split(
    test_size=0.2,
    random_state=42,
)

# Report the resulting feature-matrix shapes.
print(f"Train: {train_data.X.shape}, Test: {test_data.X.shape}")

Train: (120, 4), Test: (30, 4)


### List Models

In [3]:
from ml_project_template.models import ModelRegistry

# Show the names under which models are registered; these are the keys
# accepted by ModelRegistry.get() in the cells below.
registered_models = ModelRegistry.list()
print(registered_models)

['gb_classifier', 'mlp_classifier']


### Train Gradient Boosting Classifier

In [4]:
from ml_project_template.training import Trainer

# Look up the gradient boosting classifier in the registry, then build it.
# NOTE: the evaluation cell must re-create the model with these same
# hyperparameters before loading the saved artifact.
gb_classifier_factory = ModelRegistry.get("gb_classifier")
model = gb_classifier_factory(n_estimators=10, max_depth=3)

# Bind the model to its experiment.
trainer = Trainer(model=model, experiment_name="iris-gb-classifier")

# One call trains, evaluates, and saves the model to model_path
# (the MLflow run is managed internally by the trainer).
metrics = trainer.run(
    train_data=train_data,
    test_data=test_data,
    model_path=".models/iris_gb_classifier",
    # run_name="gradient-boosting-run",  # optional run name, left disabled
)
print(f"Test accuracy: {metrics['accuracy']:.2%}")

2026/02/03 21:42:36 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/02/03 21:42:36 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/02/03 21:42:36 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/02/03 21:42:36 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/02/03 21:42:36 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/02/03 21:42:36 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/02/03 21:42:36 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/03 21:42:36 INFO alembic.runtime.migration: Will assume non-transactional DDL.


Test accuracy: 100.00%


### Evaluate Gradient Boosting Classifier

In [5]:
from sklearn.metrics import accuracy_score

# Re-create the model with the SAME hyperparameters as the training cell
# (n_estimators=10, max_depth=3). The original used n_estimators=100 here,
# which contradicted the "same architecture" intent.
model = ModelRegistry.get("gb_classifier")(
    n_estimators=10,
    max_depth=3,
)

# Load the artifact written by the training cell. The path must match the
# model_path passed to trainer.run() above; the previous value
# (".models/iris_gradient_boosting_classifier") is never written by this
# notebook, so a fresh Restart & Run All would fail or load a stale model.
model.load(".models/iris_gb_classifier")

# Evaluate directly on the held-out test split, independently of the Trainer.
predictions = model.predict(test_data.X)
accuracy = accuracy_score(test_data.y, predictions)
print(f"Test accuracy: {accuracy:.2%}")

Test accuracy: 100.00%


### Train MLP Model

In [6]:
from ml_project_template.training import Trainer

# Look up the MLP classifier in the registry, then build it.
# input_dim=4 matches the four Iris features and num_classes=3 the three
# species; the remaining arguments are training hyperparameters.
mlp_classifier_factory = ModelRegistry.get("mlp_classifier")
model = mlp_classifier_factory(
    input_dim=4,
    hidden_dim=16,
    num_classes=3,
    lr=1e-3,
    epochs=500,
    batch_size=8,
)

# Bind the model to its experiment.
trainer = Trainer(model=model, experiment_name="iris-mlp-classifier")

# One call trains, evaluates, and saves the model to model_path
# (the MLflow run is managed internally by the trainer).
metrics = trainer.run(
    train_data=train_data,
    test_data=test_data,
    model_path=".models/iris_mlp_classifier",
    # run_name="mlp-run",  # optional run name, left disabled
)
print(f"Test accuracy: {metrics['accuracy']:.2%}")

Test accuracy: 96.67%


### Evaluate MLP Model

In [7]:
from sklearn.metrics import accuracy_score

# Rebuild an MLP with the same layer sizes as the trained one so the saved
# artifact can be loaded into it.
mlp_classifier_factory = ModelRegistry.get("mlp_classifier")
model = mlp_classifier_factory(input_dim=4, hidden_dim=16, num_classes=3)

# Restore the model from the path the training cell saved it to.
model.load(".models/iris_mlp_classifier")

# Score the restored model on the held-out test split, independently of
# the Trainer.
predictions = model.predict(test_data.X)
accuracy = accuracy_score(y_true=test_data.y, y_pred=predictions)
print(f"Test accuracy: {accuracy:.2%}")

Test accuracy: 96.67%
