# IRIS Flowers Classification

In [1]:
from functools import partial

from mltraq import Run, create_session
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle

In [2]:
def load(run: Run):
    """Load the Iris dataset and store a seed-shuffled copy on the run.

    Reads ``run.params.seed`` and writes the shuffled samples and labels to
    ``run.vars.X`` and ``run.vars.y``. ``run.vars`` is only accessible while
    the run is executing.
    """
    features, labels = load_iris(return_X_y=True)

    # Shuffle samples and labels together, reproducibly for a given seed.
    run.vars.X, run.vars.y = shuffle(features, labels, random_state=run.params.seed)

In [3]:
def train_predict(run: Run):
    """Fit the run's classifier on the first 100 samples and predict the rest.

    Reads ``run.params.classifier``, ``run.params.seed``, ``run.vars.X`` and
    ``run.vars.y``. Writes the classifier's class name to
    ``run.fields.model_name`` (persisted to the database) and the held-out
    predictions / ground truth to ``run.vars.y_pred`` / ``run.vars.y_true``.
    """
    # Train on the first 100 shuffled samples; the remainder (50 for Iris) is held out.
    n_train = 100

    estimator = run.params.classifier(random_state=run.params.seed)
    model = estimator.fit(run.vars.X[:n_train], run.vars.y[:n_train])

    # Record which classifier produced this run's results.
    run.fields.model_name = type(model).__name__

    # Keep predictions and matching labels for the evaluation step.
    run.vars.y_pred = model.predict(run.vars.X[n_train:])
    run.vars.y_true = run.vars.y[n_train:]

In [4]:
def evaluate(run: Run):
    """Score the predictions stored on the run.

    Reads ``run.vars.y_true`` and ``run.vars.y_pred`` and writes their
    accuracy score to ``run.fields.accuracy``.
    """
    expected, predicted = run.vars.y_true, run.vars.y_pred
    run.fields.accuracy = accuracy_score(expected, predicted)

In [5]:
# Create an MLtraq session and, within it, a new experiment.
session = create_session()
experiment = session.create_experiment()

In [6]:
# Use a parameter grid to define the experiment's runs:
# 5 classifiers x 10 seeds = 50 runs, one per (classifier, seed) combination.
experiment.add_runs(
    # Each entry is called as `classifier(random_state=seed)` by train_predict;
    # partial() pre-binds the remaining constructor arguments.
    classifier=[
        partial(DummyClassifier, strategy="most_frequent"),
        partial(LogisticRegression, max_iter=1000),
        # NOTE(review): KMeans is a clustering estimator, so its predicted cluster
        # indices are not aligned with the Iris class labels and its accuracy is not
        # comparable to the supervised models. Presumably kept as a baseline; confirm.
        partial(KMeans, n_clusters=3, n_init="auto"),
        DecisionTreeClassifier,
        RandomForestClassifier,
    ],
    # The seed drives both the dataset shuffle and the classifier's random_state.
    seed=range(10),
)

In [7]:
# Execute the experiment: the step functions are applied, in the listed order,
# to each run, with runs processed in parallel.
experiment.execute(steps=[load, train_predict, evaluate])

  2%|2         | 1/50 [00:02<01:39,  2.03s/it]

In [8]:
# Average the numeric tracked fields per model (across seeds) and rank models by mean accuracy, best first.
(experiment.runs.df().groupby("model_name").mean(numeric_only=True).sort_values(by="accuracy", ascending=False))

Unnamed: 0_level_0,accuracy
model_name,Unnamed: 1_level_1
LogisticRegression,0.96
RandomForestClassifier,0.952
DecisionTreeClassifier,0.938
KMeans,0.336
DummyClassifier,0.288
