In [1]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG once, up front, so a fresh Restart & Run All
# reproduces the same random draws.
SEED = 12
np.random.seed(SEED)

## Load Data

In [2]:
# Load the Iris dataset from scikit-learn. Later cells read its "data",
# "feature_names", "target" and "target_names" entries.
iris_data = load_iris()

In [3]:
# Feature column names; reused below to select model inputs and saved
# alongside the trained classifier at the end of the notebook.
features = iris_data["feature_names"]

# Feature matrix as a DataFrame with one named column per feature.
data = pd.DataFrame(iris_data["data"], columns=features)

# Map each class index in "target" to its name in "target_names" and keep
# the result as a categorical Series.
class_names = iris_data["target_names"]
target = pd.Series(
    [class_names[code] for code in iris_data["target"]],
    dtype="category",
)

In [4]:
# Hold out 20% of the rows for testing. The seed is passed explicitly
# (same value as the np.random.seed call at the top of the notebook) so
# that re-running this cell on its own yields the same partition instead
# of drawing from whatever state the global NumPy RNG has reached.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=12
)

## Training

In [5]:
# Configure the classifier: L-BFGS solver with an iteration cap of 1000.
clf = LogisticRegression(
    solver='lbfgs',
    multi_class='auto',
    max_iter=1000,
)

# Fit on the selected feature columns of the training split. This stays the
# cell's final expression so the fitted estimator is displayed.
clf.fit(X_train[features], y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Validation

In [6]:
# Predict labels for the training split and then the test split, using the
# same feature columns the classifier was fitted on.
y_train_pred, y_test_pred = (
    clf.predict(split[features]) for split in (X_train, X_test)
)

In [7]:
# Training-split confusion matrix (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_train, y_pred=y_train_pred)

array([[38,  0,  0],
       [ 0, 39,  2],
       [ 0,  1, 40]])

In [8]:
# Fraction of training rows whose predicted label matches the true label.
accuracy_score(y_true=y_train, y_pred=y_train_pred)

0.975

In [9]:
# Held-out test-split confusion matrix (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[12,  0,  0],
       [ 0,  8,  1],
       [ 0,  0,  9]])

In [10]:
# Fraction of held-out test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

0.9666666666666667

## Save the model

In [11]:
# Make sure the output directory exists first: open(..., "wb") does not
# create missing parent directories and would raise FileNotFoundError on a
# fresh checkout.
os.makedirs("models", exist_ok=True)

# Persist the fitted classifier.
with open("models/iris_classifier.joblib", "wb") as f:
    joblib.dump(clf, f)

# Persist the feature-name list next to it (presumably so that code loading
# the model can select the same columns in the same order).
with open("models/iris_classifier_features.joblib", "wb") as f:
    joblib.dump(features, f)