In [2]:
from sklearn.datasets import load_iris
import pandas as pd

## Load Data

In [3]:
def load_iris_features_and_target() -> tuple[pd.DataFrame, pd.Series]:
    """Load the iris dataset and return it as pandas objects.

    Returns:
        A ``(features, target)`` pair. ``features`` is a DataFrame built from
        the dataset's ``data`` entry, with columns labelled by its
        ``feature_names`` entry. ``target`` is a Series named ``species``
        built from the dataset's ``target`` entry.
    """
    bunch = load_iris()
    feature_frame = pd.DataFrame(data=bunch['data'], columns=bunch['feature_names'])
    species = pd.Series(data=bunch['target'], name='species')
    return feature_frame, species

In [4]:
# Unpack the (features, target) pair returned by load_iris_features_and_target.
features, target = load_iris_features_and_target()

We now have a DataFrame of features.

In [5]:
# Preview the first rows; the columns are the four sepal/petal measurements in cm.
features.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


And a Series containing our target.

In [6]:
# The Series is named "species" but holds integer class codes, not species names.
target.head()

0    0
1    0
2    0
3    0
4    0
Name: species, dtype: int64

Split the data into training and test sets.

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split, and every score reported below, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=18,
)

In [8]:
# Sanity-check the size of the training split.
len(y_train)

120

## Model Training

We're not here for the best model, just to get an example.

In [16]:
from sklearn.linear_model import LogisticRegression
# NOTE(review): max_iter=1000 is set explicitly instead of using the library
# default — presumably to avoid a convergence warning on the unscaled
# features; confirm.
model = LogisticRegression(max_iter=1000)

In [17]:
# Train on the training split only; the test split is reserved for scoring.
model.fit(X_train, y_train)

How do we do on the test set?

In [18]:
# Evaluate on the held-out test split (for scikit-learn classifiers, .score is
# documented as mean accuracy).
model.score(X_test, y_test)

1.0

(This dataset is a bit too easy.)

## Save model

In [19]:
# Show the fitted estimator that is about to be pickled.
model

In [20]:
import pickle
from pathlib import Path

In [21]:
# Where the trained model is persisted, relative to the notebook's working directory.
pickle_path = Path("../models/model.pickle")
# Create the target directory first: opening the file for writing raises
# FileNotFoundError when ../models does not exist, which would break a
# Restart & Run All on a fresh checkout.
pickle_path.parent.mkdir(parents=True, exist_ok=True)
# Serialize the fitted estimator in binary mode.
with open(pickle_path, "wb") as f:
    pickle.dump(model, f)

Load it back in just to be sure it worked properly.

In [22]:
# Read the estimator back from the file written above. Only unpickle files you
# produced yourself: pickle.load can execute arbitrary code.
with pickle_path.open("rb") as fh:
    model2 = pickle.load(fh)

In [23]:
# Round-trip check: the reloaded model should score the same as the in-memory one.
model2.score(X_test, y_test)

1.0