# Iris dataset model training and evaluation

**Author:** Alex Carneiro

**Course:** Moving2DS - Part 1

In [None]:
# importing libraries

import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

%matplotlib inline

## Data reading

In [None]:
# Read the CSV without treating its first row as a header, then attach
# readable names: four measurement columns followed by the target column.
measurement_names = ["sepal_l", "sepal_w", "petal_l", "petal_w"]
dataset = pd.read_csv("../data/iris.data", header=None)
dataset.columns = measurement_names + ["class"]

# Report how many rows were loaded and preview five random ones.
print("Read %d samples from the dataset" % dataset.shape[0])
dataset.sample(5)

## Data split for training and testing

In [None]:
# Separate the four measurement columns (model input) from the target column
# (model output) as plain NumPy arrays.
feature_columns = ["sepal_l", "sepal_w", "petal_l", "petal_w"]
full_data_input = dataset[feature_columns].values
full_data_output = dataset["class"].values

# Fixed seed so that "Restart Kernel -> Run All" always produces the same
# train/test partition instead of a new random one on every execution.
RANDOM_SEED = 42

# Hold out 20% of the samples for testing.
# `stratify` keeps the class proportions of the full dataset in both
# partitions; it requires every class to have at least two samples.
train_input, test_input, train_output, test_output = train_test_split(
    full_data_input,
    full_data_output,
    test_size=.2,
    random_state=RANDOM_SEED,
    stratify=full_data_output,
)

In [None]:
# Shapes of the full input array and of its train / test partitions, in that order.
tuple(part.shape for part in (full_data_input, train_input, test_input))

## Create and train a new instance of the classifier

In [None]:
# Build a logistic-regression classifier with the library defaults and fit it
# on the training partition (`fit` returns the estimator itself).
model = LogisticRegression().fit(train_input, train_output)

# Keep the fitted estimator as the cell's displayed value.
model

## Evaluate the trained model

In [None]:
# Score the fitted model on the held-out partition and report it as a
# percentage with one decimal place.
acc = model.score(test_input, test_output)
accuracy_pct = 100 * acc
print("Model accuracy = %.1f%%" % accuracy_pct)

In [None]:
# Predict a class for every held-out sample.
predictions = model.predict(test_input)

# scikit-learn's signature is confusion_matrix(y_true, y_pred): the ground
# truth must come first. With this order, rows index the true class and
# columns index the predicted class, which is the documented convention.
matrix = confusion_matrix(test_output, predictions)
print(matrix)

In [None]:
# confusion_matrix (without an explicit `labels` argument) orders its rows and
# columns by the sorted union of the labels seen in BOTH of its inputs, so the
# axis labels must be derived from the same union. Using the test labels alone
# breaks when the model predicts a class that is absent from the test split.
classes = sorted(set(test_output) | set(predictions))

# Same counts and orientation as `matrix`, with class names on both axes.
pd.DataFrame(matrix, index=classes, columns=classes)

## Save the trained model

In [None]:
# Serialize the fitted classifier to disk with joblib.
model_path = '../models/model.pkl'

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists (e.g. on a fresh checkout) before writing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Trailing semicolon suppresses the returned list of written file names.
joblib.dump(model, model_path);