# Iris dataset model training and evaluation

**Author:** Alex Carneiro

**Course:** Moving2DS - Part 1

In [1]:
# importing libraries

import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import joblib

import matplotlib.pyplot as plt
%matplotlib inline

## Data reading

In [2]:
dataset = pd.read_csv("../data/iris.data", header=None)
dataset.columns = ["sepal_l", "sepal_w",
                   "petal_l", "petal_w",
                   "class"]

print("Read %d samples from the dataset"%len(dataset))
dataset.sample(5)

Read 150 samples from the dataset


Unnamed: 0,sepal_l,sepal_w,petal_l,petal_w,class
104,6.5,3.0,5.8,2.2,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica
136,6.3,3.4,5.6,2.4,Iris-virginica
81,5.5,2.4,3.7,1.0,Iris-versicolor
0,5.1,3.5,1.4,0.2,Iris-setosa


## Data split for training and testing

In [3]:
full_data_input = dataset[["sepal_l", "sepal_w",
                           "petal_l", "petal_w"]].values
full_data_output = dataset["class"].values

train_input, test_input,\
train_output, test_output = train_test_split(full_data_input,
                                             full_data_output,
                                             test_size=.2)

In [4]:
model = LogisticRegression()
model.fit(train_input, train_output)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [5]:
acc = model.score(test_input, test_output)
print("Model accuracy = %.1f%%"%(100 * acc))

Model accuracy = 86.7%


In [6]:
predictions = model.predict(test_input)
matrix = confusion_matrix(predictions, test_output)
print(matrix)

[[11  0  0]
 [ 0 10  0]
 [ 0  4  5]]


In [7]:
classes = sorted(np.unique(test_output))
pd.DataFrame(dict([(cl, l) for cl, l in zip(classes, matrix.T)]), index=classes)

Unnamed: 0,Iris-setosa,Iris-versicolor,Iris-virginica
Iris-setosa,11,0,0
Iris-versicolor,0,10,0
Iris-virginica,0,4,5


In [8]:
joblib.dump(model, '../models/model.pkl');

['../models/model.pkl']