In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [3]:
# Load the iris dataset. NOTE: despite its name, `df` is not a pandas
# DataFrame; it is a dict-like container whose fields (data, target,
# feature_names, target_names, ...) are read by attribute in the cells below.
df = load_iris()

In [4]:
# List the fields the loaded dataset object exposes.
df.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [5]:
# Feature table for the two-class problem. Rows are selected by label
# (everything except class 0, the first entry of df.target_names) instead of
# by the hard-coded row offset 50, so the selection no longer depends on how
# the samples happen to be ordered in the dataset. For iris this keeps the
# same 100 rows as `df.data[50:]`, re-indexed 0..99 by the DataFrame.
X = pd.DataFrame(df.data[df.target != 0], columns=df.feature_names)

In [6]:
# Sanity check: show the last rows of the feature table (the index should end
# at 99, i.e. 100 rows, with the four measurement columns).
X.tail()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
95,6.7,3.0,5.2,2.3
96,6.3,2.5,5.0,1.9
97,6.5,3.0,5.2,2.0
98,6.2,3.4,5.4,2.3
99,5.9,3.0,5.1,1.8


In [7]:
# Labels for the two-class subset. Selected with a label mask (drop class 0,
# the first entry of df.target_names) rather than the hard-coded row offset
# 50, so the result does not depend on the dataset's row ordering. For iris
# this yields the same 100 labels as `df.target[50:]`, re-indexed 0..99 so
# they line up row-for-row with the feature table.
y = pd.Series(df.target[df.target != 0])

In [8]:
# Sanity check: show the last labels (index should end at 99, matching X).
y.tail()

95    2
96    2
97    2
98    2
99    2
dtype: int64

### Cross-validation: Hold-out (single 70/30 train/test split)

In [9]:
# Hold-out split: 30% of the samples are reserved for testing and the rest
# are used for fitting. The fixed random_state makes the shuffle repeatable.
# NOTE(review): the split is not stratified; confirm whether stratify=y is
# wanted for this small two-class sample.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [10]:
# Confirm the size of the training portion (rows, feature columns).
X_train.shape

(70, 4)

In [11]:
# Confirm the size of the held-out test portion (rows, feature columns).
X_test.shape

(30, 4)

### Classification: Logistic Regression

In [12]:
# Logistic-regression classifier with default regularisation settings. The
# solver is named explicitly rather than left to the library default.
# NOTE(review): random_state presumably has no effect with the lbfgs solver;
# confirm against the scikit-learn docs before relying on it.
clf = LogisticRegression(
    solver="lbfgs",
    random_state=0,
)

In [13]:
# Fit on the training split only; the test split is not touched until
# scoring. As the last expression in the cell, the returned estimator is
# echoed with its full parameter set.
clf.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=0, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [14]:
# Accuracy on the data the model was fitted on (an optimistic reference point
# to compare against the held-out score).
clf.score(X_train, y_train)

0.9714285714285714

In [15]:
# Accuracy on the held-out test split, i.e. the fraction of test samples
# classified correctly.
clf.score(X_test, y_test)

0.9666666666666667

### Performance: Confusion Matrix and Classification Report

In [16]:
# Predicted class labels for the held-out samples; reused by the metric cells
# that follow.
y_pred = clf.predict(X_test)

In [17]:
# Confusion matrix on the test split: rows are the true classes, columns the
# predicted classes, in ascending label order. Off-diagonal entries are the
# misclassified samples.
confusion_matrix(y_test, y_pred)

array([[14,  1],
       [ 0, 15]])

In [18]:
# Per-class precision / recall / F1 on the test split. The class ids are
# passed explicitly via `labels`, and the display names are looked up from
# those same ids, so every name is tied to its label instead of relying on
# `target_names[1:]` happening to line up with whichever labels occur in
# y_test / y_pred. print() is kept because the report is a multi-line string.
print(
    classification_report(
        y_test,
        y_pred,
        labels=clf.classes_,
        target_names=df.target_names[clf.classes_],
    )
)

              precision    recall  f1-score   support

  versicolor       1.00      0.93      0.97        15
   virginica       0.94      1.00      0.97        15

   micro avg       0.97      0.97      0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

