In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [3]:
data = sns.load_dataset('iris')
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [14]:
data['species'].value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

## Check for missing values in the data

In [4]:
data.isnull().any()

sepal_length    False
sepal_width     False
petal_length    False
petal_width     False
species         False
dtype: bool

In [5]:
data.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

## Create the X and y from the dataset

In [6]:
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
X_train.shape, X_test.shape

((112, 4), (38, 4))

### Apply logistic Regression 

In [8]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X_train, y_train)

LogisticRegression()

### Perform the predictions on X_test data and store the result in y_pred

In [9]:
y_pred = lr.predict(X_test)
y_pred

array(['virginica', 'versicolor', 'setosa', 'virginica', 'setosa',
       'virginica', 'setosa', 'versicolor', 'versicolor', 'versicolor',
       'virginica', 'versicolor', 'versicolor', 'versicolor',
       'versicolor', 'setosa', 'versicolor', 'versicolor', 'setosa',
       'setosa', 'virginica', 'versicolor', 'setosa', 'setosa',
       'virginica', 'setosa', 'setosa', 'versicolor', 'versicolor',
       'setosa', 'virginica', 'versicolor', 'setosa', 'virginica',
       'virginica', 'versicolor', 'setosa', 'virginica'], dtype=object)

In [21]:
lr.predict_proba(X_test)

array([[1.17923988e-04, 5.61477646e-02, 9.43734311e-01],
       [1.26289144e-02, 9.60454649e-01, 2.69164365e-02],
       [9.84397662e-01, 1.56022990e-02, 3.85629176e-08],
       [1.25178576e-06, 2.31526624e-02, 9.76846086e-01],
       [9.70234812e-01, 2.97650252e-02, 1.62602976e-07],
       [2.01668233e-06, 5.94451581e-03, 9.94053468e-01],
       [9.81899508e-01, 1.81004220e-02, 7.04446579e-08],
       [2.84241323e-03, 7.47090370e-01, 2.50067217e-01],
       [1.50915548e-03, 7.38523334e-01, 2.59967510e-01],
       [2.05288095e-02, 9.35891336e-01, 4.35798547e-02],
       [9.22425629e-05, 1.59473874e-01, 8.40433883e-01],
       [6.98627852e-03, 8.09990320e-01, 1.83023401e-01],
       [4.08220420e-03, 7.93602432e-01, 2.02315363e-01],
       [3.05681764e-03, 7.60910422e-01, 2.36032760e-01],
       [3.87699715e-03, 7.10277101e-01, 2.85845902e-01],
       [9.82815595e-01, 1.71843480e-02, 5.65465066e-08],
       [6.72901374e-03, 7.56465752e-01, 2.36805234e-01],
       [1.14291828e-02, 8.45110

In [11]:
X_test

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
114,5.8,2.8,5.1,2.4
62,6.0,2.2,4.0,1.0
33,5.5,4.2,1.4,0.2
107,7.3,2.9,6.3,1.8
7,5.0,3.4,1.5,0.2
100,6.3,3.3,6.0,2.5
40,5.0,3.5,1.3,0.3
86,6.7,3.1,4.7,1.5
76,6.8,2.8,4.8,1.4
71,6.1,2.8,4.0,1.3


In [15]:
y_test = pd.DataFrame(y_test)
y_test

Unnamed: 0,Outcome
661,1
122,0
113,0
14,1
529,0
...,...
366,1
301,1
382,0
140,0


## Confusion matrix and classification report

In [24]:
accuracy_score(y_test, y_pred)

0.9736842105263158

In [23]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [25]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[13,  0,  0],
       [ 0, 15,  1],
       [ 0,  0,  9]], dtype=int64)

In [26]:
print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.94      1.00      0.97        15
   virginica       1.00      0.90      0.95        10

    accuracy                           0.97        38
   macro avg       0.98      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38

