In [1]:
# Multiclass Classification Problem (One Vs Rest or One Vs All) for Logistic Regression


import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor
# Switch on seaborn's default plot styling for the rest of the notebook
# NOTE(review): no figure is drawn in the visible cells — confirm this is still needed
sns.set()

In [2]:
# Move into the folder that holds iris1.csv so later cells can read it by bare name.
# The location can be overridden with the IRIS_DATA_DIR environment variable, which
# keeps the notebook usable on machines other than the one it was written on; the
# original workshop folder remains the default.
DATA_DIR = os.environ.get('IRIS_DATA_DIR', 'D:/MLP_Session_26_JULY/DATASET')
os.chdir(DATA_DIR)

In [3]:
# Load the iris measurements from the working directory, then preview the first rows
data = pd.read_csv(filepath_or_buffer="iris1.csv")
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [4]:
# Count the missing values in each column
data.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [5]:
# Separate the label column (dependent variable) from the measurement columns
# (independent variables)
y = data['species']
X = data.drop('species', axis=1)

In [6]:
# Preview the feature columns to confirm the label column was removed
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
# Preview the target labels
y.head()

0    Setosa
1    Setosa
2    Setosa
3    Setosa
4    Setosa
Name: species, dtype: object

In [8]:
# Hold out half of the rows for testing; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

In [9]:
# Dimensions of the training feature set as (rows, columns)
x_train.shape

(75, 4)

In [10]:
# Dimensions of the test feature set as (rows, columns)
x_test.shape

(75, 4)

In [11]:
# Train a one-vs-rest logistic regression on the training half.
# NOTE(review): newer scikit-learn releases deprecate the `multi_class` argument —
# confirm against the installed version before upgrading.
log_reg = LogisticRegression(multi_class='ovr')
log_reg.fit(X=x_train, y=y_train)

LogisticRegression(multi_class='ovr')

In [12]:
# Predict a species label for every row of the held-out test set
y_pred = log_reg.predict(X=x_test)

In [13]:
# Show the predicted labels for the test rows
y_pred

array(['Virginica', 'Versicolor', 'Setosa', 'Virginica', 'Setosa',
       'Virginica', 'Setosa', 'Virginica', 'Versicolor', 'Versicolor',
       'Virginica', 'Virginica', 'Versicolor', 'Versicolor', 'Versicolor',
       'Setosa', 'Versicolor', 'Versicolor', 'Setosa', 'Setosa',
       'Virginica', 'Versicolor', 'Setosa', 'Setosa', 'Virginica',
       'Setosa', 'Setosa', 'Versicolor', 'Versicolor', 'Setosa',
       'Virginica', 'Versicolor', 'Setosa', 'Virginica', 'Virginica',
       'Versicolor', 'Setosa', 'Virginica', 'Versicolor', 'Versicolor',
       'Virginica', 'Setosa', 'Virginica', 'Setosa', 'Setosa',
       'Versicolor', 'Virginica', 'Virginica', 'Versicolor', 'Virginica',
       'Versicolor', 'Virginica', 'Virginica', 'Versicolor', 'Virginica',
       'Virginica', 'Versicolor', 'Virginica', 'Virginica', 'Virginica',
       'Virginica', 'Setosa', 'Virginica', 'Versicolor', 'Versicolor',
       'Versicolor', 'Versicolor', 'Virginica', 'Setosa', 'Setosa',
       'Virginica', 'Vers

In [14]:
# Number of rows per species in the full dataset
data['species'].value_counts()

Versicolor    50
Virginica     50
Setosa        50
Name: species, dtype: int64

In [15]:
# Fraction of test rows whose predicted species matches the true species
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.88

In [16]:
# Confusion matrix for the three iris classes.
# Rows are the actual species and columns the predicted species, both in sorted
# label order (Setosa, Versicolor, Virginica):
#
#                              Pred
#                    Setosa  Versicolor  Virginica
# Actual Setosa     [[  .  ,     .     ,     .   ],
#        Versicolor  [  .  ,     .     ,     .   ],
#        Virginica   [  .  ,     .     ,     .   ]]
#
# Diagonal cells count correct predictions; every off-diagonal cell counts rows of
# one species that were predicted as another. The binary TN/FP/FN/TP layout does
# not apply to a three-class problem.
conf_mat = confusion_matrix(y_test,y_pred)
conf_mat

array([[21,  0,  0],
       [ 0, 23,  7],
       [ 0,  2, 22]], dtype=int64)

In [17]:
# Creating the Classification report
#
# Per-class precision, recall and F1 on the held-out test set. The report is a
# pre-formatted string, so print() is used to render its line breaks.
# classification_report is imported once in the notebook's top import cell.
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

      Setosa       1.00      1.00      1.00        21
  Versicolor       0.92      0.77      0.84        30
   Virginica       0.76      0.92      0.83        24

    accuracy                           0.88        75
   macro avg       0.89      0.89      0.89        75
weighted avg       0.89      0.88      0.88        75



In [18]:
from sklearn.multiclass import OneVsRestClassifier

In [19]:
# Reload the iris measurements so this section starts from a fresh copy of the data
data = pd.read_csv(filepath_or_buffer="iris1.csv")
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [20]:
# Separate the label column (dependent variable) from the measurement columns
# (independent variables)
y = data['species']
X = data.drop('species', axis=1)

In [21]:
# Preview the feature columns to confirm the label column was removed
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [22]:
# Preview the target labels
y.head()

0    Setosa
1    Setosa
2    Setosa
3    Setosa
4    Setosa
Name: species, dtype: object

In [23]:
# Same 50/50 split and seed as the previous section, so the models are compared
# on identical train and test rows
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

In [24]:
# Dimensions of the training feature set as (rows, columns)
x_train.shape

(75, 4)

In [25]:
# Dimensions of the test feature set as (rows, columns)
x_test.shape

(75, 4)

In [26]:
# Fitting the Logistic Regression model
#
# Wrap a plain LogisticRegression in OneVsRestClassifier, which handles the
# three-class target through the one-vs-rest strategy. LogisticRegression is
# already imported in the notebook's first cell, so it is not re-imported here.
log_reg = LogisticRegression()

ovr = OneVsRestClassifier(log_reg)

# Keep fit() as the last expression so the fitted estimator's repr is displayed
ovr.fit(x_train,y_train)

OneVsRestClassifier(estimator=LogisticRegression())

In [44]:
# Predict a species label for every test row with the one-vs-rest wrapper
y_pred = ovr.predict(X=x_test)

In [45]:
# Show the predicted labels for the test rows
y_pred

array(['Virginica', 'Versicolor', 'Setosa', 'Virginica', 'Setosa',
       'Virginica', 'Setosa', 'Virginica', 'Versicolor', 'Versicolor',
       'Virginica', 'Virginica', 'Versicolor', 'Versicolor', 'Versicolor',
       'Setosa', 'Versicolor', 'Versicolor', 'Setosa', 'Setosa',
       'Virginica', 'Versicolor', 'Setosa', 'Setosa', 'Virginica',
       'Setosa', 'Setosa', 'Versicolor', 'Versicolor', 'Setosa',
       'Virginica', 'Versicolor', 'Setosa', 'Virginica', 'Virginica',
       'Versicolor', 'Setosa', 'Virginica', 'Versicolor', 'Versicolor',
       'Virginica', 'Setosa', 'Virginica', 'Setosa', 'Setosa',
       'Versicolor', 'Virginica', 'Virginica', 'Versicolor', 'Virginica',
       'Versicolor', 'Virginica', 'Virginica', 'Versicolor', 'Virginica',
       'Virginica', 'Versicolor', 'Virginica', 'Virginica', 'Virginica',
       'Virginica', 'Setosa', 'Virginica', 'Versicolor', 'Versicolor',
       'Versicolor', 'Versicolor', 'Virginica', 'Setosa', 'Setosa',
       'Virginica', 'Vers

In [46]:
# Number of rows per species in the full dataset
data['species'].value_counts()

Versicolor    50
Setosa        50
Virginica     50
Name: species, dtype: int64

In [47]:
# Fraction of test rows whose predicted species matches the true species
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.88

In [48]:
# Confusion matrix for the three iris classes.
# Rows are the actual species and columns the predicted species, both in sorted
# label order (Setosa, Versicolor, Virginica):
#
#                              Pred
#                    Setosa  Versicolor  Virginica
# Actual Setosa     [[  .  ,     .     ,     .   ],
#        Versicolor  [  .  ,     .     ,     .   ],
#        Virginica   [  .  ,     .     ,     .   ]]
#
# Diagonal cells count correct predictions; every off-diagonal cell counts rows of
# one species that were predicted as another. The binary TN/FP/FN/TP layout does
# not apply to a three-class problem.
conf_mat = confusion_matrix(y_test,y_pred)
conf_mat

array([[21,  0,  0],
       [ 0, 23,  7],
       [ 0,  2, 22]], dtype=int64)

In [49]:
# Creating the Classification report
#
# Per-class precision, recall and F1 on the held-out test set. The report is a
# pre-formatted string, so print() is used to render its line breaks.
# classification_report is imported once in the notebook's top import cell.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      Setosa       1.00      1.00      1.00        21
  Versicolor       0.92      0.77      0.84        30
   Virginica       0.76      0.92      0.83        24

    accuracy                           0.88        75
   macro avg       0.89      0.89      0.89        75
weighted avg       0.89      0.88      0.88        75



In [27]:
# Multinomial Logistic Regression
#
# Reload the iris measurements so this section starts from a fresh copy of the
# data. LogisticRegression is already imported in the notebook's first cell, so it
# is not re-imported here.
data = pd.read_csv("iris1.csv") # Reading the Data
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [28]:
# Separate the label column (dependent variable) from the measurement columns
# (independent variables)
y = data['species']
X = data.drop('species', axis=1)

In [29]:
# Same 50/50 split and seed as the earlier sections, so the models are compared
# on identical train and test rows
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

In [30]:
# Dimensions of the training feature set as (rows, columns)
x_train.shape

(75, 4)

In [31]:
# Dimensions of the test feature set as (rows, columns)
x_test.shape

(75, 4)

In [32]:
# Train a multinomial logistic regression on the training half, using the lbfgs
# solver.
# NOTE(review): newer scikit-learn releases deprecate the `multi_class` argument —
# confirm against the installed version before upgrading.
model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
model.fit(X=x_train, y=y_train)

LogisticRegression(multi_class='multinomial')

In [33]:
# Predict a species label for every test row with the multinomial model
y_pred = model.predict(x_test)

In [34]:
# Show the predicted labels for the test rows
y_pred

array(['Virginica', 'Versicolor', 'Setosa', 'Virginica', 'Setosa',
       'Virginica', 'Setosa', 'Versicolor', 'Versicolor', 'Versicolor',
       'Virginica', 'Versicolor', 'Versicolor', 'Versicolor',
       'Versicolor', 'Setosa', 'Versicolor', 'Versicolor', 'Setosa',
       'Setosa', 'Virginica', 'Versicolor', 'Setosa', 'Setosa',
       'Versicolor', 'Setosa', 'Setosa', 'Versicolor', 'Versicolor',
       'Setosa', 'Virginica', 'Versicolor', 'Setosa', 'Virginica',
       'Virginica', 'Versicolor', 'Setosa', 'Virginica', 'Versicolor',
       'Versicolor', 'Virginica', 'Setosa', 'Virginica', 'Setosa',
       'Setosa', 'Versicolor', 'Virginica', 'Virginica', 'Versicolor',
       'Virginica', 'Versicolor', 'Virginica', 'Versicolor', 'Versicolor',
       'Virginica', 'Versicolor', 'Versicolor', 'Virginica', 'Versicolor',
       'Virginica', 'Versicolor', 'Setosa', 'Virginica', 'Versicolor',
       'Versicolor', 'Versicolor', 'Versicolor', 'Virginica', 'Setosa',
       'Setosa', 'Virginica'

In [35]:
# Fraction of test rows whose predicted species matches the true species
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.9333333333333333

In [36]:
# Confusion matrix for the three iris classes.
# Rows are the actual species and columns the predicted species, both in sorted
# label order (Setosa, Versicolor, Virginica):
#
#                              Pred
#                    Setosa  Versicolor  Virginica
# Actual Setosa     [[  .  ,     .     ,     .   ],
#        Versicolor  [  .  ,     .     ,     .   ],
#        Virginica   [  .  ,     .     ,     .   ]]
#
# Diagonal cells count correct predictions; every off-diagonal cell counts rows of
# one species that were predicted as another. The binary TN/FP/FN/TP layout does
# not apply to a three-class problem.
conf_mat = confusion_matrix(y_test,y_pred)
conf_mat

array([[21,  0,  0],
       [ 0, 29,  1],
       [ 0,  4, 20]], dtype=int64)

In [60]:
# Creating the Classification report
#
# Per-class precision, recall and F1 on the held-out test set. The report is a
# pre-formatted string, so print() is used to render its line breaks.
# classification_report is imported once in the notebook's top import cell.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      Setosa       1.00      1.00      1.00        21
  Versicolor       0.88      0.97      0.92        30
   Virginica       0.95      0.83      0.89        24

    accuracy                           0.93        75
   macro avg       0.94      0.93      0.94        75
weighted avg       0.94      0.93      0.93        75

