# Multiclass Logistic Regression

#### Import Libraries 

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

### Load data 
- We will be using the **Iris Dataset**

In [2]:
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# Assign colum names to the dataset
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

# Read dataset to pandas dataframe
dataset = pd.read_csv(url, names=names) 

In [3]:
dataset

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [4]:
dataset['Class'].value_counts()

Iris-versicolor    50
Iris-setosa        50
Iris-virginica     50
Name: Class, dtype: int64

### Train test split

In [5]:
X = dataset.drop(['Class'], axis=1)
y = dataset['Class']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,stratify=y)  

In [7]:
ss = StandardScaler()
X_train_ss = ss.fit_transform(X_train)

In [8]:
X_test_ss = ss.transform(X_test)

### Multiclass Logistic Regression ( One VS Rest )

In [9]:
# fit into model (ovr)
ovr = LogisticRegression(multi_class='ovr')
ovr.fit(X_train_ss,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='ovr', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [10]:
# testing on train set 
ovr.score(X_train_ss,y_train)

0.9333333333333333

In [11]:
# testing on test set 
ovr.score(X_test_ss,y_test)

0.9111111111111111

### View Intercepts

Each intercept represent intercepts for each class for the logistic regression. 

In [12]:
print('Ovr intercepts:', ovr.intercept_)


Ovr intercepts: [-2.36018494 -0.9155909  -3.34190207]


### View Coefficients

Below we can see **3 sets of coefficients**, with **4 values each**. 

Each set represents one class and the 4 values are the coefficients for each variable.



In [13]:
print('Ovr coef(s):', ovr.coef_)

Ovr coef(s): [[-0.99760965  1.25576147 -1.54931306 -1.50164683]
 [ 0.03022987 -1.23650171  0.47466609 -0.52971273]
 [ 0.37048062 -0.47526522  2.07988806  2.64805363]]


### Exercise 1  
- Run the logistic regression with the `multi_class='multinomial'`
- Obtain the score
- Compare it with the score of  the logistic regression with `multi_class='ovr'`


In [15]:
ovr_1 = LogisticRegression(multi_class='multinomial')
ovr_1.fit(X_train_ss,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [16]:
ovr_1.score(X_train_ss,y_train)

0.9619047619047619

In [17]:
ovr_1.score(X_test_ss,y_test)

0.9555555555555556

### Exercise 2 
- Produce a classification report with `metrics.classification_report`
- Compare the scores fo the two models.

In [18]:
y_pred_class = ovr_1.predict(X_test_ss)

In [19]:
print(metrics.classification_report(y_test,y_pred_class))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        15
Iris-versicolor       0.93      0.93      0.93        15
 Iris-virginica       0.93      0.93      0.93        15

       accuracy                           0.96        45
      macro avg       0.96      0.96      0.96        45
   weighted avg       0.96      0.96      0.96        45



In [21]:
y_pred_class_1 = ovr.predict(X_test_ss)

In [22]:
print(metrics.classification_report(y_test,y_pred_class_1))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      0.93      0.97        15
Iris-versicolor       0.87      0.87      0.87        15
 Iris-virginica       0.88      0.93      0.90        15

       accuracy                           0.91        45
      macro avg       0.91      0.91      0.91        45
   weighted avg       0.91      0.91      0.91        45

