# MultiClass Classification (One vs All)

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets
from scipy.optimize import minimize

In [2]:
iris_dataset = datasets.load_iris()

In [3]:
feature_names = iris_dataset.feature_names
iris_data = iris_dataset.data
iris_data.shape

(150, 4)

In [4]:
y = iris_dataset.target

In [5]:
iris_df = pd.DataFrame(iris_data,columns=feature_names)
iris_df.insert(0,'Intercept',np.ones((iris_data.shape[0],1)))
iris_df.insert(5,'label',y)
iris_df

Unnamed: 0,Intercept,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,1.0,5.1,3.5,1.4,0.2,0
1,1.0,4.9,3.0,1.4,0.2,0
2,1.0,4.7,3.2,1.3,0.2,0
3,1.0,4.6,3.1,1.5,0.2,0
4,1.0,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...,...
145,1.0,6.7,3.0,5.2,2.3,2
146,1.0,6.3,2.5,5.0,1.9,2
147,1.0,6.5,3.0,5.2,2.0,2
148,1.0,6.2,3.4,5.4,2.3,2


In [6]:
class0 = iris_df[iris_df['label'] == 0]
class1 = iris_df[iris_df['label'] == 1]
class2 = iris_df[iris_df['label'] == 2]

## First Classifier (Label 0) Training set setup
In this case label 0 is our label 1 and all the others are 0

In [7]:
classifier_1_pos = class0.copy()
classifier_1_neg = pd.concat([class1.copy(),class2.copy()])
classifier_1_pos['label'] = 1
classifier_1_neg['label'] = 0

In [8]:
classifier_1 = pd.concat([classifier_1_pos.copy(),classifier_1_neg.copy()])
X1 = classifier_1.iloc[:,0:5]
y1 = classifier_1.iloc[:,[-1]]

## Second Classifier (Label 1)

In [9]:
classifier_2_pos = class1.copy()
classifier_2_neg = pd.concat([class0.copy(),class2.copy()])
classifier_2_pos['label'] = 1
classifier_2_neg['label'] = 0

In [10]:
classifier_2 = pd.concat([classifier_2_pos.copy(),classifier_2_neg.copy()])
X2 = classifier_2.iloc[:,0:5]
y2 = classifier_2.iloc[:,[-1]]

## Third Classifier (Label 2)

In [11]:
classifier_3_pos = class2.copy()
classifier_3_neg = pd.concat([class0.copy(),class1.copy()])
classifier_3_pos['label'] = 1
classifier_3_neg['label'] = 0

In [12]:
classifier_3 = pd.concat([classifier_3_pos.copy(),classifier_3_neg.copy()])
X3 = classifier_3.iloc[:,0:5]
y3 = classifier_3.iloc[:,[-1]]

## Train each classifier

In [13]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [14]:
def computeCost(theta, X, y):
    m = X.shape[0]
    J = -( 1 / m)* (np.log(sigmoid(X.dot(theta))).T.dot(y) + np.log(1 - sigmoid(X.dot(theta))).T.dot(1-y))
    if np.isnan(J[0]):
        return (np.inf)
    return (J[0])

In [15]:
def gradient(theta, X, y):
    m = X.shape[0]
    h = sigmoid(X.dot(theta.reshape(-1,1)))
    grad = (1/m)*X.T.dot(h-y)
    return (grad.flatten())

In [16]:
init_theta_1 = np.ones((X1.shape[1],1))
init_theta_2 = np.ones((X2.shape[1],1))
init_theta_3 = np.ones((X3.shape[1],1))
res1 = minimize(computeCost, init_theta_1, args=(X1.values,y1.values), method='BFGS', jac = gradient)
res2 = minimize(computeCost, init_theta_2, args=(X2.values,y2.values), method='BFGS', jac = gradient)
res3 = minimize(computeCost, init_theta_3, args=(X3.values,y3.values), method='BFGS', jac = gradient)

  J = -( 1 / m)* (np.log(sigmoid(X.dot(theta))).T.dot(y) + np.log(1 - sigmoid(X.dot(theta))).T.dot(1-y))
  J = -( 1 / m)* (np.log(sigmoid(X.dot(theta))).T.dot(y) + np.log(1 - sigmoid(X.dot(theta))).T.dot(1-y))


In [17]:
res1

      fun: 1.1063252044130728e-06
 hess_inv: array([[   8821.49546148,   15782.95454105,   46808.20738216,
         -73246.3772274 ,  -33955.97029504],
       [  15782.95454105,   28242.02515602,   83756.38936435,
        -131064.03544936,  -60759.37928356],
       [  46808.20738216,   83756.38936435,  248400.6175782 ,
        -388700.21667084, -180195.86378764],
       [ -73246.3772274 , -131064.03544936, -388700.21667084,
         608245.20697274,  281973.37932311],
       [ -33955.97029504,  -60759.37928356, -180195.86378764,
         281973.37932311,  130719.85167792]])
      jac: array([1.09923153e-06, 5.62957163e-06, 2.75519164e-06, 3.33063818e-06,
       1.21097831e-06])
  message: 'Optimization terminated successfully.'
     nfev: 30
      nit: 23
     njev: 30
   status: 0
  success: True
        x: array([  2.36156891,   2.42989004,   8.66939009, -13.08394436,
        -5.39542972])

In [18]:
res2

      fun: 0.4835655860655008
 hess_inv: array([[ 870.75909928,  -88.11756592, -147.39875001,   17.00602157,
          12.98988679],
       [ -88.11756592,   47.7576004 ,  -28.17249033,  -43.14275087,
          48.80835376],
       [-147.39875001,  -28.17249033,   77.1980145 ,   35.86635546,
         -44.50637673],
       [  17.00602157,  -43.14275087,   35.86635546,   64.87582062,
        -100.26786384],
       [  12.98988679,   48.80835376,  -44.50637673, -100.26786384,
         182.06156134]])
      jac: array([-8.18778158e-07,  2.18629258e-07, -6.82454901e-06, -4.71462488e-06,
       -6.37703298e-06])
  message: 'Optimization terminated successfully.'
     nfev: 50
      nit: 39
     njev: 47
   status: 0
  success: True
        x: array([ 7.37849013, -0.24501945, -2.7970663 ,  1.31369875, -2.77893921])

In [19]:
res3

      fun: 0.03966182272737892
 hess_inv: array([[ 81742.74622233,  -1024.72226508,   5250.12650943,
        -11607.2032205 , -19876.53117502],
       [ -1024.72226508,    866.62764232,   -243.05669437,
          -851.16391475,    344.37282808],
       [  5250.12650943,   -243.05669437,   2654.75012465,
          -891.22230267,  -4129.85522449],
       [-11607.2032205 ,   -851.16391475,   -891.22230267,
          3147.68088985,   2325.93875543],
       [-19876.53117503,    344.37282808,  -4129.85522449,
          2325.93875543,  10793.44960349]])
      jac: array([9.45444470e-07, 5.81661068e-06, 2.64041361e-06, 4.64622000e-06,
       1.49216293e-06])
  message: 'Optimization terminated successfully.'
     nfev: 72
      nit: 68
     njev: 72
   status: 0
  success: True
        x: array([-42.63536818,  -2.46523482,  -6.68048267,   9.4291056 ,
        18.2849256 ])

In [20]:
theta_1_opt = res1.x.reshape(-1,1)
theta_2_opt = res2.x.reshape(-1,1)
theta_3_opt = res3.x.reshape(-1,1)

## Inference on unseen data

In [21]:
unseen_example = np.array([1, 4.8, 3.3, 1.3, 0.3]).reshape(1,-1)

In [22]:
out_1 = np.round(sigmoid(unseen_example.dot(theta_1_opt)),4)
out_2 = np.round(sigmoid(unseen_example.dot(theta_2_opt)),4)
out_3 = np.round(sigmoid(unseen_example.dot(theta_3_opt)),4)

In [23]:
print('Ci sono {}% di probabilità che il campione appartenga alla classe 0'.format(out_1.ravel()[0]*100))
print('Ci sono {}% di probabilità che il campione appartenga alla classe 1'.format(out_2.ravel()[0]*100))
print('Ci sono {}% di probabilità che il campione appartenga alla classe 2'.format(out_3.ravel()[0]*100))

Ci sono 100.0% di probabilità che il campione appartenga alla classe 0
Ci sono 10.4% di probabilità che il campione appartenga alla classe 1
Ci sono 0.0% di probabilità che il campione appartenga alla classe 2
