# Logistic Regression for Multi-Class Classification

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
from statistics import mode
import math
from sklearn.metrics import confusion_matrix

In [2]:
#Reading data
f = pd.read_csv('iris.data', header = None)
f.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
random.seed(0)
training_data, test_data = train_test_split(f, train_size = 2/3)#Shuffles data and splits it, ~2/3rd for training
test_data.head()#Testing

Unnamed: 0,0,1,2,3,4
43,5.0,3.5,1.6,0.6,Iris-setosa
139,6.9,3.1,5.4,2.1,Iris-virginica
130,7.4,2.8,6.1,1.9,Iris-virginica
94,5.6,2.7,4.2,1.3,Iris-versicolor
21,5.1,3.7,1.5,0.4,Iris-setosa


In [4]:
test_data.shape, training_data.shape

((50, 5), (100, 5))

In [5]:
y_train = training_data.iloc[:, -1].to_numpy().reshape(-1, 1)
y_test = test_data.iloc[:, -1].to_numpy().reshape(-1, 1)
type(y_train), type(y_test)

(numpy.ndarray, numpy.ndarray)

In [6]:
#Bias terms
bias_training = np.ones(training_data.shape[0]).reshape(-1, 1)
bias_test = np.ones(test_data.shape[0]).reshape(-1, 1)
type(bias_training)

numpy.ndarray

In [7]:
def Zscore(x, dataset):
    for i in range(dataset.shape[1] - 1):
        col = dataset.iloc[:, i].to_numpy()
        temp = (col - np.mean(col))/ (np.std(col, ddof = 1))
        x = np.append(x, temp.reshape(-1, 1), axis = 1)
    return x

In [8]:
zscored_training_data = Zscore(bias_training, training_data)
zscored_test_data = Zscore(bias_test, test_data)
zscored_test_data[:5], zscored_training_data[:5], type(zscored_training_data), type(zscored_training_data)#Testing

(array([[ 1.        , -0.96580929,  1.08195561, -1.22649421, -0.82423781],
        [ 1.        ,  1.09140781,  0.06603485,  0.86349592,  1.13202282],
        [ 1.        ,  1.63278073, -0.69590572,  1.24849411,  0.87118807],
        [ 1.        , -0.31616179, -0.94988591,  0.20349904,  0.08868382],
        [ 1.        , -0.8575347 ,  1.58991599, -1.28149395, -1.08507256]]),
 array([[ 1.        , -0.53734768,  0.78450791, -1.15923614, -1.28421079],
        [ 1.        ,  1.12984083, -0.09696165,  0.84636271,  1.46206482],
        [ 1.        ,  1.12984083,  0.12340574,  0.38794012,  0.28508956],
        [ 1.        ,  2.66878407,  1.66597746,  1.53399661,  1.06973974],
        [ 1.        , -1.56330984,  0.34377313, -1.33114461, -1.28421079]]),
 numpy.ndarray,
 numpy.ndarray)

In [9]:
x_test = np.append(zscored_test_data, y_test.reshape(-1, 1), axis = 1)
x_train = np.append(zscored_training_data, y_train.reshape(-1, 1), axis = 1)
x_train.shape, x_train[:5]#Testing

((100, 6),
 array([[1.0, -0.5373476811053448, 0.7845079060857099,
         -1.1592361356730236, -1.2842107852895654, 'Iris-setosa'],
        [1.0, 1.1298408282907144, -0.09696165131396316, 0.846362714972347,
         1.4620648248001364, 'Iris-virginica'],
        [1.0, 1.1298408282907144, 0.12340573803595534,
         0.38794012053911964, 0.2850895633331214, 'Iris-versicolor'],
        [1.0, 2.668784067733231, 1.6659774634853828, 1.5339966066221886,
         1.069739737644465, 'Iris-virginica'],
        [1.0, -1.56330984073369, 0.34377312738587384, -1.3311446085854841,
         -1.2842107852895654, 'Iris-setosa']], dtype=object))

In [10]:
not_virginica = np.empty((0, 6))
not_versicolor = np.empty((0, 6))
not_setosa = np.empty((0, 6))

#Samples
for i in range(len(x_train)):
    if x_train[i][5] != 'Iris-virginica':
        not_virginica = np.vstack((not_virginica, x_train[i]))
    if x_train[i][5] != 'Iris-versicolor':
        not_versicolor = np.vstack((not_versicolor, x_train[i]))
    if x_train[i][5] != 'Iris-setosa':
        not_setosa = np.vstack((not_setosa, x_train[i]))
not_virginica.shape, not_setosa.shape, not_versicolor.shape

((68, 6), (66, 6), (66, 6))

In [11]:
#Sample 1 : excludes Iris-virginica
not_virginica = np.where(not_virginica == 'Iris-setosa', 1, not_virginica)
not_virginica = np.where(not_virginica == 'Iris-versicolor', 0, not_virginica)

#Sample 2 : excludes Iris-versicolor
not_versicolor = np.where(not_versicolor == 'Iris-virginica', 1, not_versicolor)
not_versicolor = np.where(not_versicolor == 'Iris-setosa', 0, not_versicolor)

#Sample 3 : excludes Iris-setosa
not_setosa = np.where(not_setosa == 'Iris-versicolor', 1, not_setosa)
not_setosa = np.where(not_setosa == 'Iris-virginica', 0, not_setosa)

not_virginica[:5]

array([[1.0, -0.5373476811053448, 0.7845079060857099,
        -1.1592361356730236, -1.2842107852895654, 1],
       [1.0, 1.1298408282907144, 0.12340573803595534,
        0.38794012053911964, 0.2850895633331214, 0],
       [1.0, -1.56330984073369, 0.34377312738587384, -1.3311446085854841,
        -1.2842107852895654, 1],
       [1.0, 0.7451050184300856, -0.3173290406638817,
        0.33063729623496585, 0.15431453428123101, 0],
       [1.0, 0.23212393861591296, -0.3173290406638817,
        0.44524294484327287, 0.4158645923850121, 0]], dtype=object)

In [12]:
def logistic_regression(data):
    x = data[:, 0:5]
    x = x.astype(float)
    y = data[:, -1].reshape(-1, 1)
    random.seed(0)
    w = np.random.uniform(-0.01, 0.01, x.shape[1]).reshape(-1,1)#This is omega
    y_hat = 1 / (1 + np.exp(-x.dot(w)))
    J = np.mean(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
    count = 0
    per_change = 1
    #print(w)
    while count < 1500:
        w = w + (10 ** -4) * np.transpose(x).dot(y - y_hat)
        #print(w)
        y_hat_tmp = 1 / (1 + np.exp((-x.dot(w)).astype(float)))
        J_new = np.mean(y * np.log(y_hat_tmp) + (1 - y) * np.log(1 - y_hat_tmp))
        per_change = abs(J_new - J)
        J = J_new
        count += 1
    return w

In [13]:
not_virginica_pred = {}
not_versicolor_pred = {}
not_setosa_pred = {}
x_test = x_test[:, :5].astype(float)

#Sample 1
w1 = logistic_regression(not_virginica)
y_hat1 = 1 / (1 + np.exp((-x_test.dot(w1)).astype(float)))
#Sample 2
w2 = logistic_regression(not_versicolor)
y_hat2 = 1 / (1 + np.exp((-x_test.dot(w2)).astype(float)))
#Sample 3
w3 = logistic_regression(not_setosa)
y_hat3 = 1 / (1 + np.exp((-x_test.dot(w3)).astype(float)))

prediction = []
for i in range(len(x_test)):
    if y_hat1[i] >= 0.5:
        not_virginica_pred[i] = 'Iris-setosa'
    else:
        not_virginica_pred[i] = 'Iris-versicolor'
    if y_hat2[i] >= 0.5:
        not_versicolor_pred[i] = 'Iris-virginica'
    else:
        not_versicolor_pred[i] = 'Iris-setosa'
    if y_hat3[i] >= 0.5:
        not_setosa_pred[i] = 'Iris-versicolor'
    else:
        not_setosa_pred[i] = 'Iris-virginica'
    prediction.append(mode([not_virginica_pred[i], not_versicolor_pred[i], not_setosa_pred[i]]))

#Accuracy
tmp = 0
for i in range(len(y_test)):
    if y_test[i] == prediction[i]:
        tmp += 1
a = tmp / len(y_test)
print(f'Accuracy is {a * 100:.4}%')

Accuracy is 88.0%


Accuracy varies according to randomness of w in the function, highest I got was 88%, so used it in the pdf.

In [14]:
confusion_matrix(y_test, prediction)#Not sure if I am allowed to use this directly, so using for testing

array([[16,  0,  0],
       [ 0, 10,  6],
       [ 0,  0, 18]], dtype=int64)

In [15]:
c = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
for i in range(len(y_test)):
    if y_test[i] == prediction[i]:
        if y_test[i] == 'Iris-virginica':
            c[2][2] += 1
        elif y_test[i] == 'Iris-versicolor':
            c[1][1] += 1
        elif y_test[i] == 'Iris-setosa':
            c[0][0] += 1
    else:
        if prediction[i] == 'Iris-virginica' and y_test[i] == 'Iris-versicolor':
            c[1][2] += 1
        elif prediction[i] == 'Iris-versicolor' and y_test[i] == 'Iris-virginica':
            c[2][1] += 1
        elif prediction[i] == 'Iris-virginica' and y_test[i] == 'Iris-setosa':
            c[0][1] += 1
        elif prediction[i] == 'Iris-setosa' and y_test[i] == 'Iris-virginica':
            c[1][0] += 1
        elif prediction[i] == 'Iris-versicolor' and y_test[i] == 'Iris-setosa':
            c[2][0] += 1
        elif prediction[i] == 'Iris-setosa' and y_test[i] == 'Iris-versicolor':
            c[0][2] += 1
c

[[16, 0, 0], [0, 10, 6], [0, 0, 18]]

In [16]:
labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
confMAT = pd.DataFrame(np.array(c), columns = labels, index = labels)
confMAT

Unnamed: 0,Iris-setosa,Iris-versicolor,Iris-virginica
Iris-setosa,16,0,0
Iris-versicolor,0,10,6
Iris-virginica,0,0,18
