In [None]:
import pandas as pd
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
from scipy.special import softmax
from random import randrange
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
sc = StandardScaler()

In [None]:
iris = datasets.load_iris()
df = pd.DataFrame(sc.fit_transform(iris.data), columns=iris.feature_names)
df['target'] = iris.target
X = df.copy()
y = df.drop('target', axis=1)
targets = iris.target_names
nclasses = 3
nfeatures = len(iris.feature_names)
iris.feature_names.append('tar')
feature_names = iris.feature_names

In [None]:
train, test = train_test_split(df, test_size=0.3)

In [None]:
train.shape

(105, 5)

In [None]:
test.shape

(45, 5)

In [None]:
class MultiClassLogReg:    
    def fit(self, X, y, nepochs=1000, lr=0.1):
        nepochs = int(nepochs)
        onehot_encoder = OneHotEncoder(sparse=False)
        y_oh = onehot_encoder.fit_transform(y.reshape(-1,1))
        self.W = np.zeros((X.shape[1], y_oh.shape[1]))
        self.B = 0
        epochs = [i for i in range(nepochs)] 
        losses = []
        W_lst = []
    
        for _ in range(nepochs):
            z = - (np.dot(X, self.W) + self.B)
            h = softmax(z, axis=1)
            N = X.shape[0]
            dw = 1/N * (np.dot(X.T,(y_oh - h)))
            db = 1/N * np.sum(y_oh - h)
            self.W -= lr * dw
            self.B -= lr * db
            W_lst.append(self.W)
            losses.append(self.loss(X, y_oh))

        self.loss_helper = {'epochs': epochs, 'loss': losses}

    def loss(self, X, y):
        z = - (np.dot(X, self.W) + self.B)
        N = X.shape[0]
        loss = 1/N * (np.trace(np.dot(X, np.dot(self.W, y.T))) + np.sum(np.log(np.sum(np.exp(z), axis=1))))
        return loss

    def predict(self, X):
        z = - (np.dot(X, self.W) + self.B)
        h = softmax(z, axis=1)
        return np.argmax(h, axis=1)

In [None]:
def accuracy(y, h):
    return np.sum(y == h)/len(y)*100

In [None]:
kf = KFold(n_splits=5, shuffle=True)

In [None]:
df = df.sample(frac=1)

In [None]:
X = df.drop('target', axis=1)
y = df['target']

In [None]:
y

93     1
141    2
29     0
79     1
31     0
      ..
112    2
88     1
147    2
72     1
25     0
Name: target, Length: 150, dtype: int64

In [None]:
folds = kf.split(X)

In [None]:
features = df.columns

In [None]:
model = MultiClassLogReg()

In [None]:
accs = []
cms = []
recalls = []
precisions = []
per_class = []

In [None]:
i = 1
for train_index, val_index in kf.split(df):
    X_train = df.iloc[train_index].loc[:, features]
    X_val = df.iloc[val_index][features]
    y_train = df.iloc[train_index].loc[:,'target']
    y_val = df.loc[val_index]['target']

    model.fit(np.asarray(X_train), np.asarray(y_train), 2000, 0.01)
    hval = model.predict(np.asarray(X_val))

    cm = confusion_matrix(y_val, hval)
    
    cms.append(cm)

In [None]:
for i in range(5):
    precisions.append(np.diag(cms[i]) / np.sum(cms[i], axis = 0))
    recalls.append(np.diag(cms[i]) / np.sum(cms[i], axis = 1))
    per_class.append(cms[i].diagonal()/cms[i].sum(axis=1).mean(axis=0))
    accs.append(np.trace(cms[i])/np.sum(cms[i])*100)

In [None]:
precisions = np.asarray(precisions).mean(axis=0)
recalls = np.asarray(recalls).mean(axis=0)
per_class = np.asarray(per_class).mean(axis=0)

In [None]:
print(accs)
print(recalls)
print(per_class)
print(precisions)

[26.666666666666668, 46.666666666666664, 46.666666666666664, 26.666666666666668, 20.0]
[0.38357143 0.13852941 0.50904762]
[0.34 0.14 0.52]
[0.32821678 0.2230303  0.36289279]


In [None]:
for i in range(len(accs)):
    print("fold",i,":",accs[i])

fold 0 : 26.666666666666668
fold 1 : 46.666666666666664
fold 2 : 46.666666666666664
fold 3 : 26.666666666666668
fold 4 : 20.0


In [None]:
print("precision")
for i in range(3):
    print(targets[i], precisions[i])
print("recalls")
for i in range(3):
    print(targets[i], precisions[i])
print("per class accuracy")
for i in range(3):
    print(targets[i], precisions[i])

precision
setosa 0.3282167832167832
versicolor 0.22303030303030305
virginica 0.36289279348102876
recalls
setosa 0.3282167832167832
versicolor 0.22303030303030305
virginica 0.36289279348102876
per class accuracy
setosa 0.3282167832167832
versicolor 0.22303030303030305
virginica 0.36289279348102876
