In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the dataset: every column except the last is used as a feature and the
# last column is used as the class label.
df = pd.read_csv("../../data/multi_classification.csv")

x = df.iloc[:, :-1]
# One-hot encode the label column (one 0/1 column per distinct label value).
y = pd.get_dummies(df.iloc[:, -1]).astype(int)

# Split BEFORE scaling so that the scaler never sees the held-out rows;
# fitting it on the full dataset would leak test-set statistics into training.
# A fixed random_state makes the split identical on every Restart & Run All.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y.values, test_size=0.2, shuffle=True, random_state=42
)

# Fit the standardisation on the training rows only, then reuse those
# statistics for the test rows.
x_ssc = StandardScaler()
x_train = x_ssc.fit_transform(x_train_raw)
x_test = x_ssc.transform(x_test_raw)

# The whole dataset under the same train-fitted scaling, kept under its
# original name for any code that still refers to it.
x_scaled = x_ssc.transform(x)

In [3]:
def softmax_function(h_x):
    """Convert each row of raw scores into a probability distribution.

    The row-wise maximum is subtracted before exponentiating so that large
    scores cannot overflow ``np.exp``; the shift cancels in the ratio, so the
    resulting probabilities are unaffected.

    Parameters
    ----------
    h_x : 2-D array of scores; normalisation is performed along axis 1.

    Returns
    -------
    Array with the same shape as ``h_x`` whose rows each sum to 1.
    """
    row_max = np.max(h_x, axis=1, keepdims=True)
    numerators = np.exp(h_x - row_max)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators


def multi_classification_loss_function(y_real, y_pred, eps=1e-15):
    """Mean categorical cross-entropy between one-hot targets and predictions.

    Computes ``-mean_i( sum_k y_real[i, k] * log(y_pred[i, k]) )``.

    Parameters
    ----------
    y_real : 2-D array of target rows (one row per sample).
    y_pred : 2-D array of predicted probabilities, same shape as ``y_real``.
    eps : smallest probability fed to the logarithm. Predictions below this
        value are raised to it before taking the log.

    Returns
    -------
    The scalar loss value.
    """
    # A probability that has underflowed to exactly 0 would give log(0) = -inf,
    # and 0 * -inf = nan for the non-target classes, turning the whole loss
    # into nan/inf. Flooring at eps keeps the result finite.
    safe_pred = np.maximum(y_pred, eps)
    loss = -np.mean(np.sum(y_real * np.log(safe_pred), axis=1))
    return loss


def get_derivative_of_cost_w(y_real, y_pred, x_real):
    """Return the update direction for the weight matrix.

    Computes ``-2 * x_real.T @ (y_real - y_pred)``, summed over all rows.

    NOTE(review): when ``y_pred`` is the softmax of ``x_real @ w + b`` and the
    rows of ``y_real`` are one-hot, this is 2 * N times the gradient of the
    mean cross-entropy (N = number of rows), so the effective step size grows
    with the number of samples. Confirm this scaling is intended.

    Parameters
    ----------
    y_real : 2-D array of targets, one row per sample.
    y_pred : 2-D array of predictions, same shape as ``y_real``.
    x_real : 2-D array of inputs with one row per sample.

    Returns
    -------
    Array with one row per input column and one column per output column.
    """
    residual = y_real - y_pred
    projected = np.dot(x_real.T, residual)
    return projected * -2


def get_derivative_of_cost_b(y_real, y_pred):
    """Return the update direction for the bias vector.

    Computes ``-2 * sum_i (y_real[i] - y_pred[i])``, i.e. the residuals summed
    over the rows (axis 0) and scaled by -2.

    NOTE(review): like the weight counterpart this is a sum over rows rather
    than a mean, so its magnitude grows with the number of rows. Confirm this
    scaling is intended.

    Parameters
    ----------
    y_real : 2-D array of targets, one row per sample.
    y_pred : 2-D array of predictions, same shape as ``y_real``.

    Returns
    -------
    1-D array with one entry per output column.
    """
    residual = y_real - y_pred
    column_totals = residual.sum(axis=0)
    return column_totals * -2

In [4]:
# Number of feature columns and number of one-hot label columns.
x_shape, y_shape = x.shape[-1], y.shape[-1]

# Placeholder parameters used only to inspect shapes; the training cell
# re-initialises both w and b before they are used.
w = np.zeros(shape=(x_shape, y_shape))
b = np.zeros(1, dtype=int)

In [5]:
# Sanity check: the column count of x_train must match the row count of w,
# and the column count of w must match that of y_train.
for _label, _array in (
    ("x_train", x_train),
    ("y_train", y_train),
    ("w", w),
    ("b", b),
):
    print(f"{_label}.shape : ", _array.shape)

x_train.shape :  (120, 4)
y_train.shape :  (120, 3)
w.shape :  (4, 3)
b.shape :  (1,)


In [6]:
epochs = 1000
learning_rate = 1e-3

# Initialize weights and biases.
# A seeded generator makes the starting point, and therefore the whole loss
# curve, identical on every Restart & Run All.
rng = np.random.default_rng(42)
w = rng.standard_normal((x_train.shape[1], y_train.shape[1]))
b = np.zeros(y_train.shape[1])

# Full-batch gradient descent. The loop runs epochs + 1 forward passes so that
# the loss can be reported both before the first update (i == 0) and after the
# last one (i == epochs), while performing exactly `epochs` parameter updates.
for i in range(epochs + 1):
    # Forward pass: linear scores -> class probabilities -> loss.
    h_x = x_train.dot(w) + b
    y_pred = softmax_function(h_x)

    loss = multi_classification_loss_function(y_train, y_pred)

    if i % 100 == 0:
        print(f"{i}/{epochs}")
        print(f"loss: {loss}")

    # The final pass only evaluates the loss; stopping here keeps the last
    # reported loss in sync with the final w and b.
    if i == epochs:
        break

    derivative_cost_w = get_derivative_of_cost_w(y_train, y_pred, x_train)
    derivative_cost_b = get_derivative_of_cost_b(y_train, y_pred)

    w = w - learning_rate * derivative_cost_w
    b = b - learning_rate * derivative_cost_b

0/1000
loss: 2.9254073339155906
100/1000
loss: 0.27152289154458137
200/1000
loss: 0.19466841142101735
300/1000
loss: 0.15911874181473357
400/1000
loss: 0.13852691035466977
500/1000
loss: 0.12499540308148202
600/1000
loss: 0.11535847838176115
700/1000
loss: 0.10810369878255068
800/1000
loss: 0.10241718920754533
900/1000
loss: 0.09782148462215015
1000/1000
loss: 0.09401748878290618
