# Import dependencies for preprocessing the data

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [2]:
# Load the data from file

dataset = pd.read_csv("../preprocessing_stuff/PCA/data/irises/IRIS.csv")
x_data = dataset[["sepal_length", "sepal_width", "petal_length", "petal_width"]]

In [3]:
print(f"x_data MEAN:\n{x_data.mean()}\n")
print(f"x_data STD:\n{x_data.std()}")

x_data MEAN:
sepal_length    5.843333
sepal_width     3.054000
petal_length    3.758667
petal_width     1.198667
dtype: float64

x_data STD:
sepal_length    0.828066
sepal_width     0.433594
petal_length    1.764420
petal_width     0.763161
dtype: float64


In [4]:
# Normalize the data

x_data -= x_data.mean(axis=0)
x_data /= x_data.std(axis=0)

In [5]:
print(f"x_data MEAN:\n{x_data.mean()}\n")
print(f"x_data STD:\n{x_data.std()}")

x_data MEAN:
sepal_length   -5.210647e-16
sepal_width    -6.631732e-16
petal_length    1.894781e-16
petal_width    -1.894781e-16
dtype: float64

x_data STD:
sepal_length    1.0
sepal_width     1.0
petal_length    1.0
petal_width     1.0
dtype: float64


In [6]:
# Onehot encode labels

label_encoder = LabelEncoder()
onehot_encoder = OneHotEncoder(sparse=False)

labels = dataset["species"]
coded_labels = label_encoder.fit_transform(labels)
y_data = onehot_encoder.fit_transform(coded_labels.reshape(-1, 1))

print(y_data)

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0.

In [7]:
x_data = x_data.to_numpy()

In [8]:
# Shuffle the data

perm = np.random.permutation(len(x_data))
x_data = x_data[perm]
y_data = y_data[perm]

In [9]:
# Train / Test split the data

train_count = int(len(x_data) * 0.8)

x_train = x_data[:train_count]
y_train = y_data[:train_count]

x_test = x_data[train_count:]
y_test = y_data[train_count:]

In [10]:
print(x_train[0])
print(y_train[0])

[ 1.03453895 -0.12454038  0.8168877   1.44312105]
[0. 0. 1.]


# Build The model

In [11]:
import numpy as np
from tqdm import tqdm

In [12]:
class ReLULayer:
    def __init__(self, input_size, output_size):
        self.W = np.random.randn(input_size, output_size) * 0.01  # weight initialization
        self.b = np.zeros((1, output_size)) # bias initialization
        self.X = None
        self.Z = None
        self.A = None

    def forward(self, X):
        self.X = X
        self.Z = np.dot(X, self.W) + self.b
        self.A = np.maximum(0, self.Z)
        return self.A

    def backward(self, dA, learning_rate):
        batch_size = self.X.shape[0]
        dZ = np.multiply(dA, np.int64(self.A > 0))
        dW = np.dot(self.X.T, dZ) / batch_size
        db = np.sum(dZ, axis=0, keepdims=True) / batch_size
        dX = np.dot(dZ, self.W.T)

        self.W -= learning_rate * dW
        self.b -= learning_rate * db

        return dX

In [13]:
class SoftmaxLayer:
    def __init__(self, input_size, output_size):
        self.W = np.random.randn(input_size, output_size) * 0.01  # weight initialization
        self.b = np.zeros((1, output_size)) # bias initialization

    def forward(self, X):
        self.X = X
        self.Z = np.dot(X, self.W) + self.b
        exp_Z = np.exp(self.Z - np.max(self.Z))
        self.A = exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
        return self.A

    def backward(self, dA, learning_rate):
        batch_size = self.X.shape[0]
        dZ = self.A - dA
        dW = np.dot(self.X.T, dZ) / batch_size
        db = np.sum(dZ, axis=0, keepdims=True) / batch_size
        dX = np.dot(dZ, self.W.T)

        self.W -= learning_rate * dW
        self.b -= learning_rate * db

        return dX

# Use full functions

In [14]:
def categorical_cross_entropy(y_true, y_pred):
    """
    Calculate categorical cross-entropy loss.

    Arguments:
    y_true -- true labels (one-hot encoded)
    y_pred -- predicted probability distribution over the classes (softmax output)

    Returns:
    loss -- categorical cross-entropy loss
    """
    # Add a small epsilon value to prevent division by zero
    epsilon = 1e-8

    # Calculate categorical cross-entropy loss
    N = y_true.shape[0]
    loss = -1/N * np.sum(y_true * np.log(y_pred + epsilon))

    return loss

In [15]:
def compute_accuracy(y_true, y_pred):
    """
    Compute the accuracy of the predicted probabilities relative to the true labels.

    Arguments:
    y_true -- a numpy array of shape (num_examples, num_classes) containing the true labels in one-hot format
    y_pred -- a numpy array of shape (num_examples, num_classes) containing the predicted probabilities

    Returns:
    accuracy -- a scalar representing the fraction of examples that were classified correctly
    """
    # Determine the predicted classes
    y_pred_classes = np.argmax(y_pred, axis=1)

    # Determine the true classes
    y_true_classes = np.argmax(y_true, axis=1)

    # Compute the accuracy
    num_correct = np.sum(y_pred_classes == y_true_classes)
    num_examples = y_true.shape[0]
    accuracy = num_correct / num_examples

    return accuracy

# Build and train the model

In [16]:
model = {"relu_0": ReLULayer(4, 6),
         "relu_1": ReLULayer(6, 4),
         "softmax_2": SoftmaxLayer(4, 3)}

In [17]:
def train_model(x, y, learning_rate=0.01, epochs=300):
    loss_history = []
    acc_history = []

    desc_msg = ""
    for epoch in tqdm(range(epochs), desc=desc_msg):
        # forward pass the model
        out_0 = model["relu_0"].forward(x)
        out_1 = model["relu_1"].forward(out_0)
        y_pred = model["softmax_2"].forward(out_1)

        # calculate loss, and accuracy values
        loss = categorical_cross_entropy(y, y_pred)
        acc = compute_accuracy(y, y_pred)
        loss_history.append(loss)
        acc_history.append(acc)

        # backward pass, and weights update
        dloss = y_pred - y
        dy_pred = model["softmax_2"].backward(dloss, learning_rate)
        dout_1 = model["relu_1"].backward(dy_pred, learning_rate)
        dout_2 = model["relu_0"].backward(dout_1, learning_rate)

    return {"acc": acc_history, "loss": loss_history}

In [18]:
history = train_model(x_train, y_train, epochs=1000, learning_rate=0.1)

  0%|          | 0/1000 [00:00<?, ?it/s]

120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu b

  self.A = exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
100%|██████████| 1000/1000 [00:00<00:00, 7633.78it/s]

120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu bs: 120
relu bs: 120
120
relu b




In [19]:
print(history["acc"])

[0.575, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.30833333333333335, 0.308

In [20]:
print(history["loss"])

[1.098611691988811, 1.0987111121234083, 1.0988131043182794, 1.0989183314044118, 1.0990268029359849, 1.0991385221018186, 1.0992534918980672, 1.0993717151233555, 1.0994931953582125, 1.0996179356217548, 1.0997459374982705, 1.0998772055384667, 1.1000117427670972, 1.1001495522046227, 1.1002906368745267, 1.1004349998118135, 1.1005826440728654, 1.1007335726693845, 1.1008877886805573, 1.101045295436329, 1.1012060962121462, 1.1013701943827028, 1.1015375934453426, 1.1017082970469763, 1.1018823090150074, 1.1020596333886297, 1.1022402744804822, 1.102424236881315, 1.1026115255553846, 1.1028021458803703, 1.102996103721322, 1.1031934055102564, 1.1033940583369695, 1.1035980700525858, 1.1038054493875582, 1.1040162060860474, 1.1042303510588893, 1.1044478965576194, 1.1046688563560492, 1.1048932460563574, 1.1051210831831224, 1.1053523876345661, 1.1055871819319254, 1.105825491609589, 1.1060673456396395, 1.1063127769028047, 1.10656182278725, 1.1068145257044715, 1.1070709338952742, 1.1073311021724892, 1.1075