In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [2]:
data = pd.read_csv("/kaggle/input/iris/Iris.csv")
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
species_list = data['Species'].unique()
species_list

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [4]:
species_labels, species_list_ordered = pd.factorize(data['Species'])

In [5]:
species_labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [6]:
species_list_ordered

Index(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype='object')

In [7]:
data['target'] = species_labels
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,target
0,1,5.1,3.5,1.4,0.2,Iris-setosa,0
1,2,4.9,3.0,1.4,0.2,Iris-setosa,0
2,3,4.7,3.2,1.3,0.2,Iris-setosa,0
3,4,4.6,3.1,1.5,0.2,Iris-setosa,0
4,5,5.0,3.6,1.4,0.2,Iris-setosa,0


In [8]:
data = np.array(data)
data.shape

(150, 7)

In [9]:
# neural network setup

N = 150 # batch size
D_in = 4 # input dimension (# features)
H = 100 # hidden layer dimension (# nodes)
D_out = 3 # output classes

# data setup

x = np.array(data[:,1:5], dtype = np.float64)
y = np.array(data[:,6]) # needs one hot encoding

# x = 150 x 4
# y = 150 x 3

# W1 = 4 x 100
# W2 = 100 x 3
# op = 150 x 3

w1, w2 = np.random.randn(D_in, H).astype(np.float64), np.random.randn(H, D_out).astype(np.float64)

In [10]:
# one hot encoding y

encoder = OneHotEncoder(sparse_output=False)
y = y.reshape(-1,1) # one hot encoder needs 2D array
encoder.fit(y) # encoder object learns parameters for fitting on y
y_one_hot = encoder.transform(y) # returns modifications on data
encoder.categories_

print(y_one_hot[0])
print(y_one_hot.shape)

[1. 0. 0.]
(150, 3)


In [11]:
# putting it all together

def sigmoid(z):
    return (1.0 / (1.0 + np.exp(-z)))

def softmax(z):
    # stable values, keeping dim (150,1)
    exp_z = np.exp(z - np.max(z, axis = 1, keepdims = True)) 
    return exp_z / np.sum(exp_z, axis = 1, keepdims = True) # (150, 3)

def cross_entropy_loss(y_pred, y_true):
    # ensure numerical stability: clip predictions close to 0 or 1
    y_pred = np.clip(y_pred, 1e-12, 1.0 - 1e-12)
    # categorical cross-entropy loss: L = - (1/N) * sum(y_true * log(y_pred))
    N = y_true.shape[0]
    loss = -np.sum(y_true * np.log(y_pred)) / N
    return loss

for iteration in range(500):
    
    # forward propagation

    # sigmoid activation
    z1=np.array(np.dot(x, w1),dtype=np.float64) # explicit type assignment
    h = sigmoid(z1) # (150, 100)
    z2 = h.dot(w2) # (150, 3)
    y_pred = softmax(z2) # (150, 3)
    loss = cross_entropy_loss(y_pred, y)
    if iteration % 20 == 0:
        print('iteration: ', iteration, 'loss: ', loss)

    # calculate gradients for back propagation
    grad_z2 = y_pred - y # (150, 3)
    grad_w2 = h.T.dot(grad_z2) # (100, 3)
    grad_h = grad_z2.dot(w2.T) # (150, 100)
    grad_z1 = grad_h * h * (1 - h) # element-wise product for sigmoid derivative (150, 100)
    grad_w1 = (x.T).dot(grad_z1) # (4, 100)

    LR = 1e-4
    w1 -= LR * grad_w1.astype(np.float64)
    w2 -= LR * grad_w2.astype(np.float64)

iteration:  0 loss:  55.013028886294684
iteration:  20 loss:  21.36890860300118
iteration:  40 loss:  10.741318759311241
iteration:  60 loss:  5.579587805539548
iteration:  80 loss:  4.1538176909532485
iteration:  100 loss:  3.6399995595170602
iteration:  120 loss:  3.3638117279949946
iteration:  140 loss:  3.3196157403046995
iteration:  160 loss:  3.325980757076539
iteration:  180 loss:  3.334790512776029
iteration:  200 loss:  3.338175310743888
iteration:  220 loss:  3.3349408178451494
iteration:  240 loss:  3.326760348297758
iteration:  260 loss:  3.315803830937204
iteration:  280 loss:  3.314366526403502
iteration:  300 loss:  3.312046381589433
iteration:  320 loss:  3.309428425681622
iteration:  340 loss:  3.3078880270102347
iteration:  360 loss:  3.3079731602054707
iteration:  380 loss:  3.3125330114153995
iteration:  400 loss:  3.3291755662336646
iteration:  420 loss:  3.3557435801758784
iteration:  440 loss:  3.3733342267664557
iteration:  460 loss:  3.375944020434538
iteration