In [136]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import OneHotEncoder

In [137]:
#load in the iris dataset and split between targets and features
iris = datasets.load_iris()
features = iris.data
target = iris.target

#One Hot Encode the target data
encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(target.reshape(-1,1))

#split the data into testing and training data
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2)

In [138]:
#define and vectorize the ReLu function
def unvec_ReLu(z):
    if z <= 0:
        return 0
    else:
        return z
ReLu = np.vectorize(unvec_ReLu)

#define the SoftMax function
def SoftMax(z):
    return np.exp(z) / np.sum(np.exp(z))

#define the Feed Forward function
def ff(x, w1, w2, b1, b2):
    z1 = w1.dot(x) + b1 #compute z1 by finding the dot product of the input layer and random weights
    a1 = ReLu(z1) #compute the hidden layer by applying the ReLu function to z1
    z2 = w2.dot(a1) + b2 #compute z2 by finding the dot product of the hidden layer and random weights
    y_pred = SoftMax(z2) #compute the outputs or preductions by applying the SoftMax function to z2
    return z1, a1, z2, y_pred #return all values


In [139]:
#define loss function - binary crossentropy
def loss(y_true, y_pred):
   return  -(np.sum(y_true * np.log(y_pred)))

#define derivative of the SoftMax function
def SoftMax_dv(z):
   return SoftMax(z) * (1 - SoftMax(z))

#define derivative of ReLu
def un_vec_ReLu_dv(z):
   if z <= 0:
      return 0
   else:
      return 1
   
ReLu_dv = np.vectorize(un_vec_ReLu_dv)
   
#set the learning rate
lr = 0.01

In [140]:
#randomize weights
W1 = np.random.rand(2,4)
W2 = np.random.rand(3,2)
b1 = np.random.rand(2,)
b2 = np.random.rand(3,)
loss=[]

#Epoch
for i in range(len(y_train)):

    z1, a1, z2, y_pred =  ff(X_train[i], W1, W2, b1, b2)
    loss.append
    y_pred += 0.0001 
    y_pred /= sum(y_pred)

    #backpropogation - breaking down components of partial derivatives
    dLdZ2 = (-y_train[i] / y_pred) * SoftMax_dv(z2) #derivative of loss with respect to Z2 - dL/dYhat * dYhat/dz2
    dLdZ1 = dLdZ2.dot(W2) * ReLu_dv(z1) #derivative of loss with respect to Z2 - dL/dZ2 * dz2/da1 * da1/dz1
    dLdW1 = np.outer(dLdZ1, X_train[i]) #dldz1 outer x$
    dLdb1 = dLdZ1
    dldW2 = np.outer(dLdZ2, a1) #dldz2 outer a1
    dldb2 = dLdZ2

    W1 -= lr * dLdW1
    W2 -= lr * dldW2
    b1 -= lr * dLdb1
    b2 -= lr * dldb2

    print(W2, W1, b1, b2)



#Multiple epochs, plot loss

[[0.35414082 0.45498637]
 [0.14437945 0.90414036]
 [0.76761781 0.37608009]] [[0.84397395 0.10790536 0.34514925 0.50316022]
 [0.34072977 0.86053608 0.90009613 0.09569852]] [0.3466643  0.53034446] [0.49114694 0.05774267 0.6521452 ]
[[0.42317125 0.53336381]
 [0.19538172 0.96204855]
 [0.79333479 0.40527927]] [[0.88285107 0.12557678 0.37342352 0.51234935]
 [0.40648609 0.89042532 0.94791891 0.11124092]] [0.35373287 0.54230016] [0.50061225 0.06473599 0.65567146]
[[0.60643814 0.74541869]
 [0.30359156 1.08725624]
 [0.88840531 0.51528368]] [[1.00348067 0.17058783 0.47784915 0.54475731]
 [0.57302282 0.95256589 1.09208503 0.15598213]] [0.37173729 0.56715639] [0.51956267 0.07592526 0.66550207]
[[0.60643814 0.74541869]
 [0.30359156 1.08725624]
 [0.88840531 0.51528368]] [[1.00348067 0.17058783 0.47784915 0.54475731]
 [0.57302282 0.95256589 1.09208503 0.15598213]] [0.37173729 0.56715639] [0.51956267 0.07592526 0.66550207]
[[0.60643814 0.74541869]
 [0.30359156 1.08725624]
 [0.88840531 0.51528368]] [[1.