In [46]:
import numpy as np
# from NeuralNetwork2 import Model2

# Toy data set, laid out one sample per column: 2 features/targets x 3 identical samples.
# Single-sample variant kept for reference: X = np.array([[0.05], [0.1]], dtype=float)
X = np.array([[0.05] * 3, [0.1] * 3], dtype=float)  # shape (2, 3)
y = np.array([[0.01] * 3, [0.99] * 3], dtype=float)  # shape (2, 3)



In [47]:
class Model2:
    """Small fully-connected network: 2 inputs -> 3 hidden units -> 2 outputs.

    Both layers use the sigmoid activation and the parameters start from fixed,
    hand-picked values. Data is laid out with one sample per column, so an
    input batch ``X`` has shape ``(2, n_samples)`` and the targets ``y`` have
    shape ``(2, n_samples)``.
    """

    def __init__(self):
        # Weights: W1 maps input (2) -> hidden (3); W2 maps hidden (3) -> output (2).
        self.W1 = np.array([[0.15, 0.3], [0.2, 0.35], [0.25, 0.4]], dtype=float)
        self.W2 = np.array([[0.5, 0.6, 0.7], [0.55, 0.65, 0.75]], dtype=float)

        # Biases are column vectors so they broadcast across the sample axis.
        # Their length must match the number of rows of the matching weight
        # matrix (units in that layer), not the number of columns of X.
        # Stored as arrays from the start so their type does not change after
        # the first parameter update.
        self.B1 = np.array([[0.45], [0.45], [0.45]], dtype=float)
        self.B2 = np.array([[0.8], [0.8]], dtype=float)

    def sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + exp(-z))."""
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        """Element-wise derivative of the sigmoid, evaluated at pre-activation ``z``.

        Uses the identity s'(z) = s(z) * (1 - s(z)). The algebraically equal
        form exp(-z) / (1 + exp(-z))**2 evaluates to inf/inf = nan once
        exp(-z) overflows, whereas this form goes to 0 there.
        """
        s = self.sigmoid(z)
        return s * (1 - s)

    def relu(self, z):
        """Element-wise rectified linear unit (not used by ``forward``)."""
        return np.maximum(0, z)

    def forward(self, X):
        """Propagate inputs through the network and return the predictions.

        Caches the intermediate values (``a1``, ``z2``, ``a2``, ``z3``, ``a3``)
        on the instance because ``costFunctionPrime`` reads them afterwards.
        """
        self.a1 = X  # input "activation", kept only for notational symmetry
        self.z2 = np.dot(self.W1, X) + self.B1
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.W2, self.a2) + self.B2
        self.a3 = self.sigmoid(self.z3)
        return self.a3

    def costFunction(self, X, y):
        """Return the half squared error for each sample using the current parameters.

        The error is summed over the output units (axis 0) only, so the result
        has one entry per column of ``X`` rather than being a single scalar.
        """
        self.yHat = self.forward(X)
        return 0.5 * np.sum((y - self.yHat) ** 2, axis=0)

    def costFunctionPrime(self, X, y):
        """Back-propagate and return the gradients of the cost, summed over samples.

        Returns:
            Tuple ``(dJdW1, dJdW2, dJdB2, dJdB1)``. Note the order: the two
            bias gradients come back as B2 first, then B1.
        """
        self.yHat = self.forward(X)

        # Output layer error term.
        delta3 = np.multiply(-(y - self.yHat), self.sigmoidPrime(self.z3))
        print("Delta3:\n", delta3)
        dJdW2 = np.dot(delta3, self.a2.T)
        dJdB2 = delta3.sum(axis=1, keepdims=True)

        # Hidden layer error term, pushed back through W2.
        delta2 = np.multiply(np.dot(self.W2.T, delta3), self.sigmoidPrime(self.z2))
        print("Delta2:\n", delta2)
        dJdW1 = np.dot(delta2, X.T)
        dJdB1 = delta2.sum(axis=1, keepdims=True)

        return dJdW1, dJdW2, dJdB2, dJdB1

    def tuneParams(self, X, y, learning_rate=0.5):
        """Apply one gradient-descent step to this instance's weights and biases.

        The gradients from ``costFunctionPrime`` are summed over samples, so the
        step is scaled by ``learning_rate / n_samples`` to use the mean gradient.
        """
        dJdW1, dJdW2, dJdB2, dJdB1 = self.costFunctionPrime(X, y)
        scalar = learning_rate / X.shape[1]
        # Update self, not a module-level variable, so the method works for any
        # instance regardless of what it is called in the notebook.
        self.W1 = self.W1 - scalar * dJdW1
        self.W2 = self.W2 - scalar * dJdW2
        self.B1 = self.B1 - scalar * dJdB1
        self.B2 = self.B2 - scalar * dJdB2


In [48]:
# Build the network; Model2.__init__ takes no arguments and sets fixed initial weights and biases.
model = Model2()

In [49]:
# Baseline: cost from a forward pass, kept in cost1 for later comparisons.
# NOTE(review): this is only the "initial" cost if model has not been updated yet.
cost1 = model.costFunction(X, y)
print(cost1) # Cost with initial values

[0.37734071 0.37734071 0.37734071]


In [50]:
# Back-propagate once and show every gradient.
# costFunctionPrime returns the bias gradients as B2 first, then B1.
dJdW1, dJdW2, dJdB2, dJdB1 = model.costFunctionPrime(X, y)
for label, grad in (
    ("dW2:\n", dJdW2),
    ("dW1:\n", dJdW1),
    ("dB2:\n", dJdB2),
    ("dB1:\n", dJdB1),
):
    print(label, grad)


Delta3:
 [[ 0.09621877  0.09621877  0.09621877]
 [-0.01123348 -0.01123348 -0.01123348]]
Delta2:
 [[0.00988378 0.00988378 0.00988378]
 [0.01186557 0.01186557 0.01186557]
 [0.01383981 0.01383981 0.01383981]]
dW2:
 [[ 0.17882758  0.17933743  0.17984634]
 [-0.02087801 -0.02093753 -0.02099695]]
dW1:
 [[0.00148257 0.00296514]
 [0.00177984 0.00355967]
 [0.00207597 0.00415194]]
dB2:
 [[ 0.2886563 ]
 [-0.03370045]]
dB1:
 [[0.02965135]
 [0.03559671]
 [0.04151942]]


In [51]:
# Take one tuneParams step (default learning_rate) and compare the cost before and after.
# tuneParams calls costFunctionPrime, which is why Delta3/Delta2 are printed in between.
# This cell mutates model, so re-running it applies a further step each time.
print("Before:", cost1) # Before and After-tuning comparison
model.tuneParams(X, y)
cost4 = model.costFunction(X, y)
print("After:", cost4)

Before: [0.37734071 0.37734071 0.37734071]
Delta3:
 [[ 0.09621877  0.09621877  0.09621877]
 [-0.01123348 -0.01123348 -0.01123348]]
Delta2:
 [[0.00988378 0.00988378 0.00988378]
 [0.01186557 0.01186557 0.01186557]
 [0.01383981 0.01383981 0.01383981]]
After: [0.36667614 0.36667614 0.36667614]


In [52]:
# Adjusted Weights and Biases
print("W2:\n", model.W2)
print("W1:\n", model.W1)
print("B2:\n", model.B2)
print("B1:\n", model.B1)

W2:
 [[0.4701954  0.57011043 0.67002561]
 [0.55347967 0.65348959 0.75349949]]
W1:
 [[0.14975291 0.29950581]
 [0.19970336 0.34940672]
 [0.249654   0.39930801]]
B2:
 [[0.75189062]
 [0.80561674]]
B1:
 [[0.44505811]
 [0.44406721]
 [0.4430801 ]]


In [36]:
# Gradient *ascent* experiment: step every parameter along +gradient, then re-evaluate the cost.
# NOTE(review): dJdW1/dJdW2/dJdB1/dJdB2 are not computed in this cell; it uses whatever
# values an earlier cell left in the kernel, so the result depends on execution order.
# This cell also mutates model, so it is not idempotent when re-run.
scalar = 3 # Increasing along gradient (REMEMBER to divide scalar by amount of data)
model.W1 = model.W1 + scalar*dJdW1
model.W2 = model.W2 + scalar*dJdW2
model.B1 = model.B1 + scalar*dJdB1
model.B2 = model.B2 + scalar*dJdB2
cost2 = model.costFunction(X,y)

In [37]:
print(cost1, cost2) # Moving along the gradient causes the cost function to increase

[0.37734071 0.37734071 0.37734071] [0.47819614 0.47819614 0.47819614]


In [38]:
# Gradient *descent* step: recompute the gradients at the current parameters and step against them.
# Reuses `scalar` from an earlier cell and overwrites the dJd* variables in the kernel namespace.
# This cell mutates model, so each re-run applies another step.
dJdW1, dJdW2, dJdB2, dJdB1 = model.costFunctionPrime(X, y)
model.W1 = model.W1 - scalar*dJdW1
model.W2 = model.W2 - scalar*dJdW2
model.B1 = model.B1 - scalar*dJdB1
model.B2 = model.B2 - scalar*dJdB2
cost3 = model.costFunction(X, y)

Delta3:
 [[ 0.0193362   0.0193362   0.0193362 ]
 [-0.01505974 -0.01505974 -0.01505974]]
Delta2:
 [[0.00292474 0.00292474 0.00292474]
 [0.0030085  0.0030085  0.0030085 ]
 [0.00309    0.00309    0.00309   ]]


In [39]:
# Compare the cost after the ascent step (cost2) with the cost after the descent step (cost3).
print(cost2, cost3)

[0.47819614 0.47819614 0.47819614] [0.46502878 0.46502878 0.46502878]


In [40]:
# Inspect the value currently bound to dJdW1 (whichever cell assigned it last).
dJdW1

array([[0.00148257, 0.00296514],
       [0.00177984, 0.00355967],
       [0.00207597, 0.00415194]])

In [44]:
# Inspect the value currently bound to dJdW2 (whichever cell assigned it last).
dJdW2

array([[ 0.17882758,  0.17933743,  0.17984634],
       [-0.02087801, -0.02093753, -0.02099695]])

In [30]:
# Sigmoid derivative at the hidden-layer pre-activations; model.z2 is set by the latest forward() call.
model.sigmoidPrime(model.z2)

array([[0.23571561, 0.23571561, 0.23571561],
       [0.23529029, 0.23529029, 0.23529029],
       [0.23485952, 0.23485952, 0.23485952]])

In [45]:
# Inspect the model's current hidden-to-output weight matrix.
model.W2

array([[0.5 , 0.6 , 0.7 ],
       [0.55, 0.65, 0.75]])