In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt


class Neural_Network(object):
    """Small fully connected network: 2 inputs -> 3 hidden units -> 1 output.

    The output unit always uses a sigmoid. The hidden layer uses a sigmoid in
    ``forward`` / ``costFunction`` / ``costFunctionPrime`` and a ReLU in the
    ``*_ReLU`` variants. Weights are drawn uniformly from [0, 1) and the
    network has no bias terms.
    """

    def __init__(self):
        # Define Parameters
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3

        # Define Weights (W1 is drawn before W2 so seeded runs are unchanged)
        self.W1 = np.random.rand(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.rand(self.hiddenLayerSize, self.outputLayerSize)

    def _forward(self, X, hiddenActivation):
        # Shared forward pass; caches z2, a2 and z3 for the gradient methods.
        self.z2 = np.dot(X, self.W1)
        self.a2 = hiddenActivation(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        return self.sigmoid(self.z3)

    def _cost(self, y):
        # Half the sum of squared errors, reduced over the sample axis so the
        # result keeps one entry per output column (same shape as before).
        return 0.5 * np.sum((y - self.yHat) ** 2, axis=0)

    def _backprop(self, X, y, hiddenPrime):
        # Shared backward pass. The output unit is a sigmoid in both forward
        # variants, so the output delta always uses sigmoidPrime(z3).
        delta3 = np.multiply(-(y - self.yHat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)
        delta2 = np.dot(delta3, self.W2.T) * hiddenPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)
        return dJdW1, dJdW2

    def forward(self, X):
        """Propagate inputs through the network with a sigmoid hidden layer.

        Stores ``z2``, ``a2`` and ``z3`` on the instance and returns ``yHat``.
        """
        return self._forward(X, self.sigmoid)

    def forward_ReLU(self, X):
        """Propagate inputs through the network with a ReLU hidden layer.

        The output unit is still a sigmoid. Stores ``z2``, ``a2`` and ``z3``
        on the instance and returns ``yHat``.
        """
        return self._forward(X, self.relu)

    def sigmoid(self, z):
        """Apply the sigmoid activation function element-wise."""
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        """Derivative of the sigmoid, evaluated element-wise at ``z``.

        Written as ``s * (1 - s)``: the previous ``exp(-z) / (1 + exp(-z))**2``
        form evaluates to inf/inf = NaN once ``exp(-z)`` overflows.
        """
        s = self.sigmoid(z)
        return s * (1 - s)

    def relu(self, z):
        """Apply the ReLU activation function element-wise."""
        return np.maximum(0, z)

    def reluPrime(self, z):
        """Derivative of ReLU: 1 where ``z > 0``, otherwise 0."""
        return 1 * (z > 0)

    def costFunction(self, X, y):
        """Cost for ``X``/``y`` with the stored weights (sigmoid hidden layer).

        Also stores the prediction in ``self.yHat``.
        """
        self.yHat = self.forward(X)
        return self._cost(y)

    def costFunction_ReLU(self, X, y):
        """Cost for ``X``/``y`` with the stored weights (ReLU hidden layer).

        Also stores the prediction in ``self.yHat``.
        """
        self.yHat = self.forward_ReLU(X)
        return self._cost(y)

    def costFunctionPrime(self, X, y):
        """Return ``(dJdW1, dJdW2)`` for the sigmoid-hidden-layer network."""
        self.yHat = self.forward(X)
        return self._backprop(X, y, self.sigmoidPrime)

    def costFunctionPrime_ReLU(self, X, y):
        """Return ``(dJdW1, dJdW2)`` for the ReLU-hidden-layer network.

        Only the hidden layer uses ``reluPrime``; the output delta uses
        ``sigmoidPrime`` because ``forward_ReLU`` ends in a sigmoid.
        """
        self.yHat = self.forward_ReLU(X)
        return self._backprop(X, y, self.reluPrime)
    

In [None]:
# Training inputs: one row per sample, two features per row.
X = np.array(
    [
        [3, 5],
        [5, 1],
        [10, 1],
    ],
    dtype=float,
)
# Training targets: one value per sample, stored as a column vector.
y = np.array([[75], [80], [93]], dtype=float)

In [None]:
# Display the input matrix as defined (before any scaling).
X


In [None]:
# Display the target column as defined (before any scaling).
y

In [None]:
# Scale every input column by its own maximum and divide the targets by 100.
# NOTE: X and y are rebound to their scaled versions, so this cell is not
# idempotent -- running it a second time divides y by 100 again.
X = X / X.max(axis=0)
y = y / 100

In [None]:
# Display the scaled inputs and targets together.
X,y


In [None]:
# Create a network; its weights are drawn at random in __init__.
NN=Neural_Network()

In [None]:
# Forward pass (sigmoid hidden layer) with the untrained, random weights.
yH=NN.forward(X)

In [None]:
# Display the untrained network's predictions.
yH

In [None]:
# Display the targets for comparison with the predictions in yH.
y

In [None]:
# Plot the sigmoid and its derivative on the same axes over [-5, 5).
testValues = np.arange(-5, 5, 0.01)
plt.plot(testValues, NN.sigmoid(testValues), linewidth=2, label='Sigmoid')
plt.plot(testValues, NN.sigmoidPrime(testValues), linewidth=2, label='SigmoidPrime')
plt.grid(True)
plt.legend()


In [None]:
# Start over with a new network (new random weights).
NN=Neural_Network()

In [None]:
# Cost of the freshly initialized network; kept to compare against the cost
# after a weight update.
cost1=NN.costFunction(X,y)

In [None]:
# Display the initial cost.
cost1

In [None]:
# Gradients of the cost with respect to W1 and W2 at the current weights.
dJdW1,dJdW2=NN.costFunctionPrime(X,y)

In [None]:
# Display the gradient with respect to W1 (same shape as NN.W1).
dJdW1

In [None]:
# Display the gradient with respect to W2 (same shape as NN.W2).
dJdW2

In [None]:
# Take one step of size `scalar` and compare the cost before and after.
# NOTE(review): the gradient is ADDED here, which moves the weights uphill, so
# cost2 is expected to come out larger than cost1. Presumably this is a
# deliberate contrast with the descent cells that subtract it -- confirm.
scalar=1
NN.W1 = NN.W1+scalar*dJdW1
NN.W2 = NN.W2+scalar*dJdW2
cost2 = NN.costFunction(X,y)
print (cost1,cost2)

## Gradient Descent Example
Now, let's keep updating the weights until the cost falls below a threshold.
On every iteration we need to recalculate:
- the gradients $\partial J/\partial W_1$ and $\partial J/\partial W_2$: ```dJdW1,dJdW2=NN.costFunctionPrime(X,y)```

- weight: ```NN.W1 = NN.W1-scalar*dJdW1```

- cost: ```cost = NN.costFunction(X,y)```

Then, plot costs vs iteration with matplotlib.

In [None]:
# Train the ReLU-hidden-layer network with plain gradient descent and record
# the cost after every update.
NN = Neural_Network()
scalar = 3  # Learning Rate
costs = []
index = 0  # number of weight updates performed so far
while True:
    dJdW1, dJdW2 = NN.costFunctionPrime_ReLU(X, y)
    NN.W1 = NN.W1 - scalar * dJdW1
    NN.W2 = NN.W2 - scalar * dJdW2
    cost = NN.costFunction_ReLU(X, y)
    costs.append(cost)
    index = index + 1
    if cost < 0.001:
        break
    # Safety cap: without it the loop never ends when the cost stalls above
    # the threshold (or turns into NaN, which never compares below it).
    if index > 100000:
        break

plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.title("Gradient Descent Using ReLU")
plt.plot(costs)

In [None]:
# Train the sigmoid-hidden-layer network with plain gradient descent and
# record the cost after every update.
NN = Neural_Network()
scalar = 3  # Learning Rate
costs = []
index = 0  # number of weight updates performed so far
while True:
    dJdW1, dJdW2 = NN.costFunctionPrime(X, y)
    NN.W1 = NN.W1 - scalar * dJdW1
    NN.W2 = NN.W2 - scalar * dJdW2
    cost = NN.costFunction(X, y)
    costs.append(cost)
    index = index + 1
    if cost < 0.001:
        break
    # Safety cap: without it the loop never ends when the cost stalls above
    # the threshold (or turns into NaN, which never compares below it).
    if index > 100000:
        break

plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.title("Gradient Descent Using Sigmoid")

plt.plot(costs)

### Comparing different scalar (learning rate) values

In [None]:
import matplotlib.pyplot as plt

# Train one sigmoid-hidden-layer network per learning rate and overlay the
# resulting cost curves.
scalar_values = [1, 10, 50, 100]  # previously tried: [0.1, 1, 3, 10]
colors = ['r', 'g', 'b', 'm']
labels = [f'scalar={s}' for s in scalar_values]

for scalar, color, label in zip(scalar_values, colors, labels):
    NN = Neural_Network()
    costs = []
    # The counter must be reset for every network and advanced on every
    # update; otherwise the iteration cap below can never work.
    n_iter = 0

    while True:
        dJdW1, dJdW2 = NN.costFunctionPrime(X, y)
        NN.W1 -= scalar * dJdW1
        NN.W2 -= scalar * dJdW2
        cost = NN.costFunction(X, y)
        costs.append(cost)
        n_iter += 1
        if cost < 0.002:
            break
        if n_iter > 100000:  # It might take too long time
            break

    plt.plot(range(len(costs)), costs, color=color, label=label)

plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.title("Learning Curve by Scalar (Learning Rate)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Train the ReLU-hidden-layer network for each listed learning rate and plot
# the cost curves. zip() stops at the shortest list, so only as many colors
# are used as there are learning rates.
scalar_values = [3]  # previously tried: [0.1, 1, 3, 10]
colors = ['r', 'g', 'b', 'm']
labels = [f'scalar={s}' for s in scalar_values]

for scalar, color, label in zip(scalar_values, colors, labels):
    NN = Neural_Network()
    costs = []
    # Named n_iter rather than iter so the builtin iter() is not shadowed in
    # the notebook's global namespace.
    n_iter = 0
    while True:
        dJdW1, dJdW2 = NN.costFunctionPrime_ReLU(X, y)
        NN.W1 -= scalar * dJdW1
        NN.W2 -= scalar * dJdW2
        cost = NN.costFunction_ReLU(X, y)
        costs.append(cost)
        n_iter += 1
        if cost < 0.001:
            break
        if n_iter > 100000:  # It might take too long time
            break

    plt.plot(range(len(costs)), costs, color=color, label=label)

plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.title("Learning Curve by Scalar (Learning Rate)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

## Changing Layers

- Added one more hidden layer.
- The new layer has 3 or 10 nodes, depending on the value of ```self.hiddenLayer2Size```.

In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt


class Neural_Network_addLayer(object):
    """Fully connected network with two hidden layers and a sigmoid output.

    Layer sizes are 2 -> ``hiddenLayer1Size`` -> ``hiddenLayer2Size`` -> 1.
    Both hidden layers use a sigmoid in ``forward`` / ``costFunction`` /
    ``costFunctionPrime`` and a ReLU in the ``*_ReLU`` variants. Weights are
    drawn uniformly from [0, 1) and the network has no bias terms.
    """

    def __init__(self):
        # Define Parameters
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayer1Size = 3
        self.hiddenLayer2Size = 10

        # Define Weights (drawn in W1, W2, W3 order so seeded runs are unchanged)
        self.W1 = np.random.rand(self.inputLayerSize, self.hiddenLayer1Size)
        self.W2 = np.random.rand(self.hiddenLayer1Size, self.hiddenLayer2Size)
        self.W3 = np.random.rand(self.hiddenLayer2Size, self.outputLayerSize)

    def _forward(self, X, hiddenActivation):
        # Shared forward pass; caches z2, a2, z3, a3 and z4 for backprop.
        self.z2 = np.dot(X, self.W1)
        self.a2 = hiddenActivation(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        self.a3 = hiddenActivation(self.z3)
        self.z4 = np.dot(self.a3, self.W3)
        return self.sigmoid(self.z4)

    def _cost(self, y):
        # Half the sum of squared errors, reduced over the sample axis so the
        # result keeps one entry per output column (same shape as before).
        return 0.5 * np.sum((y - self.yHat) ** 2, axis=0)

    def _backprop(self, X, y, hiddenPrime):
        # Shared backward pass. Each delta is obtained from the delta of the
        # layer after it, pushed back through that layer's weights.
        # The output unit is a sigmoid in both forward variants.
        delta4 = np.multiply(-(y - self.yHat), self.sigmoidPrime(self.z4))
        dJdW3 = np.dot(self.a3.T, delta4)
        delta3 = np.dot(delta4, self.W3.T) * hiddenPrime(self.z3)
        dJdW2 = np.dot(self.a2.T, delta3)
        delta2 = np.dot(delta3, self.W2.T) * hiddenPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)
        return dJdW1, dJdW2, dJdW3

    def forward(self, X):
        """Propagate inputs through the network with sigmoid hidden layers.

        Stores the intermediate ``z``/``a`` values on the instance and
        returns ``yHat``.
        """
        return self._forward(X, self.sigmoid)

    def forward_ReLU(self, X):
        """Propagate inputs through the network with ReLU hidden layers.

        The output unit is still a sigmoid. Stores the intermediate
        ``z``/``a`` values on the instance and returns ``yHat``.
        """
        return self._forward(X, self.relu)

    def sigmoid(self, z):
        """Apply the sigmoid activation function element-wise."""
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        """Derivative of the sigmoid, evaluated element-wise at ``z``.

        Written as ``s * (1 - s)``: the previous ``exp(-z) / (1 + exp(-z))**2``
        form evaluates to inf/inf = NaN once ``exp(-z)`` overflows.
        """
        s = self.sigmoid(z)
        return s * (1 - s)

    def relu(self, z):
        """Apply the ReLU activation function element-wise."""
        return np.maximum(0, z)

    def reluPrime(self, z):
        """Derivative of ReLU: 1 where ``z > 0``, otherwise 0."""
        return 1 * (z > 0)

    def costFunction(self, X, y):
        """Cost for ``X``/``y`` with the stored weights (sigmoid hidden layers).

        Also stores the prediction in ``self.yHat``.
        """
        self.yHat = self.forward(X)
        return self._cost(y)

    def costFunction_ReLU(self, X, y):
        """Cost for ``X``/``y`` with the stored weights (ReLU hidden layers).

        Also stores the prediction in ``self.yHat``.
        """
        self.yHat = self.forward_ReLU(X)
        return self._cost(y)

    def costFunctionPrime(self, X, y):
        """Return ``(dJdW1, dJdW2, dJdW3)`` for the sigmoid-hidden network."""
        self.yHat = self.forward(X)
        return self._backprop(X, y, self.sigmoidPrime)

    def costFunctionPrime_ReLU(self, X, y):
        """Return ``(dJdW1, dJdW2, dJdW3)`` for the ReLU-hidden network.

        Only the hidden layers use ``reluPrime``; the output delta uses
        ``sigmoidPrime`` because ``forward_ReLU`` ends in a sigmoid.
        """
        self.yHat = self.forward_ReLU(X)
        return self._backprop(X, y, self.reluPrime)

### Testing

In [None]:
# Train the two-hidden-layer ReLU network with gradient descent, stopping once
# the cost is below 0.001 or after more than 10000 updates.
NN = Neural_Network_addLayer()
scalar = 0.1  # Learning Rate
costs = []
index = 0
while True:
    dJdW1, dJdW2, dJdW3 = NN.costFunctionPrime_ReLU(X, y)
    NN.W1 = NN.W1 - scalar * dJdW1
    NN.W2 = NN.W2 - scalar * dJdW2
    NN.W3 = NN.W3 - scalar * dJdW3
    cost = NN.costFunction_ReLU(X, y)
    costs.append(cost)
    index += 1
    if cost < 0.001 or index > 10000:
        break

plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.title("Gradient Descent Using ReLU")
plt.plot(costs)

In [None]:
import matplotlib.pyplot as plt

# Train one two-hidden-layer ReLU network per learning rate and overlay the
# cost curves on a logarithmic iteration axis.
scalar_values = [0.01, 0.1, 1, 3]
colors = ['r', 'g', 'b', 'm']
labels = [f'scalar={s}' for s in scalar_values]

for scalar, color, label in zip(scalar_values, colors, labels):
    NN = Neural_Network_addLayer()
    costs = []
    # Named n_iter rather than iter so the builtin iter() is not shadowed in
    # the notebook's global namespace.
    n_iter = 0
    while True:
        dJdW1, dJdW2, dJdW3 = NN.costFunctionPrime_ReLU(X, y)
        NN.W1 = NN.W1 - scalar * dJdW1
        NN.W2 = NN.W2 - scalar * dJdW2
        NN.W3 = NN.W3 - scalar * dJdW3
        cost = NN.costFunction_ReLU(X, y)
        costs.append(cost)
        n_iter += 1
        if cost < 0.001:
            break
        if n_iter > 10000:
            break

    # costs[k] is the cost after k + 1 updates. Plotting from 1 keeps the
    # first point on the log-scaled x-axis, which cannot represent x = 0.
    plt.plot(range(1, len(costs) + 1), costs, color=color, label=label)

plt.xlabel("Iterations")
plt.xscale('log')
plt.ylabel("Cost")
plt.title("Learning Curve by Scalar (Learning Rate)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()