In [286]:
import numpy as np
from sklearn.datasets import make_regression
np.random.seed(41)

In [287]:
X,y = make_regression(100, 2, bias=1.5, random_state=41)
y = np.expand_dims(y, axis=-1)

## Coding neural network static way 

Network has 3 layers

- layer 1 : 3 nuerons, sigmoid activation
- layer 2: 2 nuerons, sigmoid activation
- layer 3: 2 nueron, no activation

In [289]:
## Intializing the weights

W1 = np.random.normal(size=(2,3)) * 0.1
b1 = np.zeros(shape=(3))
#Z1
#A1

W2 = np.random.normal(size=(3, 2)) * 0.1
b2 = np.zeros(shape=(2))
#Z2
#A2

W3 = np.random.normal(size=(2, 1)) * 0.1
b3 = np.zeros(shape=(1))
#Z3
#y_hat

#L

In [276]:
### Training loop

In [314]:
%time

epochs = 100
lr = 1e-2

for epoch in range(epochs):

    ## forword propagtion

    Z1 = np.dot(X, W1) + b1
    A1 = 1/(1+np.exp(-Z1))

    Z2 = np.dot(A1,W2) + b2
    A2 = 1/(1+np.exp(-Z2))

    Z3 = np.dot(A2,W3) + b3
    y_hat = Z3 ## Linear activationb becuase the target variable is continuous

    ## Loss 
    # MSE

    L = (y-y_hat)**2
    loss_value = np.sum(L)/ L.shape[0]
    print(f"Epoch: {epoch+1}, Loss: {loss_value}")

    ### Backpropagation
    m = X.shape[0]

    #layer 3
    dy_hat = -2*(y-y_hat)
    dZ3 = dy_hat ## becuase there is no activation Z3 = y_hay hence dZ3 = dy_hat
    dW3 = 1/m * np.dot(A2.T,dZ3)
    db3 = 1/m * np.sum(dZ3, axis=0)

    #layer2
    dA2 = np.dot(dZ3,W3.T)
    dZ2 = A2 * (1-A2) * dA2
    dW2 = 1/m * np.dot(A1.T , dZ2 )
    db2 = 1/m * np.sum(dZ2, axis=0)

    #layer3

    dA1 = np.dot(dZ2,W2.T)
    dZ1 = A1 * (1-A1) * dA1
    dW1 = 1/m * np.dot(X.T,dZ1)
    db1 = 1/m * np.sum(dZ1, axis=0)

    ## optimization

    

    W1 = W1 - lr * dW1
    b1 = b1 - lr * db1

    W2 = W2 - lr * dW2
    b2 = b2 - lr * db2

    W3 = W3 - lr * dW3
    b3 = b3 - lr * db3
    
    




Wall time: 0 ns
Epoch: 1, Loss: 4577.111710161919
Epoch: 2, Loss: 4576.320251816936
Epoch: 3, Loss: 4575.573257325634
Epoch: 4, Loss: 4574.867556131271
Epoch: 5, Loss: 4574.2002949562
Epoch: 6, Loss: 4573.568904795146
Epoch: 7, Loss: 4572.971069040574
Epoch: 8, Loss: 4572.40469251747
Epoch: 9, Loss: 4571.867871377428
Epoch: 10, Loss: 4571.358863909117
Epoch: 11, Loss: 4570.876062370588
Epoch: 12, Loss: 4570.4179659433585
Epoch: 13, Loss: 4569.983154854929
Epoch: 14, Loss: 4569.570265621451
Epoch: 15, Loss: 4569.177967232204
Epoch: 16, Loss: 4568.804937938826
Epoch: 17, Loss: 4568.44984213003
Epoch: 18, Loss: 4568.1113065700165
Epoch: 19, Loss: 4567.787895055972
Epoch: 20, Loss: 4567.478080303462
Epoch: 21, Loss: 4567.18021158976
Epoch: 22, Loss: 4566.89247636198
Epoch: 23, Loss: 4566.612853631345
Epoch: 24, Loss: 4566.339056505169
Epoch: 25, Loss: 4566.068460627028
Epoch: 26, Loss: 4565.798014571816
Epoch: 27, Loss: 4565.524127339287
Epoch: 28, Loss: 4565.242526966127
Epoch: 29, Loss: 

## With classes and functions 

- accommodates any number of layer in sequence manner
- supports regression and , sigmoid activation for hidden layers

In [320]:
class Layer():
    def __init__(self, inputs, outputs, activation='sigmoid', name=None,W=None, b=None):
        self.W = np.random.normal(size=(inputs, outputs)) * 0.1
        self.b = np.zeros(shape=(outputs))
        self.Z = None
        self.A = None
        self.dW, self.db, self.dZ, self.dA = None, None, None, None
        self.name = name
        self.activation = activation
    def __call__(self, X):
        self.Z = np.dot(X, self.W) + self.b
        if self.activation is not None:
            self.A = 1/(1+np.exp(-self.Z))
            return self.A
        return self.Z

In [321]:
class Model():
    def __init__(self, graph, name=None):
        self.layers = graph
        self.name = name
        self.A0 = None
    
    def __call__(self, X):
        self.A0 = X
        activations = X
        for layer in self.layers:
            activations = layer(activations)
        return activations
    
    def backPropagate(self, y, y_hat):
        
        m = y.shape[0]
        dy_hat = -2 * (y - y_hat)        
        self.layers[-1].dA = dy_hat
        
        l = len(self.layers) -1 
        while l >= 0:
            #print(l)
            layer = self.layers[l] ## layer object
            
            ## Check if activaiton is there
            if layer.activation is None:
                layer.dZ = layer.dA
            else:
                layer.dZ = layer.A * (1-layer.A) * layer.dA                
                
            if l==0: A_prev = self.A0
            else: A_prev = self.layers[l-1].A
                
            layer.dW = 1/m * np.dot(A_prev.T, layer.dZ) #   100,3 100,2
            layer.db = 1/m * np.sum(layer.dZ, axis=0)
            if l > 0:
                self.layers[l-1].dA = np.dot(layer.dZ,layer.W.T) ## 3,2 100,2
            l -= 1
            
    def optimize(self, lr = 0.01):
        for layer in self.layers:
            layer.W = layer.W - lr * layer.dW
            layer.b = layer.b - lr * layer.db
            
        
            

In [322]:
def loss_function(y, y_hat):
    m = y.shape[0]
    
    loss_value = 1/m * np.sum((y-y_hat)**2)
    return loss_value

In [323]:
l1 = Layer(2,3, W=W1, b=b1) 
l2 = Layer(3,2, W=W2, b=b2)
l3 = Layer(2,1, activation=None, W=W3, b=b3)


model1 = Model([
    l1,
    l2,
    l3
])

In [324]:
# defien a model

#model1 = Model([
#    Layer(2,3),
#    Layer(3,2),
#    Layer(2,1, activation=None)
#])

lr = 1e-2
epochs = 100

for epoch in range(epochs):
    ### forwprd propagate
    y_preds = model1(X)
    #print(y_preds)
    
    loss_value = loss_function(y, y_preds)
    model1.backPropagate(y, y_preds)
    model1.optimize(lr=lr)
    
    print(f"Epoch: {epoch+1}, loss: {loss_value}")
    
    

Epoch: 1, loss: 4578.443994926085
Epoch: 2, loss: 4577.568578802831
Epoch: 3, loss: 4576.7421099635885
Epoch: 4, loss: 4575.960556254161
Epoch: 5, loss: 4575.220316593636
Epoch: 6, loss: 4574.518165030393
Epoch: 7, loss: 4573.851197360397
Epoch: 8, loss: 4573.216778948154
Epoch: 9, loss: 4572.612492804127
Epoch: 10, loss: 4572.036087220841
Epoch: 11, loss: 4571.485422371955
Epoch: 12, loss: 4570.958415241454
Epoch: 13, loss: 4570.452982073256
Epoch: 14, loss: 4569.966977208932
Epoch: 15, loss: 4569.498126702122
Epoch: 16, loss: 4569.043954444812
Epoch: 17, loss: 4568.601697685498
Epoch: 18, loss: 4568.168207719636
Epoch: 19, loss: 4567.739830122735
Epoch: 20, loss: 4567.312257078444
Epoch: 21, loss: 4566.880341985164
Epoch: 22, loss: 4566.437863402495
Epoch: 23, loss: 4565.977221238962
Epoch: 24, loss: 4565.489042493237
Epoch: 25, loss: 4564.961666308613
Epoch: 26, loss: 4564.380467869536
Epoch: 27, loss: 4563.726966824513
Epoch: 28, loss: 4562.97764728872
Epoch: 29, loss: 4562.1023917

In [303]:
model1.layers[1].W

array([[-0.0153676 ,  0.07898518],
       [-0.12262158, -0.0948007 ],
       [-0.05696539, -0.09771502]])

In [304]:
W2

array([[-0.0153676 ,  0.07898518],
       [-0.12262158, -0.0948007 ],
       [-0.05696539, -0.09771502]])