### Improved model:NN

In [94]:
import numpy as np
from numpy import ndarray
from typing import Callable
from typing import Dict
from typing import Callable, Dict, Tuple, List

In [95]:
def sigmoid(x:ndarray) -> ndarray:
    return 1/(1+np.exp(-1.0*x))
def leaky_relu(x:ndarray) -> ndarray:
    return np.maximum(0.2* x,x)
def dev_sigmoid(x:ndarray) -> ndarray:
    sigm = sigmoid(x)
    return sigm*(1-sigm)
def dev_leaky_relu(x:ndarray) -> ndarray:
    dx = np.ones_like(x)
    dx[x < 0] = 0.2
    return dx

In [96]:
def init_weights(input_size:int, 
                hidden_size1:int, hidden_size2: int) -> Dict[str,ndarray]:
    weights: Dict[str,ndarray] = {}
    weights['W1'] = np.random.randn(input_size,hidden_size1)
    weights['B1'] = np.random.randn(1,hidden_size1)
    weights['W2'] = np.random.randn(hidden_size1,hidden_size2)
    weights['B2'] = np.random.randn(1,hidden_size2)
    weights['W3'] = np.random.randn(hidden_size2,1)
    weights['B3'] = np.random.randn(1,1)
    return weights

In [127]:
def forward(X: ndarray, y: ndarray,
            weights: Dict[str, ndarray]
           )-> Tuple[float, Dict[str, ndarray]]:
    M1 = X @ weights['W1']
    N1 = M1 + weights['B1']
    O1 = leaky_relu(N1)
    M2 = O1 @ weights['W2']
    N2 = M2 + weights['B2']
    O2 = sigmoid(N2)
    M3 = O2 @ weights['W3']
    P = M3 + weights['B3']
    loss = np.mean(np.power(y-P,2))
#     print("=================1===================")
#     print(weights['W1'].shape,M1.shape,N1.shape,O1.shape)
#     print("=================2===================")
#     print(weights['W2'].shape,M2.shape,N2.shape,O2.shape)
#     print("=================3===================")
#     print(weights['W3'].shape,M3.shape,P.shape,loss.shape)
    
    forward_info: Dict[str,ndarray] = {}
    forward_info['X'] = X
    forward_info['M1'] = M1
    forward_info['N1'] = N1
    forward_info['O1'] = O1
    forward_info['M2'] = M2
    forward_info['N2'] = N2
    forward_info['O2'] = O2
    forward_info['M3'] = M3
    forward_info['P'] = P
    forward_info['y'] = y
    forward_info['loss'] = loss
    return forward_info, loss
    

In [128]:
def loss_gradients(forward_info: Dict[str,ndarray], 
                   weights:Dict[str,ndarray]) -> Dict[str,ndarray]:
    X = forward_info['X']
    M1 = forward_info['M1']
    N1 = forward_info['N1']
    O1 = forward_info['O1']
    M2 = forward_info['M2']
    N2 = forward_info['N2']
    O2 = forward_info['O2']
    M3 = forward_info['M3']
    P = forward_info['P']
    y = forward_info['y']
    loss = forward_info['loss']
    
    W1 = weights['W1']
    W2 = weights['W2']
    W3 = weights['W3']
    B1 = weights['B1']
    B2 = weights['B2']
    B3 = weights['B3']
    
    dLdP = P - y
    dPdM3 = np.ones_like(M3)
    dPdB3 = np.ones_like(B3)
    dM3dW3 = O2.T
#            (10, 23)     (23, 1)    (23, 1)
#     print(dM3dW3.shape, dLdP.shape, dPdM3.shape)
    dLdW3 = dM3dW3 @ (dLdP * dPdM3)
    dLdB3 = (dLdP * dPdB3).sum(axis = 0)
    
    dM3dO2 = W3.T
    dO2dN2 = dev_sigmoid(O2)
    dN2dM2 = np.ones_like(M2)
    dM2dW2 = O1.T
    dN2dB2 = np.ones_like(B2)
    
#          (10, 23)       (23, 1)      (23, 1)     (1, 5)     (23, 5)    (23, 5)
#     print(dM2dW2.shape,dLdP.shape,dPdM3.shape,dM3dO2.shape,dO2dN2.shape,dN2dM2.shape)
    
    dLdW2 = dM2dW2 @ ((((dLdP * dPdM3) @ dM3dO2) * dO2dN2) *dN2dM2 )
    dLdB2 = (((dLdP * dPdM3) * dO2dN2) * dN2dB2).sum(axis = 0)
    
    dM1dO1 = W2.T
    dO1dN1 = dev_leaky_relu(O1)
    dN1dM1 = np.ones_like(M1)
    dM1dW1 = X.T
    dN1dB1 = np.ones_like(B1)
    
#          (13, 506)     (23, 1)    (23, 1)     (1, 5)      (23, 5)      (23, 5)     (5, 10)       (23, 10)       (23, 10)
#     print(dM1dW1.shape,dLdP.shape,dPdM3.shape,dM3dO2.shape,dO2dN2.shape,dN2dM2.shape,dM1dO1.shape, dO1dN1.shape,dN1dM1.shape)
    dLdW1 = dM1dW1 @ (((((((dLdP * dPdM3) @ dM3dO2) * dO2dN2) *dN2dM2 ) @ dM1dO1) * dO1dN1) * dN1dM1)
    dLdB1 = (((((((dLdP * dPdM3) @ dM3dO2) * dO2dN2) *dN2dM2 ) @ dM1dO1) * dO1dN1) * dN1dB1).sum(axis = 0)
    
    loss_gradients: Dict[str,ndarray] = {}
    loss_gradients['W3'] = dLdW3
    loss_gradients['B3'] = dLdB3
    loss_gradients['W2'] = dLdW2
    loss_gradients['B2'] = dLdB2
    loss_gradients['W1'] = dLdW1
    loss_gradients['B1'] = dLdB1
    return loss_gradients


In [129]:
def train(X:ndarray,
         y:ndarray,
         n_iter: int = 1000,
         learning_rate:float = 0.01,
         batch_size: int = 100,
         return_losses: bool = False,
         return_weights: bool = False,
         hidden_size1 = 10,
         hidden_size2 = 5) -> None:
    np.random.seed(42)
    start = 0
    
    #initialize random weights
    weights = init_weights(X_train.shape[1],hidden_size1 = hidden_size1, hidden_size2 = hidden_size2)
    
    perm = np.random.permutation(X.shape[0])
    x,y = X[perm],y[perm]
    
    if return_losses:
        losses = []
    
    # in case all data is used
    for i in range(n_iter):
        if start >= X.shape[0]:
            perm = np.random.permutation(X.shape[0])
            x,y = X[perm],y[perm]
            start = 0
        
        #reduce batch size if exceeds:
        if start >=  X.shape[0]:
            batch_size = X.shape[0] - start
        
        X_batch,y_batch = X[start:start+batch_size],y[start:start+batch_size]
        start += batch_size
        
        #train net using generated batch
        forward_info, loss = forward(X_batch,y_batch,weights)
        
        if return_losses:
            losses.append(loss)
        
        loss_grads = loss_gradients(forward_info,weights)
        
        #loss_grads and weights have same keys
        for key in weights.keys():
            weights[key] -= learning_rate * loss_grads[key]
        
    if return_weights:
        return losses, weights
    
    return None

            

In [130]:
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [131]:
boston = load_boston()
X,y = load_boston(return_X_y = True)
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_train,X_test, y_train,y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)
print(X_train.shape)

y_train,y_test = y_train.reshape(-1,1), y_test.reshape(-1,1)

(339, 13)


In [134]:
train_info = train(X_train,y_train,
                   n_iter = 10000,
                   learning_rate = 0.001,
                   batch_size = 23, 
                   return_losses = True,
                   return_weights = True)
losses = train_info[0]
weights = train_info[1]

  return 1/(1+np.exp(-1.0*x))


In [135]:
print(weights)

{'W1': array([[-4.82939595e+00, -1.68741039e+00, -2.80818810e+00,
         7.81697461e+00,  8.03743193e+00, -8.99364554e-01,
        -8.37463349e-01, -5.20299413e+00, -3.78229284e+00,
        -1.02481677e+00],
       [-7.40740274e+00,  1.07276790e+00, -3.27782504e+00,
         2.38930883e+00,  3.41123794e-01,  4.02800446e+00,
        -1.68731195e+00,  9.94420943e+00, -1.03527714e+00,
         1.25416503e+00],
       [-2.64093882e+00,  2.29114055e+00, -1.60393469e+00,
        -3.63500790e+00, -6.44118456e+00,  2.34491848e+00,
        -1.70672221e+00,  2.26063818e+00, -1.41861528e+00,
        -4.39045332e-01],
       [-2.89035186e+00,  1.08228243e+00, -1.55015032e+00,
        -1.37875437e+00, -2.61121214e+00, -6.13815867e+00,
         2.68688884e-01,  4.70061809e+00, -1.12183730e+00,
        -3.06825052e+00],
       [-8.16036925e-01, -1.92024897e-01, -5.40222650e-01,
         1.66081294e+00, -7.40477261e+00,  1.32624214e+00,
        -4.43486068e-01,  8.67805681e-01,  2.59185277e-01,
    

In [136]:
def predict(X:ndarray,
           weights: Dict[str,ndarray]) -> ndarray:
    M1 = X @ weights['W1']
    N1 = M1 + weights['B1']
    O1 = leaky_relu(N1)
    M2 = O1 @ weights['W2']
    N2 = M2 + weights['B2']
    O2 = sigmoid(N2)
    M3 = O2 @ weights['W3']
    P = M3 + weights['B3']
    return P

In [137]:
y_pred = predict(X_test,weights)

In [141]:
print(mean_squared_error(y_test,y_pred))
print(r2_score(y_test,y_pred))

77.8207267163059
-0.028306825674809133
