## Neural Network

In [386]:
from typing import Callable, Dict, Tuple, List
from numpy import ndarray
import numpy as np
#weights are dict with str as key and value as ndarray
#forward will return two values, first value is a float
#and second value is the weight

def init_weights(n_in:int) -> Dict[str,ndarray]:
    weights: Dict[str,ndarray] = {}
#     weights['W'] = np.full((n_in,1),1/n_in)
    weights['W'] = np.random.randn(n_in,1)
    weights['B'] = np.random.randn(1,1)
    return weights


def forward(X: ndarray,y:ndarray,weights:Dict[str,ndarray])  -> Tuple[float,Dict[str,ndarray]]: # Tuple(forward info, loss)
    
    # weights['B']
    # weights ['W']
    
#     print(weights['W'].shape)
    
    ### Assert batch sizes of X and y are equal
    assert X.shape[0] == y.shape[0]
    ### Assert that X and w can be dotted
    assert X.shape[1] == weights['W'].shape[0]
    
    ### Assert that B is just a value (is shape (1,1))
    assert weights['B'].shape[0] == 1
    assert weights['B'].shape[1] == 1
    
    ### compute N
    N = X @ weights['W']
    
    ### compute P
    P = N + weights['B']
    
    ### compute L
    L = np.mean(np.power(P-y,2))
    
    # save the information of N,P,L in a dictionary called forward_inf
    forward_info: Dict[str,ndarray] = {} # initializing dictionary with data type specified
        
    ### set the forward_info to remember X,N,P,y
    # for example
    #maybe use this for calculating gradients
    forward_info['X'] = X
    forward_info['N'] = N
    forward_info['P'] = P
    forward_info['y'] = y
    
    return forward_info, L

In [387]:
def backward(forward_info: Dict[str,ndarray],weights: Dict[str,ndarray]) -> Dict[str,ndarray]:
    
#     y = forward_info['y']
#     P = forward_info['P']
#     N = forward_info['N']
#     X = forward_info['X']
    
    
    dLdP = 2* (forward_info['P']-forward_info['y'])
    
    dPdN = np.ones_like(forward_info['N'])
    
    dPdB = np.ones_like(weights['B'])
    
    dLdN = dLdP * dPdN
    
    dNdW = forward_info['X'].T
    
    dLdW = dNdW @ dLdN
    
    dLdB = (dLdP * dPdB).sum(axis=0)
    
    grads: Dict[str,ndarray] = {}
        
    grads['W'] = dLdW
    grads['B'] = dLdB
    
    return grads

In [388]:
def permuteXY(X, y):
    perm = np.random.permutation(X.shape[0])
    return X[perm],y[perm]

def train(X: ndarray, y:ndarray, max_iter: int = 1000, learning_rate: float = 0.01, 
          batch_size: int= 100) -> None: # the weights change #< -- mini-batch gradient descent
    np.random.seed(42)
    start = 0 #<--  initialize start index for mini-batch (we are gonna do without replacement) # no data will be used more than once -> without replacement
    
    #get my weights dict
    weights = init_weights(X.shape[1]) #<-- init_weights look up there^
    
    #shuffle my X a little bit to increase generalizing power
    X,y = permuteXY(X,y)
    
    for i in range(max_iter):
        # in case all data used
        # index is exceeded
        if start >=  X.shape[0]: 
            # shuffle X again
            X,y = permuteXY(X,y)
            # restart the start index
            start = 0
            
        # if batch_size exceeds the last guy, reduce the batch size
        if start + batch_size > X.shape[0]:
            batch_size = X.shape[0] - start
            
        X_batch,y_batch = X[start:start+batch_size] , y[start:start+batch_size]
        start += batch_size
        
        # perform first prediction
        forward_info, loss = forward(X_batch,y_batch,weights)
        
        # calculate gradients
        loss_grad = backward(forward_info,weights)
        
        # update W and B
        weights['B'] -= learning_rate * loss_grad['B']
        weights['W'] -= learning_rate * loss_grad['W']
        
    return weights
    

In [389]:
##### lets load some data
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

#### so please load boston
X, y = load_boston(return_X_y=True)
#boston = load_boston()
#X = boston.data
#y = boston.target

#### please standardize them
scaler = StandardScaler()
X = scaler.fit_transform(X)

#### train test split them
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size =0.3,random_state = 42)

# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

##### reshape y to (m,1) < --- because our code want 1 there
y_train = y_train.reshape(-1,1)
y_test = y_test.reshape(-1,1)

In [390]:
weights = train(X_train,y_train,max_iter = 10000, learning_rate = 3e-4, batch_size = 20)


In [391]:
def predict(X:ndarray,weights: Dict[str,ndarray]):
    pred = X@weights['W'] + weights['B']
    return pred

In [392]:
ypred = predict(X_test,weights)
mean_s_r = mean_squared_error(y_test,ypred)
r2 =r2_score(y_test,ypred)

In [393]:
print(mean_s_r)
print(r2)

22.508459589737047
0.697926124016198
