In [76]:
"""
An Implementation of the method Neural Ordinary Differential 
Equation presented in: https://arxiv.org/abs/1806.07366


TODO: 
- implement a residual neural network
# - add the training loop
# - add the backpropagation

- implement a neural ODE


NOTES:
- residual structure doesn't make sense? inputs and outputs in the 
  residual block are being broadcasted as they don't have the same 
  dimensions. Also specifiying different depths has no effect on 
  the model predictions.

"""

import numpy as np
import pandas as pd

"""
Initialse the model parameters.
"""
def init_weights(layers, scale=1.0, seed=0):
    rng = np.random.RandomState(seed)
    
    #########################################
    # TODO: add in additional weights layer
    #########################################
    
    return [(scale * rng.randn(m, n), scale * rng.randn(n)) for m, n in zip(layers[:-1], layers[1:])]

"""
A basic residual neural network model set up so that 
skips are performed between layers of equal dimensions.
"""
class residual_NN:    
    def __init__(self, layers):
        
        # intialise the parameters
        self.weights = init_weights(layers)
        self.A = []
        
        # hyperparams
        self.lr = 1e-5
    
    """
    Get the forward prediction of shape (batch_size, state_dim)
    """
    def __call__(self, X):     
        
        A_log = [X]
        for w, b in self.weights:    
            
            # linear + activation
            Z = np.dot(X, w) + b  
            A = np.tanh(Z) 
            X = A
            
            # log hidden states
            A_log.append(A)
            
        # set intermediate states
        self.A = A_log[:-1] + [Z]
        
        return Z   
    
    """
    Update the model weights.
    """
    def step(self, Y):
        
        for idx, (w, b) in reversed(list(enumerate(self.weights))):
            
            # compute the cost function
            if idx == (len(self.weights) - 1):                
                dz = (2/Y.shape[1]) * np.sum(Y - self.A[idx], axis=1, keepdims=True) 
                dw = -np.dot(self.A[idx-1].T, dz)                
                db = -np.sum(dz, axis=0)
                
                print(w.shape)
                print(dw.shape)
                
                self.weights[idx] = (w + dw * self.lr, b + db * self.lr)
                continue
            
            # update the hidden layers               
            dz = (1 - np.square(np.tanh(self.A[idx+1]))) * np.dot(dz, self.weights[idx + 1][0].T)
            dw = -np.dot(self.A[idx].T, dz)
            db = -np.sum(dz, axis=0)
            self.weights[idx] = (w + dw * self.lr, b + db * self.lr)            
        
        asdlaskd
    
"""
Simple Mean-Squared Error Loss
"""
def mse_loss(true, pred):    
    return np.mean(np.sum(np.square(true - pred), axis=1))

"""
Run the training loop for the residual model.
"""
def train_model(model, dataset, loss_func, epochs=10, batch_size=32):
    
    train, val = dataset[0], dataset[1]    
    for ep in range(epochs):
        
        losses = []
        iters = int(len(train) // batch_size) + 1 
        for it in range(iters):
            
            # get a batch of data            
            x_tr = train[it * batch_size: min((it + 1) * batch_size, len(train)) , :-1]            
            y_tr = train[it * batch_size: min((it + 1) * batch_size, len(train)) , -1]
            
            # get the prediction
            y_pred = model(x_tr)
            loss = loss_func(y_tr, y_pred)
            
            # track losses
            losses.append(loss)
            
            # update the weights 
            model.step(y_tr.reshape(-1, 1))
            
        # get validation loss
        x_val, y_val = val[:, :-1], val[:, -1]            
        y_val_pred = model(x_val)
        val_loss = loss_func(y_tr, y_val_pred)
        
        # display loss
        print('Ep: {} - Loss: {} - Val Loss:{} '.format(ep + 1, round(np.mean(losses), 3), round(val_loss, 3)))    


# load in the dataset
dataset = pd.read_csv("./Data/insurance_train.csv")

# convert columns to categorical
dataset["sex"] = dataset["sex"].astype('category')
dataset["region"] = dataset["region"].astype('category')
dataset["smoker"] = dataset["smoker"].astype('category')

# get the categorical columns
cat_columns = dataset.select_dtypes(['category']).columns
dataset[cat_columns] = dataset[cat_columns].apply(lambda x: x.cat.codes)

# convert to a numpy array and normalise
array = dataset.to_numpy()
mean, std = np.mean(array, axis=0), np.std(array, axis=0)
array = (array - mean) / std

# shuffle array and get split
np.random.shuffle(array)
train_s, val_s, test_s = round(0.8 * len(array)), round(0.1 * len(array)), round(0.1 * len(array)) 
train, val, test = array[:train_s, :], array[train_s:(train_s+val_s), :], array[-test_s:, :]

# instantiate the model
x_dim, y_dim = train.shape[1] - 1, 1 
model = residual_NN(
    layers=[x_dim, 60, 10, 60, y_dim],
)

# train the model
train_model(
    model=model,
    dataset=(train, val),
    loss_func=mse_loss,
)



(60, 1)
(10, 1)


ValueError: operands could not be broadcast together with shapes (60,1) (10,1) 