# Prepping data

In [3]:
# file imports
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from scipy.optimize import minimize as minimize
from itertools import islice
import time

In [4]:
# Split the data into training, test and validation sets
def data_split(X, y, test_size=0.2, val_size=0.2, random_state=1):
    """Split ``X`` and ``y`` into training, test and validation subsets.

    Two successive ``train_test_split`` calls are made: the first holds out
    the test set, the second carves the validation set out of what is left.
    ``val_size`` is therefore a fraction of the *remaining* data, not of the
    full data set (with the defaults the partition is roughly 64% train,
    20% test and 16% validation).

    Parameters
    ----------
    X, y : array-like
        Samples and targets, split along their first axis.
    test_size : float or int, optional
        Forwarded to the first ``train_test_split`` call (default 0.2).
    val_size : float or int, optional
        Forwarded to the second ``train_test_split`` call (default 0.2).
    random_state : int or None, optional
        Seed used for both splits (default 1).

    Returns
    -------
    tuple
        ``(X_train, X_test, X_val, y_train, y_test, y_val)``.
    """
    # First cut: hold out the test set.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    # Second cut: take the validation set out of the remaining samples.
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_size, random_state=random_state)
    return X_train, X_test, X_val, y_train, y_test, y_val

# Read the spreadsheet; the first two columns become the inputs X and the
# third column becomes the target y.
data = pd.read_excel(r'C:\Users\RE-Giorgio\Downloads\dataPoints.xlsx')
X, y = np.array(data.iloc[:, :2]), np.array(data.iloc[:, 2])

X_train, X_test, X_val, y_train, y_test, y_val = data_split(X, y)

# Transpose the input matrices so that samples run along the second axis.
X_train, X_test, X_val = X_train.T, X_test.T, X_val.T

In [12]:
# Sanity check of the training inputs after the transpose above
# (first axis: input columns, second axis: samples).
X_train.shape

(2, 192)

# Neural Network parent class

In [138]:
class ShallowNeuralNetwork:
    """Base class for a one-hidden-layer network driven by a flat parameter vector.

    The class stores the data and the parameters ``w``, ``b`` and ``v``,
    converts between those three arrays and the single flat vector handed to
    an optimizer, and evaluates the half mean squared error with or without
    an L2 penalty. Subclasses supply the forward pass by overriding
    :meth:`predict`.
    """

    def __init__(self, X, y, N, sigma, rho=10e-6, init_std=0.0):
        """Store the data and build the initial parameters.

        Parameters
        ----------
        X : ndarray
            Input data; ``X.shape[0]`` is used as the number of columns of ``w``.
        y : ndarray
            Targets; ``y.shape[0]`` sizes the ``output`` buffer.
        N : int
            Number of hidden units (rows of ``w`` and ``b``, columns of ``v``).
        sigma : float
            Stored on the instance and handed to ``predict``; its meaning is
            defined by the subclass.
        rho : float, optional
            Regularization weight stored as ``self.rho``. The default
            ``10e-6`` (which equals ``1e-5``) is the previously hard-coded value.
        init_std : float, optional
            Standard deviation of the normal draws used for initialization.
            The default ``0.0`` reproduces the previous behaviour, where every
            entry equals the mean of its distribution.
        """
        self.X = X
        self.y = y
        self.N = N
        # NOTE(review): with init_std == 0 every hidden unit starts with the
        # same parameters (w = 0.5, b = 0, v = 0.5); pass a positive init_std
        # if a random start is wanted.
        self.w = np.random.normal(0.5, init_std, (self.N, self.X.shape[0]))
        self.b = np.random.normal(0, init_std, (self.N, 1))
        self.v = np.random.normal(0.5, init_std, (1, self.N))
        self.output = np.zeros(y.shape[0])
        self.rho = rho
        self.sigma = sigma

    def concatenate(self, l):
        """Flatten every array in ``l`` and join them into one 1-D vector."""
        flattened = [np.array(array).reshape(-1) for array in l]
        return np.concatenate(flattened)

    def separate(self, l):
        """Split a flat parameter vector back into ``(w, b, v)``.

        Parameters
        ----------
        l : array-like
            Flat vector laid out as ``w`` (``N * X.shape[0]`` values), then
            ``b`` (``N`` values), then ``v`` (``N`` values).

        Returns
        -------
        tuple of ndarray
            ``w`` with shape ``(N, X.shape[0])``, ``b`` with shape ``(N, 1)``
            and ``v`` with shape ``(1, N)``.

        Raises
        ------
        ValueError
            If ``l`` does not contain exactly the expected number of values.
        """
        n_inputs = self.X.shape[0]
        sizes = [n_inputs * self.N, self.N, self.N]
        flat = np.asarray(l).reshape(-1)
        expected = sum(sizes)
        if flat.size != expected:
            # Previously an over-long vector was silently truncated.
            raise ValueError(
                "expected %d parameters, got %d" % (expected, flat.size))
        # Split only at the interior boundaries so exactly three chunks result.
        w_flat, b_flat, v_flat = np.split(flat, np.cumsum(sizes)[:-1])
        w = w_flat.reshape(self.N, n_inputs)
        b = b_flat.reshape(self.N, 1)
        v = v_flat.reshape(1, self.N)
        return w, b, v

    def predict(self, x, w, b, v, sigma):
        """Forward pass; must be provided by subclasses.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("subclasses must implement predict()")

    def loss(self, params, X, y, rho, sigma):
        """Regularized objective evaluated on a flat parameter vector.

        Returns the half mean squared error of ``predict`` on ``(X, y)`` plus
        ``rho`` times the squared Euclidean norm of ``params``.
        """
        w, b, v = self.separate(params)
        penalty = rho * np.square(np.linalg.norm(params))
        return self.mse(X, y, w, b, v, sigma) + penalty

    def mse(self, X, y, w=None, b=None, v=None, sigma=None):
        """Half mean squared error of ``predict`` on ``(X, y)``.

        Any of ``w``, ``b``, ``v`` and ``sigma`` left as ``None`` falls back
        to the value stored on the instance, so ``mse(X, y)`` evaluates the
        current parameters.
        """
        w = self.w if w is None else w
        b = self.b if b is None else b
        v = self.v if v is None else v
        sigma = self.sigma if sigma is None else sigma
        residual = self.predict(X, w, b, v, sigma) - y
        return 0.5 * np.mean(np.square(residual))

## MLP child class

In [146]:
class Mlp(ShallowNeuralNetwork):
    """Shallow network with a tanh hidden layer, fitted with SciPy's BFGS."""

    def g(self, x, sigma):
        """Activation function ``tanh(sigma * x)``.

        This is mathematically the same as
        ``(1 - exp(-2*x*sigma)) / (1 + exp(-2*x*sigma))``, but ``np.tanh``
        saturates to -1 / 1 for large arguments, whereas the explicit quotient
        overflows to ``inf / inf`` and yields NaN for large negative ``x * sigma``.
        """
        return np.tanh(sigma * x)

    def predict(self, x, w=None, b=None, v=None, sigma=None):
        """Forward pass ``v . g(w . x + b)``.

        Parameters
        ----------
        x : ndarray
            Inputs; multiplied from the left by ``w``.
        w, b, v, sigma : optional
            Parameters to evaluate. Any value left as ``None`` falls back to
            the one stored on the instance, so ``predict(x)`` uses the current
            parameters.

        Returns
        -------
        ndarray
            The network output, also stored in ``self.output``.
        """
        w = self.w if w is None else w
        b = self.b if b is None else b
        v = self.v if v is None else v
        sigma = self.sigma if sigma is None else sigma

        z = np.dot(w, x) + b
        self.output = np.dot(v, self.g(z, sigma))
        return self.output

    def optimize(self):
        """Fit ``w``, ``b`` and ``v`` by minimizing ``self.loss`` with BFGS.

        The optimizer starts from the current parameters, flattened into one
        vector. No ``jac`` is passed to ``minimize``, so SciPy estimates the
        gradient numerically. The optimizer result and the fitted parameters
        are printed, and the fitted parameters replace ``self.w``, ``self.b``
        and ``self.v``.

        Returns
        -------
        tuple
            ``(func_eval, grad_eval, iterations, opt_fun, time_elapsed)``:
            the ``nfev``, ``njev``, ``nit`` and ``fun`` fields of the
            optimizer result, and the wall-clock time of the ``minimize``
            call in seconds.
        """
        function_args = (self.X, self.y, self.rho, self.sigma)
        inits = self.concatenate([self.w, self.b, self.v])

        start = time.time()
        result = minimize(self.loss, x0=inits, method='BFGS', args=function_args)
        time_elapsed = time.time() - start

        fitted = self.separate(result.x)
        print(result)
        print(fitted)
        self.w, self.b, self.v = fitted

        return result.nfev, result.njev, result.nit, result.fun, time_elapsed

## First run

In [147]:
# Fit an 18-unit network with sigma = 1 on the training split, then report the
# unregularized training error of the fitted parameters.
nn = Mlp(X=X_train, y=y_train, N=18, sigma=1)
(func_eval, grad_eval, iterations, opt_fun, time_elapsed) = nn.optimize()
print("Training Error :", nn.mse(X_train, y_train, w=nn.w, b=nn.b, v=nn.v, sigma=nn.sigma))

      fun: 0.0030902163435267958
 hess_inv: array([[ 5.84746902e+01,  4.83143225e+01,  8.93330156e+00, ...,
         9.02059089e+00,  5.31013806e+01,  1.54424483e+02],
       [ 4.83143225e+01,  2.34692504e+03,  1.53660027e+02, ...,
        -8.61687406e+01,  1.13746025e+02, -8.15955096e+01],
       [ 8.93330156e+00,  1.53660027e+02,  8.66181015e+01, ...,
        -1.40183261e+01, -4.46829379e+01, -8.07626574e+01],
       ...,
       [ 9.02059089e+00, -8.61687407e+01, -1.40183261e+01, ...,
         8.78136406e+03,  3.33550539e+02, -5.82716422e+02],
       [ 5.31013806e+01,  1.13746025e+02, -4.46829379e+01, ...,
         3.33550539e+02,  1.87918323e+04, -1.55451093e+04],
       [ 1.54424483e+02, -8.15955096e+01, -8.07626574e+01, ...,
        -5.82716422e+02, -1.55451093e+04,  2.15918506e+04]])
      jac: array([-1.49034895e-06, -4.68737562e-06,  9.22736945e-06, -7.37581286e-06,
       -1.10032852e-06, -2.26288103e-06, -1.96119072e-06, -2.45953561e-06,
       -2.07999256e-06, -1.37512689e-0

ValueError: shapes (18,1) and (2,192) not aligned: 1 (dim 1) != 2 (dim 0)

In [150]:
# Evaluate the fitted network on the test split. The fitted parameters are
# passed explicitly so the calls match the predict / mse signatures.
nn.predict(X_test, nn.w, nn.b, nn.v, nn.sigma)
nn.mse(X_test, y_test, nn.w, nn.b, nn.v, nn.sigma)

1.2219066124006976

In [114]:
# NOTE(review): no visible cell defines notebook-level `w`, `b` or `v`, so on a
# fresh kernel this cell presumably raises NameError — confirm whether it is
# leftover scratch cleanup before keeping it.
del w
del b
del v