#### Train an MLP with 2 inputs, 3-4+ hidden units and one output on the following examples (XOR function):


Step 1) Build a perceptron
A stack of perceptrons together == hidden layer a.k.a a dense layer

In [259]:
import numpy as np
import pandas as pd
np.random.seed(42)
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn import metrics


In [81]:
class MLP:
    def __init__(self, X, y, hidden_units=3, epochs=100, learning_rate=.1, random_state=None):
        self.X = X
        self.y = y
        self.hidden_units = hidden_units
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.random_state = random_state
    
    def checkParameters(self):
        self.num_inputs = self.X.shape[1]
        self.num_outputs = self.y.shape[1]
        if self.X.shape[0] != self.y.shape[0]:
            raise ValueError('X and Y have mismatched shapes')
        if self.num_inputs < 1:
            raise ValueError('Must have at least 1 input')
        if self.num_outputs < 1:
            raise ValueError('Must have at least 1 output')
        if self.hidden_units < 1:
            raise ValueError('Must have at least 1 hidden unit')
        if self.epochs < 1:
            raise ValueError('Must train for at least 1 epoch')
        #Is it a max of 1 also??
        if self.learning_rate <=0:
            raise ValueError('Learning rate must be greater than 0')
    
    def tan_h_deriv(self, x):
        return 1 - (x**2)
    def sigmoid(self, x, deriv=False):
        sig = (1 / (1 + np.exp(-x)))
        if deriv is True:
            return x * (1-x)
        return (1 / (1 + np.exp(-x)))
     
    def myLogLoss(self, deriv=False):
        #epsilon is to avoid divide by zero errors
        epsilon = 1e-5
        if deriv is True:
            return -(self.y/self.output) + ((1-self.y)/(1-self.output))
        return (1/self.num_inputs) * np.sum(-((self.y * np.log(self.output+epsilon)) + (1 - self.y)  * np.log(1 - self.output + epsilon)))
    
 
    def forward(self):
        self.z1 = self.X@self.w1 + self.b1
        self.a1 = np.tanh(self.z1)
        self.z2 = self.a1@self.w2 + self.b2
        self.output = self.sigmoid(self.z2)
   
    
    def backward(self):
        self.loss = self.myLogLoss()
        d_output = self.myLogLoss(deriv=True) * self.sigmoid(self.output, deriv=True)
        d_w2 = (1/self.num_inputs) * (self.a1.T @ d_output)
        d_b2 = (1/self.num_inputs) * (np.sum(d_output, axis = 0, keepdims=True))
        d_z1 = (d_output@self.w2.T) * self.tan_h_deriv(self.a1)
        d_w1 = (1/self.num_inputs) * (self.X.T@d_z1)
        d_b1 = (1/self.num_inputs) * (np.sum(d_z1, axis=0, keepdims=True))

        self.w1 -= (self.learning_rate * d_w1)
        self.w2 -= (self.learning_rate * d_w2)
        self.b1 -= (self.learning_rate * d_b1)
        self.b2 -= (self.learning_rate * d_b2)

    def fit(self, x, y):
        self.checkParameters()
        if self.random_state is not None:
            np.random.seed(self.random_state)
        
        #Xavier initialization to train model to converge faster
        self.w1 = np.random.normal(loc=0.0,
                                scale=np.sqrt(2/(self.num_inputs+self.hidden_units)),
                                size = (self.num_inputs, self.hidden_units))
        self.w2 = np.random.normal(loc=0.0,
                                scale=np.sqrt(2/(self.hidden_units+self.num_outputs)),
                                size = (self.hidden_units, self.num_outputs))
        self.b1 = np.zeros((1,self.hidden_units))       
        self.b2 = np.zeros((1,self.num_outputs))
        for i in range (self.epochs):
            self.forward()
        #    print("Epoch {} Loss is {}".format(i+1, self.loss))
            self.backward()
    
    def predict(self, x):
        self.X = x
        self.forward()
        return self.output

In [82]:
x = np.array(([0, 1],[1,0], [1,1], [0,0]))
y = np.array(([1],[1],[0],[0]))
test = MLP(x,y,4,random_state=0, epochs=100000)
test.fit(x,y)

In [83]:
np.set_printoptions(suppress=True)
test.predict(x)

array([[0.99985229],
       [0.99998968],
       [0.00014796],
       [0.00009289]])

In [84]:
data = []
y_data = []
for n in range(100):
    data.append(np.random.uniform(low=-1.0,high=1.0, size=(4, 1)))
    y_data.append(np.sin(data[-1][0] - data[-1][1] + data[-1][2] - data[-1][3]))
y_data = np.array(y_data)
X_data = np.array(data)
X_data = X_data.reshape(100, 4)

In [85]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, random_state=0, test_size = 0.25, train_size = 0.75)

In [86]:
print(np.max(y_data))
print(np.min(y_data))

0.9998459796152872
-0.9996706301937582


In [87]:
class MLP:
    def __init__(self, X, y, hidden_units=3, epochs=100, learning_rate=.1, random_state=None):
        self.X = X
        self.y = y
        self.hidden_units = hidden_units
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.random_state = random_state

    def checkParameters(self):
        self.num_inputs = self.X.shape[1]
        self.num_outputs = self.y.shape[1]
        if self.X.shape[0] != self.y.shape[0]:
            raise ValueError('X and Y have mismatched shapes')
        if self.num_inputs < 1:
            raise ValueError('Must have at least 1 input')
        if self.num_outputs < 1:
            raise ValueError('Must have at least 1 output')
        if self.hidden_units < 1:
            raise ValueError('Must have at least 1 hidden unit')
        if self.epochs < 1:
            raise ValueError('Must train for at least 1 epoch')
        #Is it a max of 1 also??
        if self.learning_rate <=0:
            raise ValueError('Learning rate must be greater than 0')


    def sigmoid(self, x, deriv=False):
        if deriv is True:
            return x * (1-x)
        return (1 / (1 + np.exp(-x)))

    def myLogLoss(self, deriv=False):
        #epsilon is to avoid divide by zero errors
        epsilon = 1e-5
    
        if deriv is True:
            return -(self.y/self.output) + ((1-self.y)/(1-self.output))
        return (1/self.num_inputs) * np.sum(-((self.y * np.log(self.output+epsilon)) + (1 - self.y)  * np.log(1 - self.output + epsilon)))
    
    def MSE(self, deriv=False):
        if deriv == True:
            return -2 * (self.y - self.output)
        return (np.square(self.y - self.output).mean())

    def forward(self):
        self.z1 = self.X@self.w1 + self.b1
        self.a1 = np.tanh(self.z1)
        self.z2 = self.a1@self.w2 + self.b2
        self.output = np.tanh(self.z2)
    
    def tan_h_deriv(self, x):
        return 1 - (x**2)
    
    def backward(self):
        self.loss = self.MSE()
        #added the times F'() see results four. Without it resutlts1
      #  d_output = (self.output - self.y) * (1-(self.output**2))
        d_output = self.MSE(deriv=True) * self.tan_h_deriv(self.output)
        d_w2 = (1/self.num_inputs) * (self.a1.T @ d_output)
        d_b2 = (1/self.num_inputs) * (np.sum(d_output, axis = 0, keepdims=True))
        d_z1 = (d_output@self.w2.T) * self.tan_h_deriv(self.a1)
        d_w1 = (1/self.num_inputs) * (self.X.T@d_z1)
        d_b1 = (1/self.num_inputs) * (np.sum(d_z1, axis=0, keepdims=True))

        self.w1 -= (self.learning_rate * d_w1)
        self.w2 -= (self.learning_rate * d_w2)
        self.b1 -= (self.learning_rate * d_b1)
        self.b2 -= (self.learning_rate * d_b2)

    def fit(self, x, y):
        self.checkParameters()
        if self.random_state is not None:
            np.random.seed(self.random_state)

#         Xavier initialization to train model to converge faster
        self.w1 = np.random.normal(loc=0.0,
                                scale=np.sqrt(2/(self.num_inputs+self.hidden_units)),
                                size = (self.num_inputs, self.hidden_units))
        self.w2 = np.random.normal(loc=0.0,
                                scale=np.sqrt(2/(self.hidden_units+self.num_outputs)),
                                size = (self.hidden_units, self.num_outputs))
        self.b1 = np.zeros((1,self.hidden_units))
        self.b2 = np.zeros((1,self.num_outputs))
        for i in range (self.epochs):
            self.forward()
        #    print("Epoch {} Loss is {}".format(i+1, self.loss))
            self.backward()

    def predict(self, x):
        self.X = x
        self.forward()
        return self.output

In [103]:
results6 = pd.DataFrame(index=['epochs', 'hidden units', 'train_error', 'test_error'])

In [83]:
results4 = pd.DataFrame(index=['epochs', 'hidden units', 'train_error', 'test_error'])

In [58]:
results = pd.DataFrame(index=['epochs', 'hidden units', 'train_error', 'test_error'])

In [66]:
results2 = pd.DataFrame(index=['epochs', 'hidden units', 'train_error', 'test_error'])

In [70]:
results3 = pd.DataFrame(index=['epochs', 'hidden units', 'train_error', 'test_error'])

In [59]:
def SSE(predict, y):
    return np.power((predict - y), 2).mean()

In [60]:
test_epochs = [1000, 10000, 100000, 1000000]

In [104]:
model = 1
for epoch in test_epochs:
    for hidden in range(2, 6):
        print('testing model {}'.format(model))
        test = MLP(X_train,y_train,hidden,random_state=0, epochs=epoch)
        test.fit(X_train, y_train)
        train_error = SSE(test.predict(X_train), y_train)
        test_error = SSE(test.predict(X_test), y_test)
        results6[model] = [epoch, hidden, train_error, test_error]
        model += 1

testing model 1
testing model 2
testing model 3
testing model 4
testing model 5
testing model 6
testing model 7
testing model 8
testing model 9
testing model 10
testing model 11
testing model 12
testing model 13
testing model 14
testing model 15
testing model 16


In [63]:
results

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
epochs,1000.0,1000.0,1000.0,1000.0,10000.0,10000.0,10000.0,10000.0,100000.0,100000.0,100000.0,100000.0,1000000.0,1000000.0,1000000.0,1000000.0
hidden units,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0
train_error,0.025814,0.02937,0.015715,0.018677,0.020232,0.029754,0.009606,0.016511,0.020245,0.030341,0.006701,0.018379,0.02025,0.026067,0.00649,0.015274
test_error,0.042875,0.04252,0.038239,0.029322,0.038879,0.049897,0.051454,0.044841,0.03891,0.057722,0.065705,0.063738,0.038916,0.086603,0.070086,0.06389


In [85]:
results4

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
epochs,1000.0,1000.0,1000.0,1000.0,10000.0,10000.0,10000.0,10000.0,100000.0,100000.0,100000.0,100000.0,1000000.0,1000000.0,1000000.0,1000000.0
hidden units,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0
train_error,0.020846,0.020371,0.012607,0.020393,0.018232,0.012038,0.008561,0.009684,0.017248,0.009807,0.008554,0.005275,0.017128,0.009162,0.008962,0.008278
test_error,0.035976,0.033296,0.019646,0.031125,0.040651,0.030903,0.039211,0.027347,0.04023,0.036524,0.046355,0.025387,0.040174,0.073451,0.0488,0.037489


In [101]:
results5

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
epochs,1000.0,1000.0,1000.0,1000.0,10000.0,10000.0,10000.0,10000.0,100000.0,100000.0,100000.0,100000.0,1000000.0,1000000.0,1000000.0,1000000.0
hidden units,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0,2.0,3.0,4.0,5.0
train_error,0.02079,0.020523,0.021659,0.020244,0.020394,0.020078,0.014162,0.015391,0.021239,0.020149,0.020396,0.017172,0.021239,0.019867,0.022054,0.016973
test_error,0.036388,0.037338,0.038756,0.047729,0.037215,0.036336,0.093349,0.067977,0.03746,0.037888,0.159024,0.07839,0.03746,0.037601,0.157637,0.081951


In [315]:
class MLP:
    def __init__(self, num_inputs, num_outputs,
                 hidden_units=3, epochs=100, learning_rate=.1,
                 random_state=None, loss='log',
                 first_activation='sig', second_activation='sig'):
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.hidden_units = hidden_units
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.random_state = random_state
        self.loss_type = loss
        self.a_func_1 = first_activation
        self.a_func_2 = second_activation
        
    def checkParameters(self):
        activations = ['sig', 'tanh', 'softmax', 'relu']
        self.num_inputs = self.X.shape[1]
        self.num_outputs = self.y.shape[1]
        if self.X.shape[0] != self.y.shape[0]:
            raise ValueError('X and Y have mismatched shapes')
        if self.num_inputs < 1:
            raise ValueError('Must have at least 1 input')
        if self.num_outputs < 1:
            raise ValueError('Must have at least 1 output')
        if self.hidden_units < 1:
            raise ValueError('Must have at least 1 hidden unit')
        if self.epochs < 1:
            raise ValueError('Must train for at least 1 epoch')
        #Is it a max of 1 also??
        if self.learning_rate <=0:
            raise ValueError('Learning rate must be greater than 0')
        if self.loss_type not in ['log', 'mse']:
            raise ValueError('Unknown loss function {}'.format(self.loss_type))
        if self.a_func_1 not in activations:
            raise ValueError('Unknown activation function {}'.format(self.a_func_1))
        if self.a_func_2 not in activations:
            raise ValueError('Unknown activation function {}'.format(self.a_func_2))

        
    def setActivation(self, active):
        if active == 'sig':
            def active(x, deriv=False):
                sig = (1 / (1 + np.exp(-x)))
                if deriv is True:
                    return sig * (1-sig)
                return sig
        
        elif active == 'tanh':
            def active(x, deriv=False):
                if deriv is True:
                    return 1 - (x**2)
                return np.tanh(x)
        
        elif active == 'softmax':
            # from https://mlfromscratch.com/neural-network-tutorial/#/
            def active(x, deriv=False):
                # for numerical stability, values are normalised
                exps = np.exp(x - np.max(x, axis=1, keepdims=True))
                if deriv is True:
                    return exps / np.sum(exps) * (1 - exps / np.sum(exps))
                return exps / np.sum(exps, axis=1, keepdims=True)
        
        else: # RELU
            def active(x, deriv=False):
                if deriv is True:
                    return np.where(x >= 0, 1, 0)
                return np.maximum(x, 0)
        
        return active

    def setLossFunction(self, loss):
        if loss == 'log':
            def loss(deriv=False):
                epsilon = 1e-5
                if deriv is True:
                    return -(self.y/self.output) + ((1-self.y)/(1-self.output))
                return (1/self.num_inputs) * np.sum(-((self.y * np.log(self.output+epsilon)) + (1 - self.y)  * np.log(1 - self.output + epsilon)))
        else:
            def loss(deriv=False):
                if deriv == True:
                    return -2 * (self.y - self.output)
                return (np.square(self.y - self.output).mean())
        return loss
        
    def forward(self):
        self.z1 = self.X@self.w1 + self.b1
        self.a1 = self.a_func_1(self.z1)
        self.z2 = self.a1@self.w2 + self.b2
        self.output = self.a_func_2(self.z2)
    
    
    def backward(self):
        self.loss = self.LossFunction()
        #added the times F'() see results four. Without it resutlts1
      #  d_output = (self.output - self.y) * (1-(self.output**2))
        d_output = self.LossFunction(deriv=True) * self.a_func_2(self.output, deriv=True)
        d_w2 = (1/self.num_inputs) * (self.a1.T @ d_output)
        d_b2 = (1/self.num_inputs) * (np.sum(d_output, axis = 0, keepdims=True))
        d_z1 = (d_output@self.w2.T) * self.a_func_1(self.a1, deriv=True)
        d_w1 = (1/self.num_inputs) * (self.X.T@d_z1)
        d_b1 = (1/self.num_inputs) * (np.sum(d_z1, axis=0, keepdims=True))

        self.w1 -= (self.learning_rate * d_w1)
        self.w2 -= (self.learning_rate * d_w2)
        self.b1 -= (self.learning_rate * d_b1)
        self.b2 -= (self.learning_rate * d_b2)

    def fit(self, x, y):
        self.X = x
        self.y = y
        self.checkParameters()
        self.LossFunction = self.setLossFunction(self.loss_type)
        self.a_func_1 = self.setActivation(self.a_func_1)
        self.a_func_2 = self.setActivation(self.a_func_2)
        if self.random_state is not None:
            np.random.seed(self.random_state)

#         Xavier initialization to train model to converge faster
        self.w1 = np.random.normal(loc=0.0,
                                scale=np.sqrt(2/(self.num_inputs+self.hidden_units)),
                                size = (self.num_inputs, self.hidden_units))
        self.w2 = np.random.normal(loc=0.0,
                                scale=np.sqrt(2/(self.hidden_units+self.num_outputs)),
                                size = (self.hidden_units, self.num_outputs))
        self.b1 = np.zeros((1,self.hidden_units))
        self.b2 = np.zeros((1,self.num_outputs))
        for i in range (self.epochs):
            self.forward()
            self.backward()
            print("Epoch {} Loss is {}".format(i+1, self.loss))


    def predict(self, x):
        self.X = x
        self.forward()
        return self.output

In [316]:
X_data = pd.read_csv('letter-recognition.data', sep=",")

In [317]:
y_data = np.array(X_data.pop(X_data.columns[0]))
X_data = np.array(X_data)

In [318]:
y_data = y_data.astype(dtype='<U1')

In [319]:
unique, counts = np.unique(y_data, return_counts=True)
values = dict(zip(unique, counts))

In [320]:
values

{'A': 789,
 'B': 766,
 'C': 736,
 'D': 805,
 'E': 768,
 'F': 775,
 'G': 773,
 'H': 734,
 'I': 755,
 'J': 747,
 'K': 739,
 'L': 761,
 'M': 792,
 'N': 783,
 'O': 753,
 'P': 803,
 'Q': 783,
 'R': 758,
 'S': 748,
 'T': 795,
 'U': 813,
 'V': 764,
 'W': 752,
 'X': 787,
 'Y': 786,
 'Z': 734}

In [321]:
print(X_data.shape, y_data.shape)

(19999, 16) (19999,)


In [322]:
y_data = np.array(y_data.view(np.uint32)) - 65


In [323]:
unique, counts = np.unique(y_data, return_counts=True)
values = dict(zip(unique, counts))

In [324]:
#Uncomment to return numbers to letters
#np.char.mod('%c', y_data+65)

In [325]:
values

{0: 789,
 1: 766,
 2: 736,
 3: 805,
 4: 768,
 5: 775,
 6: 773,
 7: 734,
 8: 755,
 9: 747,
 10: 739,
 11: 761,
 12: 792,
 13: 783,
 14: 753,
 15: 803,
 16: 783,
 17: 758,
 18: 748,
 19: 795,
 20: 813,
 21: 764,
 22: 752,
 23: 787,
 24: 786,
 25: 734}

In [326]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, random_state=0, test_size = 0.25, train_size = 0.75)

In [327]:
y_train_wide = to_categorical(y_train)
y_test_wide = to_categorical(y_test)

In [328]:
mlp = MLP(X_train.shape[1], y_train_wide.shape[1],
          hidden_units=26,
          epochs=100000,
          learning_rate=.2, loss='log', 
           first_activation = 'sig', second_activation='softmax')

In [None]:
mlp.fit(X_train, y_train_wide)

Epoch 1 Loss is 4202.957703703221
Epoch 2 Loss is 4189.523785720581
Epoch 3 Loss is 4177.244475875908
Epoch 4 Loss is 4165.962084674516
Epoch 5 Loss is 4155.546512711668
Epoch 6 Loss is 4145.890372920188
Epoch 7 Loss is 4136.904861018071
Epoch 8 Loss is 4128.51633748303
Epoch 9 Loss is 4120.663560561732
Epoch 10 Loss is 4113.295383704429
Epoch 11 Loss is 4106.368805293907
Epoch 12 Loss is 4099.84740671534
Epoch 13 Loss is 4093.7000682072776
Epoch 14 Loss is 4087.899888139447
Epoch 15 Loss is 4082.423364078522
Epoch 16 Loss is 4077.2496690084677
Epoch 17 Loss is 4072.3600821914647
Epoch 18 Loss is 4067.7375540416238
Epoch 19 Loss is 4063.3663564034273
Epoch 20 Loss is 4059.2318453626362
Epoch 21 Loss is 4055.320257469828
Epoch 22 Loss is 4051.6185808030095
Epoch 23 Loss is 4048.1144498243984
Epoch 24 Loss is 4044.79608902051
Epoch 25 Loss is 4041.65227155283
Epoch 26 Loss is 4038.672287922152
Epoch 27 Loss is 4035.8459405311755
Epoch 28 Loss is 4033.1635333964864
Epoch 29 Loss is 4030.6

Epoch 231 Loss is 3909.049201380781
Epoch 232 Loss is 3908.7411522731754
Epoch 233 Loss is 3908.434411061101
Epoch 234 Loss is 3908.128975081377
Epoch 235 Loss is 3907.824841938806
Epoch 236 Loss is 3907.5220091430974
Epoch 237 Loss is 3907.2204741044034
Epoch 238 Loss is 3906.9202343421866
Epoch 239 Loss is 3906.621287293382
Epoch 240 Loss is 3906.3236303286412
Epoch 241 Loss is 3906.027260644524
Epoch 242 Loss is 3905.7321755720286
Epoch 243 Loss is 3905.438372310626
Epoch 244 Loss is 3905.145847886729
Epoch 245 Loss is 3904.854599330042
Epoch 246 Loss is 3904.564623735474
Epoch 247 Loss is 3904.2759179681516
Epoch 248 Loss is 3903.9884790703672
Epoch 249 Loss is 3903.702304037433
Epoch 250 Loss is 3903.41738985133
Epoch 251 Loss is 3903.1337334329146
Epoch 252 Loss is 3902.8513317707334
Epoch 253 Loss is 3902.570181843662
Epoch 254 Loss is 3902.2902806785173
Epoch 255 Loss is 3902.0116250991123
Epoch 256 Loss is 3901.734211877681
Epoch 257 Loss is 3901.4580378223905
Epoch 258 Loss i

Epoch 458 Loss is 3865.1632250460525
Epoch 459 Loss is 3865.0464593725974
Epoch 460 Loss is 3864.930031821365
Epoch 461 Loss is 3864.8139389446874
Epoch 462 Loss is 3864.6981772609215
Epoch 463 Loss is 3864.5827432690976
Epoch 464 Loss is 3864.4676335363392
Epoch 465 Loss is 3864.3528445794786
Epoch 466 Loss is 3864.238372908283
Epoch 467 Loss is 3864.1242151381384
Epoch 468 Loss is 3864.0103678867877
Epoch 469 Loss is 3863.8968277513877
Epoch 470 Loss is 3863.7835913492313
Epoch 471 Loss is 3863.670655406087
Epoch 472 Loss is 3863.5580166135387
Epoch 473 Loss is 3863.4456716638483
Epoch 474 Loss is 3863.3336172091454
Epoch 475 Loss is 3863.221850053131
Epoch 476 Loss is 3863.110367079412
Epoch 477 Loss is 3862.9991650792144
Epoch 478 Loss is 3862.8882408711097
Epoch 479 Loss is 3862.777591301038
Epoch 480 Loss is 3862.6672133407783
Epoch 481 Loss is 3862.5571039536485
Epoch 482 Loss is 3862.447260008769
Epoch 483 Loss is 3862.3376785029545
Epoch 484 Loss is 3862.228356488151
Epoch 485

Epoch 686 Loss is 3842.5227865148054
Epoch 687 Loss is 3842.4268621863184
Epoch 688 Loss is 3842.330877393973
Epoch 689 Loss is 3842.234830992088
Epoch 690 Loss is 3842.1387218511527
Epoch 691 Loss is 3842.042548792017
Epoch 692 Loss is 3841.946310614245
Epoch 693 Loss is 3841.850006194119
Epoch 694 Loss is 3841.7536344244077
Epoch 695 Loss is 3841.657194176415
Epoch 696 Loss is 3841.560684284896
Epoch 697 Loss is 3841.4641036151734
Epoch 698 Loss is 3841.3674509658003
Epoch 699 Loss is 3841.2707251202933
Epoch 700 Loss is 3841.1739248753784
Epoch 701 Loss is 3841.077049008044
Epoch 702 Loss is 3840.980096358779
Epoch 703 Loss is 3840.8830656978776
Epoch 704 Loss is 3840.785955787035
Epoch 705 Loss is 3840.6887654091165
Epoch 706 Loss is 3840.5914932887285
Epoch 707 Loss is 3840.494138142757
Epoch 708 Loss is 3840.3966987119784
Epoch 709 Loss is 3840.2991736756812
Epoch 710 Loss is 3840.2015617339052
Epoch 711 Loss is 3840.1038615837547
Epoch 712 Loss is 3840.006071878688
Epoch 713 Los

Epoch 915 Loss is 3817.804778554136
Epoch 916 Loss is 3817.691990525376
Epoch 917 Loss is 3817.5792778350824
Epoch 918 Loss is 3817.4666408286375
Epoch 919 Loss is 3817.3540799947086
Epoch 920 Loss is 3817.2415957147086
Epoch 921 Loss is 3817.1291882898886
Epoch 922 Loss is 3817.0168579238375
Epoch 923 Loss is 3816.9046049561416
Epoch 924 Loss is 3816.792429477756
Epoch 925 Loss is 3816.680331835825
Epoch 926 Loss is 3816.568312150105
Epoch 927 Loss is 3816.4563709310182
Epoch 928 Loss is 3816.344508309235
Epoch 929 Loss is 3816.232724387316
Epoch 930 Loss is 3816.1210192172966
Epoch 931 Loss is 3816.009392847485
Epoch 932 Loss is 3815.89784526893
Epoch 933 Loss is 3815.786376382632
Epoch 934 Loss is 3815.6749861034577
Epoch 935 Loss is 3815.563674348578
Epoch 936 Loss is 3815.452441180973
Epoch 937 Loss is 3815.3412865425194
Epoch 938 Loss is 3815.230210231475
Epoch 939 Loss is 3815.119212058159
Epoch 940 Loss is 3815.0082918576513
Epoch 941 Loss is 3814.897449464163
Epoch 942 Loss is

Epoch 1137 Loss is 3793.8083154828746
Epoch 1138 Loss is 3793.6990621316045
Epoch 1139 Loss is 3793.589734740442
Epoch 1140 Loss is 3793.4803321709774
Epoch 1141 Loss is 3793.3708532762266
Epoch 1142 Loss is 3793.26129692646
Epoch 1143 Loss is 3793.1516619788426
Epoch 1144 Loss is 3793.041947389914
Epoch 1145 Loss is 3792.9321520384847
Epoch 1146 Loss is 3792.8222747764717
Epoch 1147 Loss is 3792.712314477794
Epoch 1148 Loss is 3792.6022700202107
Epoch 1149 Loss is 3792.4921402258383
Epoch 1150 Loss is 3792.3819239613695
Epoch 1151 Loss is 3792.271620050291
Epoch 1152 Loss is 3792.1612273571436
Epoch 1153 Loss is 3792.050744718606
Epoch 1154 Loss is 3791.9401709624876
Epoch 1155 Loss is 3791.8295049352964
Epoch 1156 Loss is 3791.718745465825
Epoch 1157 Loss is 3791.6078913755855
Epoch 1158 Loss is 3791.4969415431015
Epoch 1159 Loss is 3791.385894791309
Epoch 1160 Loss is 3791.274749983805
Epoch 1161 Loss is 3791.1635059981345
Epoch 1162 Loss is 3791.052161627254
Epoch 1163 Loss is 3790

Epoch 1358 Loss is 3766.442238619711
Epoch 1359 Loss is 3766.303221639547
Epoch 1360 Loss is 3766.1641190906134
Epoch 1361 Loss is 3766.0249318897136
Epoch 1362 Loss is 3765.885660960628
Epoch 1363 Loss is 3765.7463071980233
Epoch 1364 Loss is 3765.6068714912863
Epoch 1365 Loss is 3765.467354727216
Epoch 1366 Loss is 3765.327757798549
Epoch 1367 Loss is 3765.1880815536924
Epoch 1368 Loss is 3765.0483268205894
Epoch 1369 Loss is 3764.9084944136857
Epoch 1370 Loss is 3764.7685851465058
Epoch 1371 Loss is 3764.628599835929
Epoch 1372 Loss is 3764.4885392617775
Epoch 1373 Loss is 3764.3484042119962
Epoch 1374 Loss is 3764.2081954728487
Epoch 1375 Loss is 3764.0679138237792
Epoch 1376 Loss is 3763.927560026963
Epoch 1377 Loss is 3763.787134831051
Epoch 1378 Loss is 3763.646638975482
Epoch 1379 Loss is 3763.5060731509247
Epoch 1380 Loss is 3763.3654380329954
Epoch 1381 Loss is 3763.224734309322
Epoch 1382 Loss is 3763.0839626313928
Epoch 1383 Loss is 3762.943123645696
Epoch 1384 Loss is 3762

Epoch 1584 Loss is 3733.27067092874
Epoch 1585 Loss is 3733.1142244147904
Epoch 1586 Loss is 3732.9576888209267
Epoch 1587 Loss is 3732.8010651712407
Epoch 1588 Loss is 3732.644354498127
Epoch 1589 Loss is 3732.4875579246827
Epoch 1590 Loss is 3732.3306766843143
Epoch 1591 Loss is 3732.173712027615
Epoch 1592 Loss is 3732.0166652021862
Epoch 1593 Loss is 3731.859537471516
Epoch 1594 Loss is 3731.702330183677
Epoch 1595 Loss is 3731.545044675571
Epoch 1596 Loss is 3731.387682353368
Epoch 1597 Loss is 3731.230244674373
Epoch 1598 Loss is 3731.072733107016
Epoch 1599 Loss is 3730.9151491589737
Epoch 1600 Loss is 3730.7574944144694
Epoch 1601 Loss is 3730.599770522922
Epoch 1602 Loss is 3730.441979100596
Epoch 1603 Loss is 3730.2841217803507
Epoch 1604 Loss is 3730.126200236749
Epoch 1605 Loss is 3729.9682162046774
Epoch 1606 Loss is 3729.8101714333034
Epoch 1607 Loss is 3729.6520677447756
Epoch 1608 Loss is 3729.4939070495648
Epoch 1609 Loss is 3729.3356912450645
Epoch 1610 Loss is 3729.1

Epoch 1806 Loss is 3700.4764906112405
Epoch 1807 Loss is 3700.352986951121
Epoch 1808 Loss is 3700.2296991343096
Epoch 1809 Loss is 3700.106625173615
Epoch 1810 Loss is 3699.9837630631255
Epoch 1811 Loss is 3699.861110758562
Epoch 1812 Loss is 3699.7386661933506
Epoch 1813 Loss is 3699.616427324593
Epoch 1814 Loss is 3699.49439214464
Epoch 1815 Loss is 3699.372558552895
Epoch 1816 Loss is 3699.2509244817775
Epoch 1817 Loss is 3699.1294879540615
Epoch 1818 Loss is 3699.0082469742138
Epoch 1819 Loss is 3698.8871994727674
Epoch 1820 Loss is 3698.766343434393
Epoch 1821 Loss is 3698.645676857857
Epoch 1822 Loss is 3698.5251977393496
Epoch 1823 Loss is 3698.4049040503937
Epoch 1824 Loss is 3698.284793824678
Epoch 1825 Loss is 3698.164865063888
Epoch 1826 Loss is 3698.045115789612
Epoch 1827 Loss is 3697.9255440439006
Epoch 1828 Loss is 3697.806147880449
Epoch 1829 Loss is 3697.6869253256746
Epoch 1830 Loss is 3697.5678744429233
Epoch 1831 Loss is 3697.448993343156
Epoch 1832 Loss is 3697.33

Epoch 2031 Loss is 3676.201785916
Epoch 2032 Loss is 3676.1070479930804
Epoch 2033 Loss is 3676.0123509157543
Epoch 2034 Loss is 3675.917691352106
Epoch 2035 Loss is 3675.8230659611427
Epoch 2036 Loss is 3675.7284713467725
Epoch 2037 Loss is 3675.633903971586
Epoch 2038 Loss is 3675.539360304514
Epoch 2039 Loss is 3675.4448367622294
Epoch 2040 Loss is 3675.3503296699932
Epoch 2041 Loss is 3675.2558353194763
Epoch 2042 Loss is 3675.161350073016
Epoch 2043 Loss is 3675.0668702441926
Epoch 2044 Loss is 3674.972392041632
Epoch 2045 Loss is 3674.8779116948326
Epoch 2046 Loss is 3674.7834252878583
Epoch 2047 Loss is 3674.6889288172515
Epoch 2048 Loss is 3674.59441829767
Epoch 2049 Loss is 3674.4998896035377
Epoch 2050 Loss is 3674.4053385509137
Epoch 2051 Loss is 3674.3107609526196
Epoch 2052 Loss is 3674.216152498892
Epoch 2053 Loss is 3674.12150895488
Epoch 2054 Loss is 3674.02682607552
Epoch 2055 Loss is 3673.9320995225917
Epoch 2056 Loss is 3673.8373247831155
Epoch 2057 Loss is 3673.7424

Epoch 2250 Loss is 3651.2350173244554
Epoch 2251 Loss is 3651.112038778041
Epoch 2252 Loss is 3650.9893472900458
Epoch 2253 Loss is 3650.866946831402
Epoch 2254 Loss is 3650.7448411806413
Epoch 2255 Loss is 3650.6230340732136
Epoch 2256 Loss is 3650.5015291096383
Epoch 2257 Loss is 3650.3803298206562
Epoch 2258 Loss is 3650.2594396946315
Epoch 2259 Loss is 3650.138862131012
Epoch 2260 Loss is 3650.0186005113455
Epoch 2261 Loss is 3649.898658150849
Epoch 2262 Loss is 3649.7790383722618
Epoch 2263 Loss is 3649.6597444446047
Epoch 2264 Loss is 3649.54077957547
Epoch 2265 Loss is 3649.4221469092145
Epoch 2266 Loss is 3649.303849597497
Epoch 2267 Loss is 3649.185890648123
Epoch 2268 Loss is 3649.068273069182
Epoch 2269 Loss is 3648.9509998203434
Epoch 2270 Loss is 3648.834073727143
Epoch 2271 Loss is 3648.71749759862
Epoch 2272 Loss is 3648.6012743042
Epoch 2273 Loss is 3648.4854066466314
Epoch 2274 Loss is 3648.369897388841
Epoch 2275 Loss is 3648.254749307696
Epoch 2276 Loss is 3648.13996

Epoch 2470 Loss is 3635.974976398161
Epoch 2471 Loss is 3635.981415157365
Epoch 2472 Loss is 3635.9886998745887
Epoch 2473 Loss is 3635.996831325765
Epoch 2474 Loss is 3636.0058102812177
Epoch 2475 Loss is 3636.015637436494
Epoch 2476 Loss is 3636.026313532703
Epoch 2477 Loss is 3636.037839289278
Epoch 2478 Loss is 3636.0502153922725
Epoch 2479 Loss is 3636.0634424627715
Epoch 2480 Loss is 3636.077521223174
Epoch 2481 Loss is 3636.0924523919507
Epoch 2482 Loss is 3636.1082365490574
Epoch 2483 Loss is 3636.1248742289977
Epoch 2484 Loss is 3636.142366062244
Epoch 2485 Loss is 3636.160712719584
Epoch 2486 Loss is 3636.179914947314
Epoch 2487 Loss is 3636.1999732619142
Epoch 2488 Loss is 3636.22088827631
Epoch 2489 Loss is 3636.242660559854
Epoch 2490 Loss is 3636.2652905928376
Epoch 2491 Loss is 3636.2887788421667
Epoch 2492 Loss is 3636.313125827986
Epoch 2493 Loss is 3636.338332042127
Epoch 2494 Loss is 3636.3643978082127
Epoch 2495 Loss is 3636.3913235542373
Epoch 2496 Loss is 3636.419

Epoch 2692 Loss is 3655.5344428890467
Epoch 2693 Loss is 3655.652830587843
Epoch 2694 Loss is 3655.770630524951
Epoch 2695 Loss is 3655.8878305997923
Epoch 2696 Loss is 3656.0044187622048
Epoch 2697 Loss is 3656.1203830792188
Epoch 2698 Loss is 3656.2357117185006
Epoch 2699 Loss is 3656.3503929291182
Epoch 2700 Loss is 3656.464415101865
Epoch 2701 Loss is 3656.5777667400416
Epoch 2702 Loss is 3656.69043641853
Epoch 2703 Loss is 3656.8024127633184
Epoch 2704 Loss is 3656.9136845518033
Epoch 2705 Loss is 3657.0242407441165
Epoch 2706 Loss is 3657.1340704032227
Epoch 2707 Loss is 3657.243162581465
Epoch 2708 Loss is 3657.3515064472986
Epoch 2709 Loss is 3657.4590912413146
Epoch 2710 Loss is 3657.565906338183
Epoch 2711 Loss is 3657.6719411786785
Epoch 2712 Loss is 3657.7771853647578
Epoch 2713 Loss is 3657.881628533123
Epoch 2714 Loss is 3657.985260482201
Epoch 2715 Loss is 3658.088071086839
Epoch 2716 Loss is 3658.1900503138872
Epoch 2717 Loss is 3658.291188290422
Epoch 2718 Loss is 3658

Epoch 2915 Loss is 3658.8389522103644
Epoch 2916 Loss is 3658.7601466005253
Epoch 2917 Loss is 3658.6808654670785
Epoch 2918 Loss is 3658.601117399228
Epoch 2919 Loss is 3658.520910765007
Epoch 2920 Loss is 3658.4402538529016
Epoch 2921 Loss is 3658.359154776027
Epoch 2922 Loss is 3658.277621743871
Epoch 2923 Loss is 3658.1956629161523
Epoch 2924 Loss is 3658.1132866161606
Epoch 2925 Loss is 3658.0305012201047
Epoch 2926 Loss is 3657.9473153223207
Epoch 2927 Loss is 3657.8637376439992
Epoch 2928 Loss is 3657.7797770814263
Epoch 2929 Loss is 3657.695442554311
Epoch 2930 Loss is 3657.6107431216187
Epoch 2931 Loss is 3657.5256880476777
Epoch 2932 Loss is 3657.4402866649016
Epoch 2933 Loss is 3657.3545483959256
Epoch 2934 Loss is 3657.2684828320266
Epoch 2935 Loss is 3657.182099745217
Epoch 2936 Loss is 3657.0954089928555
Epoch 2937 Loss is 3657.0084205053863
Epoch 2938 Loss is 3656.921144355958
Epoch 2939 Loss is 3656.833590765565
Epoch 2940 Loss is 3656.7457700239634
Epoch 2941 Loss is 3

Epoch 3141 Loss is 3640.9543861682682
Epoch 3142 Loss is 3640.9011306269135
Epoch 3143 Loss is 3640.848280668408
Epoch 3144 Loss is 3640.7958378796457
Epoch 3145 Loss is 3640.743803902252
Epoch 3146 Loss is 3640.692180200378
Epoch 3147 Loss is 3640.6409681467967
Epoch 3148 Loss is 3640.5901690287883
Epoch 3149 Loss is 3640.5397839547463
Epoch 3150 Loss is 3640.489813944391
Epoch 3151 Loss is 3640.440259805645
Epoch 3152 Loss is 3640.39112224499
Epoch 3153 Loss is 3640.342401954347
Epoch 3154 Loss is 3640.294099525308
Epoch 3155 Loss is 3640.246215351072
Epoch 3156 Loss is 3640.1987497210703
Epoch 3157 Loss is 3640.151702837275
Epoch 3158 Loss is 3640.105074766863
Epoch 3159 Loss is 3640.0588655019
Epoch 3160 Loss is 3640.013074921401
Epoch 3161 Loss is 3639.9677027861444
Epoch 3162 Loss is 3639.922748798388
Epoch 3163 Loss is 3639.8782123731607
Epoch 3164 Loss is 3639.8340928809553
Epoch 3165 Loss is 3639.7903895371514
Epoch 3166 Loss is 3639.7471014604257
Epoch 3167 Loss is 3639.70422

In [None]:
train_pred_wide = mlp.predict(X_train)
test_pred_wide = mlp.predict(X_test)

In [None]:
# TO DO - ADD HISTORY (loss and accuracy) AND A WAY TO SAVE THE WEIGHTS.

SIGMOID FUNCtion preDICTING EVERYTHIGN THE SAME CLASS

In [252]:
train_pred.shape

(14999,)

In [None]:
train_pred = np.argmax(train_pred_wide, axis=1)
test_pred = np.argmax(test_pred_wide, axis=1)

In [None]:
print(metrics.classification_report(y_train, train_pred))