In [1]:
%matplotlib inline
import sys
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt

# Make the repository root (the parent of the notebook's working directory)
# importable so that `mini_nn_framework` can be found.
project_dir = Path.cwd().parent
# Guard the append so that re-running this cell does not keep adding duplicate
# entries to sys.path.
if str(project_dir) not in sys.path:
    sys.path.append(str(project_dir))

from mini_nn_framework.layers import Input, FullyConnected, OutputBinary

## Load the processed data

In [2]:
# Directory (relative to the notebook's working directory) that holds the
# pre-processed mushroom arrays.
DATA_DIR = Path("training_data")


def _load_array(file_name):
    """Load one ``.npy`` file from ``DATA_DIR`` and return its array.

    The path is handed straight to ``np.load`` so that NumPy opens and closes
    the file itself; the previous ``np.load(open(...))`` form left every file
    handle open.
    """
    return np.load(DATA_DIR / file_name)


training_data = _load_array("mushrooms_training_data.npy")
training_labels = _load_array("mushrooms_training_labels.npy")
validation_data = _load_array("mushrooms_validation_data.npy")
validation_labels = _load_array("mushrooms_validation_labels.npy")
test_data = _load_array("mushrooms_test_data.npy")
test_labels = _load_array("mushrooms_test_labels.npy")

In [3]:
# Layout check: axis 0 is used as the network's input size and axis 1 is what
# gets sliced into mini-batches elsewhere in this notebook, i.e. the array is
# laid out as (features, samples).
training_data.shape

(117, 6500)

## Create the neural network

In [4]:
# Build the network as a chain: every layer receives the previous one as its
# `input_layer`. Two ReLU layers of 20 neurons sit between the input layer and
# the binary output layer.
n_features = training_data.shape[0]

input_layer = Input(input_size=n_features, name="input_layer")
fully_connected_1 = FullyConnected(
    neurons=20,
    input_layer=input_layer,
    activation="relu",
    name="fully_1",
)
fully_connected_2 = FullyConnected(
    neurons=20,
    input_layer=fully_connected_1,
    activation="relu",
    name="fully_2",
)
output_layer = OutputBinary(input_layer=fully_connected_2, name="output_layer")

In [5]:

# Dump the freshly initialised parameters of every trainable layer, in
# forward order.
for layer in (fully_connected_1, fully_connected_2, output_layer):
    layer.print_parameters()

Layer fully_1 - W
[[ 0.2785468   0.19390332  0.0782021  ... -0.41307517  1.03724337
   1.2274485 ]
 [-1.70297521  0.88407644 -0.48317291 ... -1.11087051 -0.43162459
   1.21451937]
 [-0.41727916 -0.10773089  0.1494799  ...  0.64379164 -1.7500276
   0.21123234]
 ...
 [ 0.17677728 -0.4901911  -1.35023037 ...  1.27133225  0.2196196
  -0.21853419]
 [ 1.23085434 -0.0598549   1.40523416 ...  0.25522565  0.11712501
  -0.77854122]
 [ 0.02377374  0.97684503  1.0971453  ... -0.05848381 -0.32898712
   0.83124727]]
Layer fully_1 - B
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
Layer fully_2 - W
[[ 6.03633208e-01  9.06980148e-01 -3.01634464e-01  2.30424920e-01
  -4.81531171e-01 -3.14658988e+00 -3.43943438e-01 -1.26684328e+00
  -1.34437937e+00 -2.22117037e-01 -1.25067364e+00 -4.65259043e-02
   1.52779974e+00 -1.20037597e+00  2.67701412e-01 -2.14346238e-01
  -8.43177818e-01  7.95498653e-01  1.00971249e+00  8.27564318e-01]
 [-3

In [9]:
# Push the first training batch through the network and, after each layer's
# forward pass, print the shape of its `Z` attribute followed by its `output`.
input_layer.feed_input(training_data_batchs[0])
print(input_layer.output.shape)

for layer in (fully_connected_1, fully_connected_2, output_layer):
    layer.forward_pass()
    print(layer.Z.shape)
    print(layer.output)

(117, 256)
(20, 256)
[[1.30980245 7.16373233 4.48343869 ... 1.50504819 4.57326643 3.52210644]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         5.98974818 0.30131227 ... 2.64379361 2.60779831 0.        ]
 ...
 [2.46363893 0.         0.         ... 0.         3.41984152 0.        ]
 [2.48447021 0.         2.14858926 ... 0.         2.96332608 4.32520406]
 [1.84446925 0.         0.50735995 ... 4.08939231 4.12579104 0.60277164]]
(20, 256)
[[ 4.61203151  0.          0.         ...  0.          0.
   0.        ]
 [ 4.81509032  0.         15.17234029 ...  6.34053785  0.33103554
   5.47249083]
 [ 2.92519215  0.          7.47244606 ...  0.          0.
   0.        ]
 ...
 [ 4.65224061 11.20968117 16.3780846  ...  3.11752356  9.32847166
   3.49865009]
 [ 1.1037705   6.73240928  0.         ...  6.92570825 15.36752288
   0.        ]
 [ 0.          0.          7.60959486 ...  2.53752453 14.68864235
  19.29446519]]
(1, 256)
[[1.00000000e+00 1.00000000e+00 9.999999

In [12]:
# Spot-check a single entry (row 0, column 4) of the output of the second
# fully connected layer.
fully_connected_2.output[0][4]

0.0

In [10]:
# Print the loss values returned for the first training batch.
# NOTE(review): the recorded output contains `inf` and `nan` entries, and the
# source line echoed beneath it is the cross-entropy expression using
# `np.log(predictions)` and `np.log(1 - predictions)`. Presumably some
# predictions are exactly 0 or 1 — confirm, and consider clipping them inside
# the loss.
print(output_layer.loss(targets=training_labels_batchs[0]))

[[1.56095137e-11 2.22044605e-16 1.35187239e-09 4.66525019e-04
             inf 2.59372250e+01 2.64677169e-13 2.99760217e-15
  1.74638765e+01 2.74269496e-12 2.39007069e+01 3.77475828e-15
  3.97681887e-13 2.72737388e-12 3.07975867e-13            nan
  5.19427330e+00 6.63144408e+00            nan 1.30522821e+01
             nan            inf 4.17363921e-11            nan
  3.07109346e+01            nan 3.29392784e-02 1.41071360e+01
  1.73189423e+01            inf 2.33813379e+00 3.12900632e+01
  1.64076830e-01 8.13030937e-01 2.22044605e-16            nan
  1.43935200e+01 2.00903344e+01            inf 1.13242749e-14
  2.71510420e+01 1.49547041e-12 1.47663491e+01 2.41571412e+01
  5.15440706e+00 1.48183966e+01 1.67458387e+00            nan
  2.52751502e-04 6.07025541e-12            nan 2.71099434e-02
  1.68258747e+01 7.35230817e+00 3.07757952e+01 2.69806536e+01
  1.54094152e+01            nan 7.57976364e+00 7.09991771e-03
  5.40802028e-07            nan 4.12802777e+00            nan
  8.6597

  return -(labels * np.log(predictions)) - ((1 - labels) * np.log(1 - predictions))
  return -(labels * np.log(predictions)) - ((1 - labels) * np.log(1 - predictions))


In [None]:
# Scratch cell: one optimisation step on batch `j`.
# NOTE(review): `j`, `costs` and `learning_rate` are not assigned in this
# cell; it only works when they already exist in the kernel.
input_layer.feed_input(training_data_batchs[j])
print(input_layer.output.shape)

# Forward pass, from the first hidden layer to the output layer.
for layer in (fully_connected_1, fully_connected_2, output_layer):
    layer.forward_pass()

# Evaluate the loss, then record the cost for this batch.
step_targets = training_labels_batchs[j]
output_layer.loss(targets=step_targets)
costs.append(output_layer.cost())

# Backward pass: only the output layer is given the targets; the hidden
# layers follow in reverse order.
output_layer.backward_pass(targets=step_targets)
for layer in (fully_connected_2, fully_connected_1):
    layer.backward_pass()

# Apply the parameter updates, output layer first.
for layer in (output_layer, fully_connected_2, fully_connected_1):
    layer.parameters_update(learning_rate=learning_rate)

## Hyperparameters

In [7]:
# Number of full passes over the list of training batches in the training loop.
epochs: int = 100
# Value passed as `learning_rate` to every layer's `parameters_update()`.
learning_rate: float = 0.1
# Number of columns taken from the data arrays for each mini-batch.
batch_size: int = 256

## Create batches

In [8]:
def _split_into_batches(data, labels, size):
    """Cut one dataset into consecutive mini-batches of at most `size` items.

    `data` is sliced along axis 1 (columns) and `labels` along axis 0, with
    the same start/stop indices for both.

    Returns two parallel lists: the data batches and the label batches. The
    number of batches is derived from `data.shape[1]`, so every dataset is
    batched according to its own length. The last batch holds whatever is
    left over and may be smaller than `size`; an empty batch is never
    produced.
    """
    data_batches = []
    label_batches = []
    for start in range(0, data.shape[1], size):
        stop = start + size
        data_batches.append(data[:, start:stop])
        label_batches.append(labels[start:stop])
    return data_batches, label_batches


# Number of complete batches in the training set. Floor division is used
# because `round()` can round up, which previously produced a short "full"
# batch followed by an empty trailing batch.
full_batchs = training_data.shape[1] // batch_size

# Each split is batched independently; previously the validation and test
# sets were sliced using the training set's batch count.
training_data_batchs, training_labels_batchs = _split_into_batches(
    training_data, training_labels, batch_size
)
validation_data_batchs, validation_labels_batchs = _split_into_batches(
    validation_data, validation_labels, batch_size
)
test_data_batchs, test_labels_batchs = _split_into_batches(
    test_data, test_labels, batch_size
)

## Model training

In [None]:
# One cost value is appended per mini-batch, across all epochs.
costs = []

# Layer orderings used by the three phases of every training step.
forward_order = (fully_connected_1, fully_connected_2, output_layer)
hidden_backward_order = (fully_connected_2, fully_connected_1)
update_order = (output_layer, fully_connected_2, fully_connected_1)

for i in range(epochs):
    print(i)

    for j, batch_inputs in enumerate(training_data_batchs):

        # Forward pass through the whole network.
        input_layer.feed_input(batch_inputs)
        for layer in forward_order:
            layer.forward_pass()

        # Evaluate the loss, then record the cost for this batch.
        batch_targets = training_labels_batchs[j]
        output_layer.loss(targets=batch_targets)
        costs.append(output_layer.cost())

        # Backward pass: the output layer is given the targets, the hidden
        # layers follow in reverse order.
        output_layer.backward_pass(targets=batch_targets)
        for layer in hidden_backward_order:
            layer.backward_pass()

        # Parameter updates, output layer first.
        for layer in update_order:
            layer.parameters_update(learning_rate=learning_rate)

In [None]:
# Plot the cost recorded after every mini-batch update.
# The x axis is taken from `costs` itself (index 0 .. len(costs) - 1) instead
# of being recomputed from `epochs * len(training_data_batchs)`, so the cell
# still works when `costs` has a different length, e.g. after an interrupted
# training run.
fig, ax = plt.subplots()
ax.plot(costs)
ax.set_xlabel("mini-batch update")
ax.set_ylabel("cost")
ax.set_title("Training cost per mini-batch");

In [None]:
# Display the raw cost values; the training loop appends one entry per
# mini-batch per epoch, so this list can be long.
costs