# Training a Neural Network to Predict Exclusive Or (XOR)

In [1]:
import numpy as np
from keras.models import Sequential # base Keras model class
from keras.layers import Dense, Activation #Dense is a fully connected layer of Neurons
from keras.optimizers import SGD # Stochastic gradient descent



In [37]:
# Truth table for exclusive or: every possible pair of binary inputs and its label.
x_train = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])  # training data
y_train = np.array([[0], [1], [1], [0]])  # training labels (x1 XOR x2)

# Width of the fully connected hidden layer.
num_neurons = 10

model = Sequential([
    # input_dim is only required on the first layer (our feature vectors are 2D);
    # later layers work out their input shape from the layer before them.
    Dense(num_neurons, input_dim=2),
    Activation('tanh'),
    # A single output neuron produces the binary classification value.
    Dense(1),
    Activation('sigmoid'),
])

# Overview of the layers and the number of weights in each.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 10)                30        
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 11        
_________________________________________________________________
activation_7 (Activation)    (None, 1)                 0         
Total params: 41
Trainable params: 41
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The '30' for the first Dense layer is the number of weights it has to learn: 2 input weights * 10 neurons + 1 bias weight * 10 neurons = 30 weights.

# The '11' for the second Dense layer is the 10 weights the output neuron applies to the hidden-layer outputs plus 1 bias weight, for a total of 11.

In [38]:
# Stochastic gradient descent is the optimizer that was imported. The learning rate
# is the fraction of the observed error that is corrected on each weight update:
# too small and training takes longer and can settle in a local minimum;
# too large and the model may overshoot the global minimum.
sgd = SGD(learning_rate=0.1)

# compile() configures the model with its loss (binary cross-entropy), optimizer
# and metrics; it does not train the model.
model.compile(
    optimizer=sgd,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)



In [5]:

# Predicting before fit() is called: the values shown are the sigmoid outputs of the
# still-untrained network, so they are not yet meaningful for XOR.
# NOTE(review): this cell's execution count is out of sequence with its neighbours;
# restart the kernel and run all cells to confirm the output below.
model.predict(x_train)

array([[0.5       ],
       [0.33742684],
       [0.5948041 ],
       [0.43347698]], dtype=float32)

In [39]:
# Train the model: 140 passes (epochs) over the four XOR examples.
model.fit(x=x_train, y=y_train, epochs=140)

Epoch 1/140
Epoch 2/140
Epoch 3/140
Epoch 4/140
Epoch 5/140
Epoch 6/140
Epoch 7/140
Epoch 8/140
Epoch 9/140
Epoch 10/140
Epoch 11/140
Epoch 12/140
Epoch 13/140
Epoch 14/140
Epoch 15/140
Epoch 16/140
Epoch 17/140
Epoch 18/140
Epoch 19/140
Epoch 20/140
Epoch 21/140
Epoch 22/140
Epoch 23/140
Epoch 24/140
Epoch 25/140
Epoch 26/140
Epoch 27/140
Epoch 28/140
Epoch 29/140
Epoch 30/140
Epoch 31/140
Epoch 32/140
Epoch 33/140
Epoch 34/140
Epoch 35/140
Epoch 36/140
Epoch 37/140
Epoch 38/140
Epoch 39/140
Epoch 40/140
Epoch 41/140
Epoch 42/140
Epoch 43/140
Epoch 44/140
Epoch 45/140
Epoch 46/140
Epoch 47/140
Epoch 48/140
Epoch 49/140
Epoch 50/140
Epoch 51/140
Epoch 52/140
Epoch 53/140
Epoch 54/140
Epoch 55/140
Epoch 56/140
Epoch 57/140
Epoch 58/140
Epoch 59/140
Epoch 60/140
Epoch 61/140
Epoch 62/140
Epoch 63/140
Epoch 64/140
Epoch 65/140
Epoch 66/140
Epoch 67/140
Epoch 68/140
Epoch 69/140
Epoch 70/140
Epoch 71/140
Epoch 72/140
Epoch 73/140
Epoch 74/140
Epoch 75/140
Epoch 76/140
Epoch 77/140
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x2291d8fa610>

In [40]:
# Sequential.predict_classes() was deprecated in TensorFlow 2.5 and removed in 2.6.
# For a single sigmoid output unit, thresholding the predicted probability at 0.5
# and casting to int32 yields the same class labels it used to return.
(model.predict(x_train) > 0.5).astype("int32")

array([[0],
       [1],
       [1],
       [0]])

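In the textbook, 100 epochs were enough to return the proper results; for me, however, it took 140 epochs (120 didn't work). Because the weights start from random values, the number of epochs needed can vary from run to run.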

In [41]:
# Sigmoid outputs of the trained network for each training example;
# values above 0.5 correspond to class 1, values below to class 0.
model.predict(x_train)

array([[0.33858937],
       [0.58934826],
       [0.5891377 ],
       [0.49524826]], dtype=float32)

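Here we can see the raw sigmoid output for each example, i.e. the predicted probability that its label is 1 (not an accuracy). Values above 0.5 are classified as 1 and values below 0.5 as 0.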

### We are going to save this model so we don't have to use computational power to retrain it

In [43]:
# h5py is not referenced directly below; presumably it is imported to make sure
# HDF5 support is available for the .h5 weights file (TODO confirm).
import h5py

# Keras helper: serialise the network structure (not the weights) to a JSON string.
model_structure = model.to_json()
with open("basic_model.json", "w") as structure_file:
    structure_file.write(model_structure)

# The weights are saved separately and must be loaded back into a model rebuilt
# from the saved structure before the model can be used again.
model.save_weights("basic_weights.h5")