In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import keras
from keras.models import Model, Sequential, load_model
from keras.layers import *
from keras.optimizers import Adam
from keras.callbacks import *

import tensorflow as tf
import numpy as np

import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


# Binary adder model

### Inputs

* Interger between **0 to 100**
* Convert to an **array of bits** to represent the number in **binary**
* Referred to as **input1** and **input2**

### Result

* Interger between **0 to 200**
* Convert to an **array of bits** to represent the number in **binary**
* Referred to as **result**

In [2]:
print(keras.__version__)
print(tf.__version__)

2.0.8
1.3.0


In [3]:
def plot_train(hist):
    h = hist.history
    if 'acc' in h:
        meas='acc'
        loc='lower right'
    else:
        meas='loss'
        loc='upper right'
    plt.plot(hist.history[meas])
    plt.plot(hist.history['val_'+meas])
    plt.title('model '+meas)
    plt.ylabel(meas)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc=loc)

In [4]:
def get_operands_and_sums(operand):
    operand = operand.astype("uint8")
    sums = []
    for operand1 in operand:
        for operand2 in operand:
            sums.append(np.unpackbits(operand1 + operand2))

    operands = []
    for operand1 in operand:
        for operand2 in operand:
            operands.append([np.unpackbits(operand1), np.unpackbits(operand2)])            
            
    operands = np.array(operands, dtype=np.uint8)
    
    operands_transpose = []
    
    for operand in operands:
        operands_transpose.append(operand.T[::-1])
        
    operands_transpose = np.array(operands_transpose, dtype=np.uint8)
    sums = np.array(sums, dtype=np.uint8)
    sums = np.flip(sums, 1)
#     operands = np.unpackbits(operands, axis=1)
#     sums = np.unpackbits(sums)
    return (operands_transpose, sums)

In [5]:
# Generate the full set of integers between 0 and 100 as operand
operand = np.arange(0, 101);
operands, sums = get_operands_and_sums(operand);

print(len(operands))
print(len(sums))
print(operands[10200])
print(sums[10200])

number_of_records = len(operands)
shuffle = np.arange(number_of_records)
np.random.shuffle(shuffle)
test_fraction = 0.8

# print(shuffle)

#making a train / test split
train_split, test_split = shuffle[:int(number_of_records*test_fraction)], shuffle[int(number_of_records*test_fraction):]
# print(train_split)
# print(test_split)
x_train , y_train = operands[train_split,:] , sums[train_split]
x_test , y_test = operands[test_split,:] , sums[test_split]

10201
10201
[[0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]]
[0 0 0 1 0 0 1 1]


In [6]:
# Generate a subset of integers between 0 and 100 as operand

# fifties = np.empty(100)
# fifties.fill(50)
# x_train , y_train = get_operands_and_sums(fifties);

# multiples_of_two = np.arange(0, 101, 2)
# x_train , y_train = get_operands_and_sums(multiples_of_two);

# zero_to_hundred = np.arange(0, 101);
# x_test , y_test = get_operands_and_sums(zero_to_hundred);

In [7]:
# x_train = x_train.reshape(len(x_train), 8, 2)
# x_test = x_test.reshape(len(x_test), 8, 2)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
print(x_train[1])
print(y_train[1])
print(x_test[1])
print(y_test[1])

(8160, 8, 2)
(8160, 8)
(2041, 8, 2)
(2041, 8)
[[1 1]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]]
[0 0 1 1 0 0 0 0]
[[0 1]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [1 0]
 [0 0]]
[1 1 0 0 1 1 1 0]


In [8]:
inp = Input(shape=(x_train.shape[1], x_train.shape[2]))
print('our input shape is ',(x_train.shape[1], x_train.shape[2]) )
x = LSTM(16)(inp)
x = Dropout(0.2)(x)
#x = LSTM(256)(x)
#x = Dropout(0.2)(x)
output = Dense(y_train.shape[1], activation ='sigmoid')(x)

# 1 x LSTM(256) - val_acc at 1.0000 after 26 epochs
# 1 x LSTM(32) - val_acc at 0.9994 after 32 epochs
# 1 x LSTM(24) - val_acc at 1.0000 after 73 epochs
# 1 x LSTM(16) - val_acc at 0.9991 after 26 epochs
# 1 x LSTM(16) - val_acc at 0.9992 after 66 epochs
# 1 x LSTM(16) - val_acc at 0.9998 after 71 epochs
# 1 x LSTM(16) - val_acc at 0.9998 after 126 epochs
# 1 x LSTM(8) - val_acc at 0.8902 after 115 epochs
# 1 x LSTM(8) - val_acc at 0.9207 after 327 epochs

our input shape is  (8, 2)


In [9]:
adam = Adam(lr=0.01)
generative_model = Model(inputs = inp, outputs=output )
generative_model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
generative_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 8, 2)              0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 16)                1216      
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 136       
Total params: 1,352
Trainable params: 1,352
Non-trainable params: 0
_________________________________________________________________


In [10]:
filepath="checkpoints/Recurrent_binary_adder_dense-weights-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', save_best_only=True, mode='min') # , verbose=1
reduce_LR = ReduceLROnPlateau(monitor='loss',factor = 0.9, patience=3,cooldown=2, min_lr = 0.00001)
early_stopping = EarlyStopping(monitor='val_acc', patience=10) #, min_delta=0.0001)
callbacks_list = [checkpoint, reduce_LR, early_stopping]

generative_model.fit(
    x_train, y_train,
    epochs=500,
    batch_size=64,
    validation_data=(x_test, y_test),
    callbacks=callbacks_list
)

Train on 8160 samples, validate on 2041 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500


<keras.callbacks.History at 0x12938ac50>

In [11]:
preds = generative_model.predict(x_test[0:5]);

print(preds)

for pred in preds:
    pred[pred>=0.5] = 1
    pred[pred<0.5] = 0
    
preds = preds.astype("uint8");

print(np.packbits(preds, -1))
print(np.packbits(y_test[0:5], -1))

[[  7.23107485e-04   2.81765952e-05   9.95644212e-01   3.92717411e-05
    9.99968052e-01   9.99753535e-01   9.99830365e-01   3.09400548e-06]
 [  9.99831796e-01   9.99978065e-01   3.34606170e-06   2.41872817e-06
    9.99943852e-01   9.99893665e-01   9.99286592e-01   2.02143679e-06]
 [  9.98846054e-01   6.32932293e-04   6.68778142e-04   9.97909129e-01
    3.21618631e-04   9.98268127e-01   3.57295765e-04   9.99992967e-01]
 [  3.48598114e-04   4.84240329e-04   3.11524118e-03   9.91398335e-01
    9.99918103e-01   5.21747395e-04   9.99932408e-01   4.46210606e-06]
 [  6.68367837e-04   9.99589264e-01   1.13592658e-03   7.03941532e-06
    9.99876142e-01   1.42907968e-03   6.50802278e-04   9.99993563e-01]]
[[ 46]
 [206]
 [149]
 [ 26]
 [ 73]]
[[ 46]
 [206]
 [149]
 [ 26]
 [ 73]]


In [12]:
print(x_train.shape)
print(y_train.shape)

(8160, 8, 2)
(8160, 8)


In [13]:
inp = Input(shape=(x_train.shape[1], x_train.shape[2]))
print('our input shape is ',(x_train.shape[1], x_train.shape[2]) )
x = LSTM(6, return_sequences=True)(inp)
x = Dropout(0.2)(x)
#x = LSTM(256)(x)
#x = Dropout(0.2)(x)
# print(x.shape)
output = TimeDistributed(Dense(1, activation ='sigmoid'))(x)

our input shape is  (8, 2)


In [14]:
# adam = Adam(lr=0.01)
timedistributed_model = Model(inputs = inp, outputs=output )
timedistributed_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
timedistributed_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 8, 2)              0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 8, 6)              216       
_________________________________________________________________
dropout_2 (Dropout)          (None, 8, 6)              0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 8, 1)              7         
Total params: 223
Trainable params: 223
Non-trainable params: 0
_________________________________________________________________


In [15]:
y_train_3D = y_train.reshape(y_train.shape[0], y_train.shape[1],  1)
print(y_train.shape)
print(y_train_3D.shape)
y_test_3D = y_test.reshape(y_test.shape[0], y_test.shape[1], 1)
print(y_test.shape)
print(y_test_3D.shape)

(8160, 8)
(8160, 8, 1)
(2041, 8)
(2041, 8, 1)


In [16]:
# 1 x LSTM(2) - val_acc at 0.7381 after 27 epochs
# 1 x LSTM(4) - val_acc at 0.9483 after 52 epochs
# 1 x LSTM(4) - val_acc at 0.9484 after 17 epochs
# 1 x LSTM(5) - val_acc at 0.9958 after 1000 epochs
# 1 x LSTM(5) - val_acc at 0.9972 after 2000 epochs
# 1 x LSTM(5) - val_acc at 0.9987 after 3000 epochs
# 1 x LSTM(5) - val_acc at 0.9990 after 3291 - 4000 epochs
# 1 x LSTM(6) - val_acc at 0.9490 after 35 epochs
# 1 x LSTM(6) - val_acc at 1.0000 after 14 epochs
# 1 x LSTM(6) - val_acc at 1.0000 after 15 epochs
# 1 x LSTM(8) - val_acc at 1.0000 after 39 epochs
# 1 x LSTM(256) - val_acc at 1.0000 after 24 epochs

filepath="checkpoints/Recurrent_binary_adder_timedistributed-weights-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', save_best_only=True, mode='min')
reduce_LR = ReduceLROnPlateau(monitor='loss',factor = 0.9, patience=3,cooldown=2, min_lr = 0.00001)
early_stopping = EarlyStopping(monitor='val_acc', patience=10) #, min_delta=0.0001)
callbacks_list = [checkpoint, reduce_LR, early_stopping]

timedistributed_model.fit(
    x_train, y_train_3D,
    epochs=1000,
    batch_size=64,
    validation_data=(x_test, y_test_3D),
    callbacks=callbacks_list
)

Train on 8160 samples, validate on 2041 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000


<keras.callbacks.History at 0x12989bef0>

In [17]:
# timedistributed_model.save("timedistributed_model-lstm6-1_0000.model")
timedistributed_model = load_model("timedistributed_model-lstm6-1_0000.model")
preds = timedistributed_model.predict(x_test[0:5]);

# print(preds)
# print(y_test_3D[0:5])
preds = preds.reshape(preds.shape[0], 8)

for pred in preds:
    pred[pred>=0.5] = 1
    pred[pred<0.5] = 0
    
preds = preds.astype("uint8");

# print(preds)
print(np.packbits(preds, -1))
print(np.packbits(y_test_3D[0:5].reshape(5, 8), -1))

[[ 46]
 [206]
 [149]
 [ 26]
 [ 73]]
[[ 46]
 [206]
 [149]
 [ 26]
 [ 73]]
