In [180]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization, Activation
from keras.optimizers import Adam, SGD
import data_generator
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [184]:
# Useful functions
def save_model(model, path):
    """Write ``model`` to ``path`` by delegating to the model's own ``save`` method.

    Nothing is returned; the only effect is whatever ``model.save`` does.
    """
    model.save(path)
    return None

def load_model(path):
    """Load and return the model stored at ``path``.

    The previous body called ``load_model`` itself, so every call recursed
    until ``RecursionError``.  The work is now delegated to Keras' loader,
    reached through the ``keras`` module imported at the top of the notebook.
    """
    return keras.models.load_model(path)

def generate_data(number_of_training_pairs=10000, number_of_test_pairs=512): # training and test data will be *2 those
    """Draw one training batch and one test batch of pairs and un-pair them.

    Every sample and label yielded by the generators is indexed with ``[0]``
    and ``[1]``.  All first members are placed before all second members, so
    each returned array has two entries per pair.  The third value each
    generator yields is ignored.

    Returns:
        ``(X, y, X_test, y_test)`` as numpy arrays.
    """
    def _unpair(pairs):
        # every first member, followed by every second member
        firsts = [pair[0] for pair in pairs]
        seconds = [pair[1] for pair in pairs]
        return np.array(firsts + seconds)

    train_stream = data_generator.training_generator(number_of_training_pairs)
    test_stream = data_generator.test_generator(number_of_test_pairs)
    train_images, train_labels, _ = next(train_stream)
    test_images, test_labels, _ = next(test_stream)

    return (
        _unpair(train_images),
        _unpair(train_labels),
        _unpair(test_images),
        _unpair(test_labels),
    )

def reshape_and_normalize(X, y, X_test, y_test):
    """Prepare the train and test arrays for the convolutional network.

    Inputs are divided by 255.0 (presumably 8-bit pixel values - confirm against
    the generator), labels are one-hot encoded over 256 classes, and the
    inputs are reshaped to ``(samples, 28, 84, 1)``.

    Returns:
        ``(X, y, X_test, y_test)`` after those transformations.
    """
    class_count = 256
    image_rows, image_cols = 28, 84

    # scale the inputs
    scaled_train = X / 255.0
    scaled_test = X_test / 255.0

    # one-hot encode the labels
    onehot_train = keras.utils.to_categorical(y, class_count)
    onehot_test = keras.utils.to_categorical(y_test, class_count)

    # add the trailing single-channel axis expected by Conv2D
    train_images = scaled_train.reshape((scaled_train.shape[0], image_rows, image_cols, 1))
    test_images = scaled_test.reshape((scaled_test.shape[0], image_rows, image_cols, 1))
    return train_images, onehot_train, test_images, onehot_test

# creates a model where we vary optimizers (2 possibilities: sgd or adam) and their learning rates
# More about the model architecture below
def create_model(optimizer, lr, input_shape=None): # optimizer is a string can be "adam" or "sgd"
    """Build and compile the convolutional classifier used in the optimizer sweep.

    The network has eight convolution stages (the first seven followed by batch
    normalisation, two of them also by 2x2 max pooling), then Flatten,
    Dense(512) and a softmax Dense layer with one unit per class.  It is
    compiled with categorical cross-entropy and the ``categorical_accuracy``
    metric.

    Args:
        optimizer: "sgd" or "adam".  Any other value falls back to the string
            "adam", i.e. Keras' default Adam configuration; ``lr`` is then unused.
        lr: learning rate handed to the SGD/Adam constructor.
        input_shape: shape of one sample.  When omitted, the shape is read from
            the notebook-level array ``X`` (``X.shape[1:]``) exactly as before, so
            existing two-argument calls keep working.

    Returns:
        The compiled ``Sequential`` model.
    """
    number_of_classes = 256
    if input_shape is None:
        # Backward-compatible fallback: relies on the notebook-level X existing.
        input_shape = X.shape[1:]

    # create optimizer (the only thing that varies and we are trying to tune)
    if optimizer == "sgd":
        my_optimizer = SGD(lr=lr)
    elif optimizer == "adam":
        my_optimizer = Adam(lr=lr)
    else:
        my_optimizer = "adam" # make the optimizer default adam if an unknown is being specified

    model = Sequential()

    # Layers 1-7: (filters, kernel size, extra Conv2D arguments, pool afterwards?)
    conv_blocks = [
        (16, (5, 5), {"padding": "SAME", "input_shape": input_shape}, False),
        (32, (3, 3), {"padding": "SAME"}, True),
        (64, (5, 5), {}, False),
        (128, (3, 3), {}, True),
        (256, (3, 3), {}, False),
        (128, (2, 2), {}, False),
        (64, (1, 1), {}, False),
    ]
    for filters, kernel_size, extra, pool_after in conv_blocks:
        model.add(Conv2D(filters, kernel_size, kernel_initializer="he_normal", **extra))
        model.add(BatchNormalization(axis=-1))
        model.add(Activation("relu"))
        if pool_after:
            model.add(MaxPooling2D(pool_size=(2, 2)))

    # Layer 8 and the classification head (no batch normalisation here)
    model.add(Conv2D(32, (1, 1), kernel_initializer="he_normal"))
    model.add(Activation("relu"))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation("relu"))
    model.add(Dense(number_of_classes))
    model.add(Activation("softmax"))

    model.compile(loss="categorical_crossentropy", optimizer=my_optimizer, metrics=['categorical_accuracy'])
    return model

def train_model(model, X, y):
    """Fit ``model`` on ``(X, y)`` with the fixed settings used for every experiment.

    Training always runs for 10 epochs with batches of 32 and holds out 10% of
    the data for validation.  A fresh ``LossAccHistory`` callback is attached.

    Returns:
        ``(model, history)`` - the same model object and the callback instance.
    """
    recorder = LossAccHistory()
    fit_settings = {
        "batch_size": 32,
        "epochs": 10,
        "validation_split": 0.1,  # fraction of the data held out for validation
        "callbacks": [recorder],
    }
    model.fit(X, y, **fit_settings)
    return model, recorder
    
# class used to save history of the model (every batch loss value and accuracy value) - plot purposes
class LossAccHistory(keras.callbacks.Callback):
    """Keras callback that records loss and categorical accuracy after every batch.

    ``losses`` and ``accuracies`` receive one entry each time ``on_batch_end``
    is called; both lists are reset whenever ``on_train_begin`` is called.
    """

    def on_train_begin(self, logs=None):
        # Start from empty lists so a reused callback does not mix runs.
        self.losses = []
        self.accuracies = []

    def on_batch_end(self, batch, logs=None):
        # ``logs`` used to default to a shared mutable ``{}``; accept None instead
        # so a call without logs records None rather than failing on ``.get``.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.accuracies.append(logs.get('categorical_accuracy'))
        
def plot_losses_per_batch(history):
    """Plot ``history.losses`` against the batch index using pyplot."""
    batch_losses = history.losses
    batch_indices = range(len(batch_losses))
    plt.plot(batch_indices, batch_losses)

def plot_accuracies_per_batch(history):
    """Plot ``history.accuracies`` against the batch index using pyplot."""
    batch_accuracies = history.accuracies
    batch_indices = range(len(batch_accuracies))
    plt.plot(batch_indices, batch_accuracies)
    
def evaluate_all_models(models, X_test, y_test):
    """Evaluate every model record on the test data and store its accuracy.

    Each element of ``models`` is a dict with at least ``"model"`` and
    ``"params"``.  The model is evaluated with a batch size of 32, the second
    value of the result is stored under ``"accuracy"``, and the parameters and
    the full result are printed.

    Returns:
        The same ``models`` list, updated in place.
    """
    for position in range(len(models)):
        entry = models[position]
        # evaluate() result is used as [loss, accuracy]
        loss_and_accuracy = entry["model"].evaluate(X_test, y_test, batch_size=32)
        entry["accuracy"] = loss_and_accuracy[1]
        models[position] = entry
        print("Params: ", entry["params"])
        print("[Loss, Accuracy]: ", loss_and_accuracy)
    return models


In [None]:
# Build the dataset: 15000 training pairs (30k images) and 1000 test pairs (2000 images).
raw_splits = generate_data(number_of_training_pairs=15000, number_of_test_pairs=1000)
# Normalize, one-hot encode and reshape; later cells read these four names.
X, y, X_test, y_test = reshape_and_normalize(*raw_splits)
for label, array in (("Train X shape:", X),
                     ("Train y shape:", y),
                     ("Test X shape:", X_test),
                     ("Test y shape:", y_test)):
    print(label, array.shape)

In [183]:
# create models with their respective params
# Each record carries the compiled model, the parameters it was built with, and
# placeholders that the training and evaluation cells fill in later.
models = []
parameters = [{"optimizer": "sgd", "lr": 0.001},
              {"optimizer": "sgd", "lr": 0.01},
              {"optimizer": "sgd", "lr": 0.1},
              {"optimizer": "adam", "lr": 0.001},
              {"optimizer": "adam", "lr": 0.01},
              {"optimizer": "adam", "lr": 0.1}]
for params in parameters:
    models.append({"model": create_model(**params),
                   "params": params,
                   "history": None,
                   # lower-case key: evaluate_all_models writes "accuracy", so the
                   # old "Accuracy" placeholder stayed None next to the real value
                   "accuracy": None
                  })
print("Number of models to test: ", len(models))

Number of models to test:  6


In [162]:
# Train each candidate in turn, keep its per-batch history, and write the
# trained model to its own HDF5 file.
for index in range(len(models)):
    model_info = models[index]
    model, history = train_model(model_info["model"], X, y)
    model_info.update(history=history, model=model)
    models[index] = model_info
    save_model(model=model, path="model_number_{}.h5".format(index))

[15, 15, 15, 15]


In [None]:
# Score every trained model on the held-out test data
models = evaluate_all_models(models=models, X_test=X_test, y_test=y_test)

In [None]:
# TBD - PLOTS HERE

# Nothing below this point is needed: the complete workflow lives in the cells above.

In [88]:
# One batch of 10000 training pairs and one batch of 512 test pairs.
training_set = data_generator.training_generator(10000)
test_set = data_generator.test_generator(512)
X, y, numbers_sum = next(training_set)
X_test, y_test, numbers_sum_test = next(test_set)

# Un-pair each collection: all first members, then all second members.
X = np.array([pair[0] for pair in X] + [pair[1] for pair in X])
y = np.array([pair[0] for pair in y] + [pair[1] for pair in y])
X_test = np.array([pair[0] for pair in X_test] + [pair[1] for pair in X_test])
y_test = np.array([pair[0] for pair in y_test] + [pair[1] for pair in y_test])

In [89]:
# shapes of the un-paired training arrays
for array in (X, y):
    print(array.shape)

(20000, 28, 84)
(20000,)


In [90]:
# constants
number_of_classes, data_height, data_width = 256, 28, 84

# inputs: divide by 255.0, then add a trailing single-channel axis
# NOTE(review): re-running this cell rescales X again - it is not idempotent.
X = X / 255.0
X = X.reshape((X.shape[0], data_height, data_width, 1))
X_test = X_test / 255.0
X_test = X_test.reshape((X_test.shape[0], data_height, data_width, 1))

# labels: one-hot encode over all classes
y = keras.utils.to_categorical(y, number_of_classes)
y_test = keras.utils.to_categorical(y_test, number_of_classes)

In [91]:
# shapes after normalisation, one-hot encoding and reshaping
for array in (X, y):
    print(array.shape)

(20000, 28, 84, 1)
(20000, 256)


In [76]:
# model
number_of_classes = 256
data_height = 28
data_width = 84
input_shape = X.shape[1:]

model = Sequential()

# Layers 1-7: (filters, kernel size, extra Conv2D arguments, pool afterwards?)
conv_blocks = [
    (16, (5, 5), {"padding": "SAME", "input_shape": input_shape}, False),
    (32, (3, 3), {"padding": "SAME"}, True),
    (64, (5, 5), {}, False),
    (128, (3, 3), {}, True),
    (256, (3, 3), {}, False),
    (128, (2, 2), {}, False),
    (64, (1, 1), {}, False),
]
for filters, kernel_size, extra, pool_after in conv_blocks:
    model.add(Conv2D(filters, kernel_size, kernel_initializer="he_normal", **extra))
    model.add(BatchNormalization(axis=-1))
    model.add(Activation("relu"))
    if pool_after:
        model.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 8: final convolution feeding the sigmoid output layer directly
model.add(Conv2D(32, (1, 1), kernel_initializer="he_normal"))
model.add(Flatten())
model.add(Dense(256))
model.add(Activation("sigmoid"))

# NOTE(review): sigmoid + binary_crossentropy scores the 256 outputs independently;
# a later cell switches to softmax + categorical_crossentropy.
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['categorical_accuracy'])

In [77]:
# training
epochs = 10
model.fit(X, y, batch_size=32, epochs=epochs, validation_data=(X_test, y_test))

Train on 4096 samples, validate on 512 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x149163780>

In [78]:
# [loss, categorical_accuracy] on the test split
test_metrics = model.evaluate(X_test, y_test, batch_size=32)
print(test_metrics)

[0.009642288030590862, 0.650390625]


In [94]:
# Works reasonably well on 10k pairs (20k samples)
# model
number_of_classes = 256
data_height = 28
data_width = 84
input_shape = X.shape[1:]

model = Sequential()

# Layers 1-7: (filters, kernel size, extra Conv2D arguments, pool afterwards?)
conv_blocks = [
    (16, (5, 5), {"padding": "SAME", "input_shape": input_shape}, False),
    (32, (3, 3), {"padding": "SAME"}, True),
    (64, (5, 5), {}, False),
    (128, (3, 3), {}, True),
    (256, (3, 3), {}, False),
    (128, (2, 2), {}, False),
    (64, (1, 1), {}, False),
]
for filters, kernel_size, extra, pool_after in conv_blocks:
    model.add(Conv2D(filters, kernel_size, kernel_initializer="he_normal", **extra))
    model.add(BatchNormalization(axis=-1))
    model.add(Activation("relu"))
    if pool_after:
        model.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 8 plus a 512-unit hidden layer before the sigmoid output
model.add(Conv2D(32, (1, 1), kernel_initializer="he_normal"))
model.add(Activation("relu"))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation("relu"))
model.add(Dense(256))
model.add(Activation("sigmoid"))

# NOTE(review): sigmoid + binary_crossentropy scores the 256 outputs independently;
# a later cell switches to softmax + categorical_crossentropy.
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['categorical_accuracy'])

In [95]:
# training
epochs = 20
model.fit(X, y, batch_size=32, epochs=epochs, validation_data=(X_test, y_test))

Train on 20000 samples, validate on 1024 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

KeyboardInterrupt: 

In [96]:
# [loss, categorical_accuracy] on the test split
test_metrics = model.evaluate(X_test, y_test, batch_size=32)
print(test_metrics)

[0.001086571221549093, 0.9638671875]


In [99]:
# model
number_of_classes = 256
data_height = 28
data_width = 84
input_shape = X.shape[1:]

model = Sequential()

# Layers 1-7: (filters, kernel size, extra Conv2D arguments, pool afterwards?)
conv_blocks = [
    (16, (5, 5), {"padding": "SAME", "input_shape": input_shape}, False),
    (32, (3, 3), {"padding": "SAME"}, True),
    (64, (5, 5), {}, False),
    (128, (3, 3), {}, True),
    (256, (3, 3), {}, False),
    (128, (2, 2), {}, False),
    (64, (1, 1), {}, False),
]
for filters, kernel_size, extra, pool_after in conv_blocks:
    model.add(Conv2D(filters, kernel_size, kernel_initializer="he_normal", **extra))
    model.add(BatchNormalization(axis=-1))
    model.add(Activation("relu"))
    if pool_after:
        model.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 8 plus a 512-unit hidden layer before the softmax output
model.add(Conv2D(32, (1, 1), kernel_initializer="he_normal"))
model.add(Activation("relu"))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation("relu"))
model.add(Dense(256))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['categorical_accuracy'])

In [100]:
# training
epochs = 10
model.fit(X, y, batch_size=32, epochs=epochs, validation_split=0.1)

Train on 18000 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x159705470>

In [101]:
# [loss, categorical_accuracy] on the test split
test_metrics = model.evaluate(X_test, y_test, batch_size=32)
print(test_metrics)

[0.19805248358170502, 0.9501953125]


In [103]:
# Labels for the values model.evaluate returns, in the same order
model.metrics_names

['loss', 'categorical_accuracy']

In [106]:
# Round-trip check: save the trained model to HDF5, load it back, and confirm
# the reloaded copy evaluates to the same [loss, categorical_accuracy].
from tensorflow.keras.models import load_model

filepath = "test.h5"

# Creates the HDF5 file named by `filepath`
model.save(filepath)

# Returns a compiled model identical to the previous one
loaded_model = load_model(filepath)
print(loaded_model.evaluate(X_test, y_test, batch_size=32))

Instructions for updating:
`normal` is a deprecated alias for `truncated_normal`
[0.19805248358170502, 0.9501953125]


In [107]:
def save_model(model, path):
    """Write ``model`` to ``path`` by delegating to the model's own ``save`` method.

    Nothing is returned; the only effect is whatever ``model.save`` does.
    """
    model.save(path)
    return None

def load_model(path):
    """Load and return the model stored at ``path``.

    This definition replaces any previously imported ``load_model`` name, and
    its old body called itself, so every call recursed until
    ``RecursionError``.  The work is now delegated to Keras' loader, reached
    through the ``keras`` module imported at the top of the notebook.
    """
    return keras.models.load_model(path)

In [123]:
def create_model(optimizer, lr, input_shape=None): # optimizer is a string can be adam or sgd
    """Build and compile the convolutional classifier used in the optimizer sweep.

    The network has eight convolution stages (the first seven followed by batch
    normalisation, two of them also by 2x2 max pooling), then Flatten,
    Dense(512) and a softmax Dense layer with one unit per class.  It is
    compiled with categorical cross-entropy and the ``categorical_accuracy``
    metric.

    Args:
        optimizer: "sgd" or "adam".  Any other value falls back to the string
            "adam", i.e. Keras' default Adam configuration; ``lr`` is then unused.
        lr: learning rate handed to the SGD/Adam constructor.
        input_shape: shape of one sample.  When omitted, the shape is read from
            the notebook-level array ``X`` (``X.shape[1:]``) exactly as before, so
            existing two-argument calls keep working.

    Returns:
        The compiled ``Sequential`` model.
    """
    number_of_classes = 256
    if input_shape is None:
        # Backward-compatible fallback: relies on the notebook-level X existing.
        input_shape = X.shape[1:]

    # create optimizer (the only thing that varies and we are trying to tune)
    if optimizer == "sgd":
        my_optimizer = SGD(lr=lr)
    elif optimizer == "adam":
        my_optimizer = Adam(lr=lr)
    else:
        my_optimizer = "adam" # make the optimizer default adam if an unknown is being specified

    model = Sequential()

    # Layers 1-7: (filters, kernel size, extra Conv2D arguments, pool afterwards?)
    conv_blocks = [
        (16, (5, 5), {"padding": "SAME", "input_shape": input_shape}, False),
        (32, (3, 3), {"padding": "SAME"}, True),
        (64, (5, 5), {}, False),
        (128, (3, 3), {}, True),
        (256, (3, 3), {}, False),
        (128, (2, 2), {}, False),
        (64, (1, 1), {}, False),
    ]
    for filters, kernel_size, extra, pool_after in conv_blocks:
        model.add(Conv2D(filters, kernel_size, kernel_initializer="he_normal", **extra))
        model.add(BatchNormalization(axis=-1))
        model.add(Activation("relu"))
        if pool_after:
            model.add(MaxPooling2D(pool_size=(2, 2)))

    # Layer 8 and the classification head (no batch normalisation here)
    model.add(Conv2D(32, (1, 1), kernel_initializer="he_normal"))
    model.add(Activation("relu"))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation("relu"))
    model.add(Dense(number_of_classes))
    model.add(Activation("softmax"))

    model.compile(loss="categorical_crossentropy", optimizer=my_optimizer, metrics=['categorical_accuracy'])
    return model

In [124]:
# throwaway model for trying out the helpers: SGD with a learning rate of 0.1
test_model = create_model("sgd", 0.1)

In [133]:
def generate_data(number_of_training_pairs=10000, number_of_test_pairs=512): # training and test data will be *2 those
    """Draw one training batch and one test batch of pairs and un-pair them.

    The requested pair counts are now passed to the generators; previously
    both arguments were ignored and 10000 / 512 were always used.

    Every sample and label yielded by the generators is indexed with ``[0]``
    and ``[1]``.  All first members are placed before all second members, so
    each returned array has two entries per pair.  The third value each
    generator yields is ignored.

    Returns:
        ``(X, y, X_test, y_test)`` as numpy arrays.
    """
    def _unpair(pairs):
        # every first member, followed by every second member
        firsts = [pair[0] for pair in pairs]
        seconds = [pair[1] for pair in pairs]
        return np.array(firsts + seconds)

    training_set = data_generator.training_generator(number_of_training_pairs)
    test_set = data_generator.test_generator(number_of_test_pairs)
    X, y, _ = next(training_set)
    X_test, y_test, _ = next(test_set)

    return _unpair(X), _unpair(y), _unpair(X_test), _unpair(y_test)

In [147]:
class LossAccHistory(keras.callbacks.Callback):
    """Keras callback that records loss and categorical accuracy after every batch.

    ``losses`` and ``accuracies`` receive one entry each time ``on_batch_end``
    is called; both lists are reset whenever ``on_train_begin`` is called.
    """

    def on_train_begin(self, logs=None):
        # Start from empty lists so a reused callback does not mix runs.
        self.losses = []
        self.accuracies = []

    def on_batch_end(self, batch, logs=None):
        # ``logs`` used to default to a shared mutable ``{}``; accept None instead
        # so a call without logs records None rather than failing on ``.get``.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.accuracies.append(logs.get('categorical_accuracy'))
        
#     def on_epoch_end(self, epoch, logs={}):
#         self.losses.append(logs.get('loss'))
#         self.accuracies.append(logs.get('categorical_accuracy'))
        
# history = LossAccHistory()
# test_model.fit(X[0:500], y[0:500], batch_size=32, epochs=5, validation_split=0.1, callbacks=[history])

Train on 450 samples, validate on 50 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x162f8d3c8>

In [148]:
# per-batch losses first, then per-batch accuracies
for series in (history.losses, history.accuracies):
    print(series)

[7.9564724, 1.1944323, 0.87889886, 1.0200045, 0.45208138, 0.21060523, 0.7228309, 0.24094003, 1.8075376, 0.2363414, 0.76121944, 0.14856291, 0.3987022, 0.100380555, 0.24653392, 1.7890053, 0.77559346, 0.054110695, 0.09549186, 0.043260794, 1.3083303, 0.08706829, 0.079399064, 0.08174059, 0.08740516, 0.056921907, 0.4620665, 0.5501208, 0.06587903, 0.104383536, 0.054414228, 0.024807712, 0.02603944, 0.023731273, 0.52836597, 0.028588887, 0.5379766, 0.030301126, 0.046189114, 0.023221772, 0.1576784, 0.52122045, 0.053343896, 0.05967178, 0.02585376, 0.012239475, 0.01849955, 0.01249096, 0.010327104, 0.52097785, 0.016067531, 0.51198107, 0.014715387, 0.016183976, 0.5197759, 0.015276787, 0.015541628, 0.011852007, 0.013347808, 0.055449046, 0.045773264, 0.012184987, 0.014783305, 0.080280714, 0.012427762, 0.012179995, 0.0112769455, 0.013792239, 0.5184592, 0.012693517, 0.5153785, 0.011070278, 0.01156084, 0.51682985, 0.06604616]
[0.1875, 0.6875, 0.71875, 0.84375, 0.9375, 1.0, 0.9375, 0.96875, 0.84375, 0.9062

In [150]:
def plot_losses_per_batch(history):
    """Plot ``history.losses`` against the batch index using pyplot."""
    batch_losses = history.losses
    batch_indices = range(len(batch_losses))
    plt.plot(batch_indices, batch_losses)

def plot_accuracies_per_batch(history):
    """Plot ``history.accuracies`` against the batch index using pyplot."""
    batch_accuracies = history.accuracies
    batch_indices = range(len(batch_accuracies))
    plt.plot(batch_indices, batch_accuracies)

In [165]:
# train the throwaway model; train_model hands back the model and its history callback
trained = train_model(test_model, X, y)
model1, history = trained

Train on 450 samples, validate on 50 samples
Epoch 1/2
Epoch 2/2


In [166]:
# model1 came back from train_model(test_model, ...), so both names are evaluated
for candidate in (model1, test_model):
    print(candidate.evaluate(X_test, y_test))

[9.282231956720352, 0.021484375]
[9.282231956720352, 0.021484375]


In [174]:
# try evaluate_all_models on two records, using only the first 100 test samples
mdl = [{"model": candidate, "params": "meh"} for candidate in (model1, test_model)]
mdl = evaluate_all_models(mdl, X_test[:100], y_test[:100])

Params:  meh
[Loss, Accuracy]:  [9.232184143066407, 0.0]
Params:  meh
[Loss, Accuracy]:  [9.232184143066407, 0.0]
