In [1]:
""" Repeating MNIST first experiment of swish paper. """

import numpy as np
%matplotlib inline

np.random.seed(2)

from sklearn.model_selection import train_test_split

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

# For adding new activation function
from keras import backend as K
from keras.datasets import mnist
from keras.utils.generic_utils import get_custom_objects
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
print("X_train original shape", X_train.shape)
print("y_train original shape", Y_train.shape)

X_train original shape (60000, 28, 28)
y_train original shape (60000,)


In [3]:
# Normalization
nb_classes = 10
# Normalize the data
X_train = X_train / 255.0
X_test = X_test / 255.0

In [4]:
# Encode labels to one hot vectors (ex : 2 -> [0,0,1,0,0,0,0,0,0,0])
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

In [5]:
# Set the random seed
random_seed = 2

In [6]:
# Split the train and the validation set for the fitting
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size = 0.1, random_state=random_seed)

In [7]:
# Reshape image in 3 dimensions (height = 28px, width = 28px , canal = 1)
X_train = X_train.reshape(-1,784)
X_val = X_val.reshape(-1,784)
X_test = X_test.reshape(-1,784)
# test = test.values.reshape(-1,28,28,1)
print(X_train.shape, X_val.shape, X_test.shape)

(54000, 784) (6000, 784) (10000, 784)


In [18]:
def swish(x):
    return x*K.sigmoid(x)

def e_swish_2(x):
    sigmoid = K.sigmoid(x)
    return K.maximum(x*sigmoid, x*(2-sigmoid))

In [9]:
# Set the CNN model 
# my CNN architechture is In -> [[Conv2D->relu]*2 -> MaxPool2D -> Dropout]*2 -> Flatten -> Dense -> Dropout -> Out
def create(act, n):
    model = Sequential()
    # First conv block
    model.add(Dense(512, input_shape=(784,)))
    model.add(Activation(act))
    for i in range(n-1):
        model.add(Dense(512))
        model.add(Activation(act))
        model.add(Dropout(0.3))
        
    model.add(Dense(nb_classes))
    model.add(Activation("sigmoid"))
    
    return model

In [16]:
record = []
for n in [10, 15, 20]:
    opt = SGD()
    # Set a learning rate annealer
#         learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.7, min_lr=0.00001) 
    # Common params 
    epochs = 10
    batch_size = 100
    # Create and compile the model
    model = model = create("relu", n)
#     model.summary()
    # Compile the model
    model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
    # Train the model
    history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
                        verbose = 1)# , callbacks=[learning_rate_reduction])

    record.append([n, model.evaluate(X_test, Y_test)])
    print(n, ":", model.evaluate(X_test, Y_test))
    K.get_session().close()
    K.set_session(K.tf.Session())

print()
print()
print()
for r in record:
    print(r)




[10, [0.13656429858431221, 0.96499999999999997]]
[15, [1.1838343352317811, 0.43409999999999999]]
[20, [2.544732809829712, 0.12379999999999999]]


In [17]:
record = []
for n in [10, 15, 20]:
    opt = SGD()
    # Set a learning rate annealer
#         learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.7, min_lr=0.00001) 
    # Common params 
    epochs = 10
    batch_size = 100
    # Create and compile the model
    model = model = create(e_swish_2, n)
#     model.summary()
    # Compile the model
    model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
    # Train the model
    history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
                        verbose = 1)# , callbacks=[learning_rate_reduction])

    record.append([n, model.evaluate(X_test, Y_test)])
    print(n, ":", model.evaluate(X_test, Y_test))
    K.get_session().close()
    K.set_session(K.tf.Session())

    
print()
print()
print()
for r in record:
    print(r)

Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10 : [0.088420898429770023, 0.97629999999999995]
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
15 : [1.714672702407837, 0.2069]
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
20 : [2.3014014907836913, 0.126]



[10, [0.088420898429770023, 0.97629999999999995]]
[15, [1.714672702407837, 0.2069]]
[20, [2.3014014907836913, 0.126]]


In [19]:
record = []
for n in [10, 15, 20]:
    opt = SGD()
    # Set a learning rate annealer
#         learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.7, min_lr=0.00001) 
    # Common params 
    epochs = 10
    batch_size = 100
    # Create and compile the model
    model = model = create(swish, n)
#     model.summary()
    # Compile the model
    model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
    # Train the model
    history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
                        verbose = 1)# , callbacks=[learning_rate_reduction])

    record.append([n, model.evaluate(X_test, Y_test)])
    print(n, ":", model.evaluate(X_test, Y_test))
    K.get_session().close()
    K.set_session(K.tf.Session())

    
print()
print()
print()
for r in record:
    print(r)

Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10 : [2.2998147857666016, 0.1135]
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

KeyboardInterrupt: 

In [10]:
# results = []
# for act in ["relu", e_swish_2, swish]:
#     record = []
#     for n in [15, 20, 25, 30]:
#         opt = SGD()
#         # Set a learning rate annealer
# #         learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.7, min_lr=0.00001) 
#         # Common params 
#         epochs = 10
#         batch_size = 100
#         # Create and compile the model
#         model = model = create("relu", 5)
#     #     model.summary()
#         # Compile the model
#         model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
#         # Train the model
#         history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
#                             verbose = 0)# , callbacks=[learning_rate_reduction])

#         record.append([n, model.evaluate(X_test, Y_test)])
#         print(n, ":", model.evaluate(X_test, Y_test))
        
#     results.append(record)
    
#     print()
#     print()
#     print()

Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
6 : [0.08236231568926014, 0.97540000000000004]
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
8 : [0.085712986452970652, 0.97370000000000001]
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10 : [0.086075643507856875, 0.9728]
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
12 : [0.08723578938604333, 0.97340000000000004]



Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
6 : [0.08631477886

Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
8 : [0.08903080306346528, 0.97330000000000005]
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10 : [0.082056643377337604, 0.97430000000000005]
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
12 : [0.091209545813687148, 0.97130000000000005]





In [None]:
probs_1 = model.predict_proba(X_test)
scores = model.evaluate(X_test, Y_test)
print(scores)