<a href="https://colab.research.google.com/github/balszeg/deep_learning_kishf5/blob/main/kishf5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing and building the network

In [1]:
# for this homework hyperas will be used
!pip3 install hyperas
!pip3 install hyperopt



First, import the necessary utilities from Keras

In [2]:
import keras
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, Activation
from keras.callbacks import EarlyStopping
import numpy as np

Downloading the dataset

In [3]:
# Load CIFAR-10 as two (images, labels) pairs: the training split and the test split
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Setting up the parameters

In [4]:
# Training configuration for the baseline network
batch_size, num_classes, epochs = 32, 10, 50

One-hot encoding the labels into 10 classes

In [5]:
# Replace the integer class labels of both splits with one-hot rows of width num_classes
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

Setting the datatype to float32 for later purposes

In [6]:
# Cast the images of both splits to float32 (the next cell divides them in place)
x_train, x_test = (images.astype('float32') for images in (x_train, x_test))

Normalizing the data between 0 and 1

In [7]:
# Scale pixel values from [0, 255] to [0, 1], writing the result back into the same arrays
np.divide(x_train, 255, out=x_train)
np.divide(x_test, 255, out=x_test)

Building and compiling the network

In [8]:
# the model below based on basic principles and trial-error experiences:
# flattened image -> three 512-unit ReLU layers -> 10-way softmax
model = Sequential([
    Flatten(),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

Setting up an early-stopping callback that halts training once the validation accuracy reaches a plateau

In [9]:
# Early stopping watches val_accuracy with a patience of 5 epochs
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=5, verbose=0),
]

Training the model

In [10]:
# NOTE(review): the test split doubles as the validation set here, so early
# stopping is driven by the same data the final accuracy is reported on.
result = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
    callbacks=callbacks,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50


Let's see the best validation accuracy at this point

In [12]:
# Highest validation accuracy over all epochs that were run
val_acc_per_epoch = result.history['val_accuracy']
best_val_acc = np.max(val_acc_per_epoch)
print('The best val_acc:', best_val_acc)

The best val_acc: 0.5504999756813049


# Optimizing with hyperas

For hyperas, the network has to be rewritten in the format that hyperas expects

In [13]:
# firstly, separately downloading the data and building the model
# for these a function defined

def data():
  """Load CIFAR-10 and return it ready for training.

  The images of both splits are cast to float32 and scaled to the 0-1 range,
  and the labels are one-hot encoded into 10 classes. The function is kept as
  a flat list of statements with one final return line.
  """
  num_classes = 10
  (x_train, y_train), (x_test, y_test) = cifar10.load_data()

  # images: float32, then scaled in place to 0-1
  x_train = x_train.astype('float32')
  x_train /= 255
  x_test = x_test.astype('float32')
  x_test /= 255

  # labels: integer class ids to one-hot rows
  y_train = keras.utils.to_categorical(y_train, num_classes)
  y_test = keras.utils.to_categorical(y_test, num_classes)

  return x_train, y_train, x_test, y_test

In [16]:
# defining a function for model creating too

def create_model(x_train, y_train, x_test, y_test):
    """Build, train and score one candidate network for the hyperas search.

    Every double-brace template in this body is a hyperas placeholder for one
    hyperparameter of the search space. The network is a flattened input
    followed by three Dense/activation/Dropout blocks and a 10-way softmax.
    It is trained with early stopping, using the test split as validation data.
    One line per trial is appended to hyperas-cifar10-log.csv. The result dict
    holds the negated best validation accuracy as 'loss', the STATUS_OK flag
    and the trained model.
    """

    # the Swish activation is implemented as a custom layer
    # for that, the code from this site used
    # https://stackoverflow.com/questions/53050448/custom-activation-with-parameter
    from keras.layers import Layer
    from keras import backend as K

    class Swish(Layer):
        """Activation layer computing sigmoid(beta * x) * x with a fixed beta."""

        def __init__(self, beta, **kwargs):
            super(Swish, self).__init__(**kwargs)
            self.beta = K.cast_to_floatx(beta)

        def call(self, inputs):
            return K.sigmoid(self.beta * inputs) * inputs

        def get_config(self):
            # add beta to the base layer config
            config = {'beta': float(self.beta)}
            base_config = super(Swish, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

        def compute_output_shape(self, input_shape):
            return input_shape

    # the different choices given for the hyperas
    # the number of neurons on the dense layers
    n_layer1 = {{choice([256, 512, 1024])}}
    n_layer2 = {{choice([256, 512, 1024])}}
    n_layer3 = {{choice([256, 512, 1024])}}

    # the dropout and its measure
    dropout_1 = {{uniform(0, 0.5)}}
    dropout_2 = {{uniform(0, 0.5)}}
    dropout_3 = {{uniform(0, 0.5)}}

    # the activation functions
    act = {{choice(['relu', 'leakyrelu', 'swish'])}}

    # the optimizers
    optim = {{choice(['rmsprop', 'adam', 'sgd'])}}

    # the batch sizes
    n_batch = {{choice([64, 128, 256])}}

    # report every sampled hyperparameter, including the third layer and its dropout
    print("the model's hyperparameters:", n_layer1, n_layer2, n_layer3,
          dropout_1, dropout_2, dropout_3, act, optim, n_batch)

    # one (units, dropout rate) pair per hidden block
    hidden_blocks = [(n_layer1, dropout_1), (n_layer2, dropout_2), (n_layer3, dropout_3)]

    model = Sequential()
    model.add(Flatten())
    for n_units, dropout_rate in hidden_blocks:
        model.add(Dense(n_units))
        # Each block gets its own activation layer object. Adding one shared
        # instance several times gives the Sequential model duplicate layer
        # names, which newer Keras versions reject.
        if act == 'relu':
            model.add(keras.layers.ReLU())
        elif act == 'leakyrelu':
            model.add(keras.layers.LeakyReLU())
        elif act == 'swish':
            model.add(Swish(beta=0.3))
        else:
            raise ValueError('unknown activation: ' + str(act))
        model.add(Dropout(dropout_rate))
    model.add(Dense(10, activation='softmax'))

    model.compile(optimizer=optim,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # setting early stop
    callbacks = [EarlyStopping(monitor='val_accuracy', patience=3, verbose=0)]

    # training the model
    result = model.fit(x_train, y_train,
                       batch_size=n_batch,
                       epochs=50,
                       verbose=2,
                       validation_data=(x_test, y_test),
                       callbacks=callbacks,
                       shuffle=True)

    # printing the best accuracy from the epochs
    best_val_acc = np.amax(result.history['val_accuracy'])
    print('the best val_acc:', best_val_acc)

    # appending this trial to the log file where the training can be later seen
    # (same column order as the header written by the notebook)
    log_fields = [n_layer1, n_layer2, n_layer3,
                  dropout_1, dropout_2, dropout_3,
                  act, optim, n_batch, best_val_acc]
    with open('hyperas-cifar10-log.csv', 'a') as csv_file:
        csv_file.write(';'.join(str(field) for field in log_fields) + '\n')

    # the loss is the negated accuracy, so minimizing it maximizes accuracy
    # NOTE(review): every trial hands its trained model back in this dict; if
    # the trials object keeps them all alive, that presumably contributes to
    # the RAM exhaustion described in the conclusion - confirm before changing.
    return {'loss': -best_val_acc, 'status': STATUS_OK, 'model': model}

Initializing the log file

In [17]:
# (re)create the log file and write its header row:
# one semicolon-separated column per hyperparameter, then the score
log_columns = [
    'n_layer1', 'n_layer2', 'n_layer3',
    'dropout_1', 'dropout_2', 'dropout_3',
    'act', 'optim', 'n_batch',
    'best_val_acc',
]
with open('hyperas-cifar10-log.csv', 'w') as csv_file:
  csv_file.write(';'.join(log_columns) + '\n')


**Important step**: the user has to download the current .ipynb file and upload it right back into the session's files. The reason is that hyperas cannot use the choices defined above unless it can reach the notebook's source, and Colab stores the notebook somewhere else. With this download-upload workaround we can still use hyperas.

In [18]:
# importing the utilities for hyperas
import hyperas
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

Starting the optimizing

In [None]:
# Run 100 TPE-guided evaluations of create_model on the arrays from data().
# notebook_name is this notebook's file name without the .ipynb extension.
best_run, best_model = optim.minimize(
    model=create_model,
    data=data,
    algo=tpe.suggest,
    max_evals=100,
    trials=Trials(),
    notebook_name='kishf5',
)

>>> Imports:
#coding=utf-8

try:
    import keras
except:
    pass

try:
    from keras.datasets import cifar10
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras.layers import Dense, Flatten, Dropout, Activation
except:
    pass

try:
    from keras.callbacks import EarlyStopping
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    from keras.layers import Layer
except:
    pass

try:
    from keras import backend as K
except:
    pass

try:
    import hyperas
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    import pandas
except:
    pass

try:
    import matplotlib.pyplot as plt
except:
    pass

try:
    import seaborn as sns
except:
    pass

>>> Hyperas search space:

def get_space():
    return {
        'n_layer1': hp.choice('n_layer1

Printing out the best solution

In [None]:
# Rebuild the preprocessed splits, then score the winning model on the test split
x_train, y_train, x_test, y_test = data()

print("evaulation of the best model:")
best_model_scores = best_model.evaluate(x_test, y_test)
print(best_model_scores)

print("best model hyperparameters:")
print(best_run)

We can see the difference:
without optimizing: 55% VS with optimizing: 75%

# Evaluation

Reading the log file

In [None]:
import pandas

# The trial log is semicolon-separated, with the header row written before the search
hyperas_log = pandas.read_csv('hyperas-cifar10-log.csv', sep=';')

Let's see the 10 best results

In [None]:
# Ten trials with the highest validation accuracy, best first
trials_by_acc_desc = hyperas_log.sort_values(by='best_val_acc', ascending=False)
hyperas_best10 = trials_by_acc_desc.head(10)
hyperas_best10

...and the worst 10

In [None]:
# Ten trials with the lowest validation accuracy (still ordered best to worst)
trials_by_acc_desc = hyperas_log.sort_values(by='best_val_acc', ascending=False)
hyperas_worst10 = trials_by_acc_desc.tail(10)
hyperas_worst10

Some context for the given result:

In [None]:
import matplotlib.pyplot as plt

# all of the data in blue
# the best 10 in red
# the worst 10 in yellow

# Only columns that the log file actually contains: the searched network has
# three hidden layers and three dropouts, so there is no n_layer4 / dropout_4
# column, and asking for one raises a KeyError.
numeric_hyperparams = ['n_layer1', 'n_layer2', 'n_layer3',
                       'dropout_1', 'dropout_2', 'dropout_3',
                       'n_batch']

for hyperparam in numeric_hyperparams:
  ax1 = hyperas_log.plot(kind='scatter', x=hyperparam, y='best_val_acc')
  hyperas_best10.plot(kind='scatter', x=hyperparam, y='best_val_acc', color='red', ax=ax1)
  hyperas_worst10.plot(kind='scatter', x=hyperparam, y='best_val_acc', color='yellow', ax=ax1)

Now the categorical variables

In [None]:
# One figure per categorical hyperparameter:
# every trial in the default colour, the best 10 in red, the worst 10 in yellow
for categorical in ('act', 'optim'):
  plt.scatter(hyperas_log[categorical], hyperas_log['best_val_acc'])
  plt.scatter(hyperas_best10[categorical], hyperas_best10['best_val_acc'], color='red')
  plt.scatter(hyperas_worst10[categorical], hyperas_worst10['best_val_acc'], color='yellow')
  plt.show()


Import seaborn for more visualizing

In [None]:
import seaborn as sns

# Best validation accuracy for every (batch size, optimizer) pair:
# rows are batch sizes, columns are optimizers
max_val_acc = hyperas_log.groupby(['n_batch', 'optim']).max()
max_val_acc = max_val_acc.unstack()[['best_val_acc']]
heatmap_ax = sns.heatmap(max_val_acc.best_val_acc, annot=True, fmt='.4g')

# bug: seaborn cuts off borders, https://github.com/mwaskom/seaborn/issues/1773
# Pin the y-limits to the full row range (the heatmap's y axis is inverted)
# instead of shifting the current limits by 0.5: the shift only suits the
# affected matplotlib release and adds blank margins on versions without the bug.
heatmap_ax.set_ylim(len(max_val_acc.best_val_acc), 0);

Conclusion: 
Unfortunately, Colab crashes during the optimization at around the 52nd evaluation. The reason given for this error is, citing: "You run out of all of your available RAM.". The results obtained up to that point can be seen in the .csv log file. 