## Split-net

In [0]:
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass

!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
!mkdir -p drive
!google-drive-ocamlfuse drive

fuse: mountpoint is not empty
fuse: if you are sure this is safe, use the 'nonempty' mount option


## Loading the data

In [1]:
from time import time
import os
import gzip
import numpy as np
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from keras.datasets import cifar100

Using TensorFlow backend.


In [0]:
(X_train, y_train), (X_test, y_test) = cifar100.load_data(label_mode='fine')
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, 
                                                  test_size=0.1, 
                                                  random_state=1974,
                                                  stratify = y_train)

In [0]:
train_features = X_train.reshape(X_train.shape[0], 32, 32, 3)/255
vali_features = X_val.reshape(X_val.shape[0], 32, 32, 3)/255
test_features = X_test.reshape(X_test.shape[0], 32, 32, 3)/255

y_train_cat = np_utils.to_categorical(y_train)
y_val_cat = np_utils.to_categorical(y_val)
y_test_cat = np_utils.to_categorical(y_test)

## Defining coarse labels

In [0]:
dict_coarse2 = {0: 4,  1: 1,  2: 14,  3: 8,  4: 12,  5: 6,  6: 7,  7: 7,  8: 18,  9: 3,  10: 3,
                11: 14,  12: 9,  13: 18,  14: 7,  15: 11,  16: 3,  17: 9,  18: 7,  19: 11,  20: 6,
                21: 11,  22: 5,  23: 10,  24: 7,  25: 6,  26: 13,  27: 15,  28: 3,  29: 15,  30: 0,
                31: 11,  32: 1,  33: 10,  34: 12,  35: 14,  36: 16,  37: 9,  38: 11,  39: 5,  40: 5,
                41: 18,  42: 8,  43: 8,  44: 15,  45: 13,  46: 14,  47: 17,  48: 18,  49: 10,  50: 16,
                51: 4,  52: 17,  53: 4,  54: 2,  55: 12,  56: 17,  57: 4,  58: 18,  59: 17,  60: 10,
                61: 3,  62: 2,  63: 12,  64: 12,  65: 16,  66: 12,  67: 1,  68: 9,  69: 18,  70: 2,
                71: 10,  72: 12,  73: 1,  74: 16,  75: 12,  76: 9,  77: 13,  78: 15,  79: 13,  80: 16,
                81: 18,  82: 2,  83: 4,  84: 6,  85: 18,  86: 5,  87: 5,  88: 8,  89: 18,  90: 18,
                91: 1,  92: 2,  93: 15,  94: 6,  95: 0,  96: 17,  97: 8,  98: 14,  99: 13}


dict_coarse1 = {0: 0,
              1: 0,
              2: 1,
              3: 2,
              4: 1,
              5: 2,
              6: 2,
              7: 3,
              8: 4,
              9: 5,
              10: 5,
              11: 4,
              12: 4,
              13: 3,
              14: 6,
              15: 7,
              16: 4,
              17: 1,
              18: 8}

y_train_coarse2 = np.vectorize(dict_coarse2.get)(y_train)
y_val_coarse2 = np.vectorize(dict_coarse2.get)(y_val)
y_test_coarse2 = np.vectorize(dict_coarse2.get)(y_test)

y_train_coarse1 = np.vectorize(dict_coarse1.get)(y_train_coarse2)
y_val_coarse1 = np.vectorize(dict_coarse1.get)(y_val_coarse2)
y_test_coarse1 = np.vectorize(dict_coarse1.get)(y_test_coarse2)


y_train_c_cat1 = np_utils.to_categorical(y_train_coarse1)
y_val_c_cat1 = np_utils.to_categorical(y_val_coarse1)
y_test_c_cat1 = np_utils.to_categorical(y_test_coarse1)

y_train_c_cat2 = np_utils.to_categorical(y_train_coarse2)
y_val_c_cat2 = np_utils.to_categorical(y_val_coarse2)
y_test_c_cat2 = np_utils.to_categorical(y_test_coarse2)

## Training the model

In [0]:
import keras
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Input
from keras import optimizers
from keras.callbacks import TensorBoard, ReduceLROnPlateau, CSVLogger
from keras.layers.normalization import BatchNormalization
from keras import backend as K

In [0]:
class TimingCallback(keras.callbacks.Callback):
  """Callback that saves the time elapsed of each epoch to the log.
  """  
  def on_epoch_begin(self, epoch, logs={}):
    self.starttime=time()
  def on_epoch_end(self, epoch, logs={}):
    logs['time_elapsed'] = (time()-self.starttime)

In [0]:
class LossWeightsModifier3Vars(keras.callbacks.Callback):
  """Callback that modifies the loss weights on certain epochs.
    # Arguments
        alpha: coeficient for the coarser class.
        beta: coeficient for the second level coarse class.
        gamma: coeficient for the fine class.
  """  
  def __init__(self, alpha, beta, gamma):
    self.alpha = alpha
    self.beta = beta
    self.gamma = gamma
    
  def on_epoch_end(self, epoch, logs={}):

    if epoch == 4:
      K.set_value(self.alpha, 0.6)
      K.set_value(self.beta, 0.35)
      K.set_value(self.gamma, 0.05)
      print('Changing loss weights to: coarse1 = {}, coarse2 = {}, fine = {}'.format(K.eval(self.alpha), K.eval(self.beta), K.eval(self.gamma)))
    
    if epoch == 9:
      K.set_value(self.alpha, 0)
      K.set_value(self.beta, 0.8)
      K.set_value(self.gamma, 0.2)
      print('Changing loss weights to: coarse1 = {}, coarse2 = {}, fine = {}'.format(K.eval(self.alpha), K.eval(self.beta), K.eval(self.gamma)))
      
    if epoch == 14:
      K.set_value(self.alpha, 0)
      K.set_value(self.beta, 0.4)
      K.set_value(self.gamma, 0.6)  
      print('Changing loss weights to: coarse1 = {}, coarse2 = {}, fine = {}'.format(K.eval(self.alpha), K.eval(self.beta), K.eval(self.gamma)))
  
    if epoch == 21:
      K.set_value(self.alpha, 0)
      K.set_value(self.beta, 0)
      K.set_value(self.gamma, 1)  
      print('Changing loss weights to: coarse1 = {}, coarse2 = {}, fine = {}'.format(K.eval(self.alpha), K.eval(self.beta), K.eval(self.gamma)))
  
    logs['alpha'] = K.eval(self.alpha)
    logs['beta'] = K.eval(self.beta) 
    logs['gamma'] = K.eval(self.gamma) 

In [0]:
img_rows, img_cols = 32, 32
input_shape = (img_rows, img_cols, 3)

num_classes_coarse1 = 9
num_classes_coarse2 = 19
num_classes_fine = 100

img_input = Input(shape=input_shape, name='input')

x = Conv2D(64, (3, 3), activation='relu', name='block1_conv1', padding='same')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', name='block1_conv2', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), name='block1_pool')(x)

x = Conv2D(128, (3, 3), activation='relu', name='block1_conv3', padding='same')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', name='block1_conv4', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), name='block2_pool')(x)

x = Conv2D(256, (3, 3), activation='relu', name='block1_conv5', padding='same')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', name='block1_conv6', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), name='block3_pool')(x)

x = Conv2D(512, (3, 3), activation='relu', name='block1_conv7', padding='same')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', name='block1_conv8', padding='same')(x)
x = BatchNormalization()(x)

x = Flatten(name='flatten')(x)
x = Dense(1024, name='fc_1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)

x = Dense(1024, name='fc_2')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)


coarse_pred1 = Dense(num_classes_coarse1, activation='softmax', name='predictions_coarse1')(x)
coarse_pred2 = Dense(num_classes_coarse2, activation='softmax', name='predictions_coarse2')(x)
fine_pred = Dense(num_classes_fine, activation='softmax', name='predictions_fine')(x)

In [9]:
model = Model(inputs=img_input, outputs= [coarse_pred1, coarse_pred2, fine_pred], name='split_net')

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 32, 32, 64)   1792        input[0][0]                      
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 32, 32, 64)   256         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 32, 32, 64)   36928       batch_normalization_1[0][0]      
__________________________________________________________________________________________________
batch_norm

In [0]:
alpha = K.variable(value=0.9, dtype="float32", name="alpha") 
beta = K.variable(value=0.1, dtype="float32", name="beta") 
gamma = K.variable(value=0, dtype="float32", name="gamma") 

sgd = optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy', 
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              metrics=['accuracy'])

In [0]:
clr_cb = ReduceLROnPlateau(monitor='val_predictions_fine_loss', factor=0.5, patience=10, verbose=1, min_lr=3.125e-05)
time_cb = TimingCallback()
change_lw = LossWeightsModifier3Vars(alpha, beta, gamma)
csv_cb = CSVLogger('drive/TCC-ITAU/cifar-100/training-data/split-net.csv', separator=',', append=False)


cbks = [clr_cb, time_cb, change_lw, csv_cb]

In [18]:
epochs = 50
start_time = time()
training = model.fit(train_features, [y_train_c_cat1, y_train_c_cat2, y_train_cat],
                     validation_data=(vali_features, [y_val_c_cat1, y_val_c_cat2, y_val_cat]),
                     epochs=epochs,
                     batch_size=256, 
                     callbacks=cbks,
                     verbose=1)
training_time = time() - start_time

Train on 45000 samples, validate on 5000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Changing loss weights to: coarse1 = 0.6000000238418579, coarse2 = 0.3499999940395355, fine = 0.05000000074505806
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Changing loss weights to: coarse1 = 0.0, coarse2 = 0.800000011920929, fine = 0.20000000298023224
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Changing loss weights to: coarse1 = 0.0, coarse2 = 0.4000000059604645, fine = 0.6000000238418579
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Changing loss weights to: coarse1 = 0.0, coarse2 = 0.0, fine = 1.0
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50

Epoch 00047: ReduceLR

In [19]:
model.evaluate(test_features, [y_test_c_cat1,y_test_c_cat2, y_test_cat])



[2.1723835582733155,
 1.0474919864654542,
 1.4395026556015014,
 2.1723835582733155,
 0.6617,
 0.5671,
 0.4653]

In [20]:
print('Total training time: {}'.format(training_time))

Total training time: 1754.4178442955017


In [21]:
model.save('drive/TCC-ITAU/cifar-100/models/split-net.h5')

TypeError: ignored