# Adaptative Split-baseline-net simple

In [0]:
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass

!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
!mkdir -p drive
!google-drive-ocamlfuse drive

In [1]:
from time import time
import os
import gzip
import numpy as np
from keras.utils import np_utils

import matplotlib.pyplot as plt

Using TensorFlow backend.


In [0]:
def load_mnist(path, kind='train'):
    import os
    import gzip
    import numpy as np

    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels

In [0]:
X_train, y_train = load_mnist('drive/TCC-ITAU/fashion-mnist/data/fashion', kind='train')
X_test, y_test = load_mnist('drive/TCC-ITAU/fashion-mnist/data/fashion', kind='t10k')

In [0]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=1974)

In [0]:
train_features = X_train.reshape(X_train.shape[0], 28, 28, 1)/255
vali_features = X_val.reshape(X_val.shape[0], 28, 28, 1)/255
test_features = X_test.reshape(X_test.shape[0], 28, 28, 1)/255

y_train_cat = np_utils.to_categorical(y_train)
y_val_cat = np_utils.to_categorical(y_val)
y_test_cat = np_utils.to_categorical(y_test)

## Coarse and Fine classe

| Label | Fine | Coarse |
| --- | --- | --- |
| 0 | T-shirt/top | TOP |
| 1 | Trouser | BOTTOM |
| 2 | Pullover | TOP |
| 3 | Dress |BODY |
| 4 | Coat | TOP |
| 5 | Sandal | FEET |
| 6 | Shirt | TOP |
| 7 | Sneaker | FEET |
| 8 | Bag | ACCESSORY |
| 9 | Ankle boot | FEET |

In [0]:
dict_coarse = {0: 0,
              1: 1,
              2: 0,
              3: 2,
              4: 0,
              5: 3,
              6: 0,
              7: 3,
              8: 4,
              9: 3}

y_train_coarse = np.vectorize(dict_coarse.get)(y_train)
y_val_coarse = np.vectorize(dict_coarse.get)(y_val)
y_test_coarse = np.vectorize(dict_coarse.get)(y_test)

y_train_c_cat = np_utils.to_categorical(y_train_coarse)
y_val_c_cat = np_utils.to_categorical(y_val_coarse)
y_test_c_cat= np_utils.to_categorical(y_test_coarse)

In [0]:
import keras
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Input
from keras import optimizers
from keras.callbacks import TensorBoard, ReduceLROnPlateau, CSVLogger
from keras.layers.normalization import BatchNormalization
from keras.utils.data_utils import get_file
from keras import backend as K

In [0]:
class TimingCallback(keras.callbacks.Callback):
    
  def on_epoch_begin(self, epoch, logs={}):
    self.starttime=time()
  def on_epoch_end(self, epoch, logs={}):
    logs['time_elapsed'] = (time()-self.starttime)

In [0]:
class AdaptativeLossWeightsModifier2Vars(keras.callbacks.Callback):
  def __init__(self, alpha, beta, decay_rate=0.5):
    self.alpha = alpha
    self.beta = beta
    self.decay_rate = decay_rate
  
  def calculate_exponential(self, ratio, decay_rate, epoch):
    return np.exp(-ratio*decay_rate*epoch)

  def on_epoch_end(self, epoch, logs={}):
    if epoch < 1:
      pass
    
    else:
      loss_coarse = self.model.history.history['predictions_coarse_loss'][-1]
      loss_fine = self.model.history.history['predictions_fine_loss'][-1]
      ratio = (loss_coarse/loss_fine) 
      decaying = self.calculate_exponential(ratio, self.decay_rate, epoch)
        
      K.set_value(self.alpha, decaying)
      K.set_value(self.beta, 1 - decaying)
      print('Changing loss weights to: coarse = {}, fine = {}'.format(K.eval(self.alpha), K.eval(self.beta)))  
    
    logs['alpha'] = K.eval(self.alpha)
    logs['beta'] = K.eval(self.beta) 

In [0]:
img_rows, img_cols = 28, 28
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

In [0]:
num_classes_coarse = 5
num_classes_fine = 10

img_input = Input(shape=input_shape, name='input')


x = Conv2D(32, (3, 3), activation='relu', name='block1_conv1')(img_input)
x = MaxPooling2D((2, 2), name='block1_pool')(x)

x = Conv2D(64, (3, 3), activation='relu', name='block1_conv2')(x)
x = MaxPooling2D((2, 2), name='block2_pool')(x)


x = Flatten(name='flatten')(x)
x = Dense(128, name='fc_1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)

coarse_pred = Dense(num_classes_coarse, activation='softmax', name='predictions_coarse')(x)
fine_pred = Dense(num_classes_fine, activation='softmax', name='predictions_fine')(x)

In [0]:
model = Model(inputs=img_input, outputs=[coarse_pred, fine_pred], name='split_baseline_net')

In [29]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 26, 26, 32)   320         input[0][0]                      
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 13, 13, 32)   0           block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 11, 11, 64)   18496       block1_pool[0][0]                
__________________________________________________________________________________________________
block2_poo

In [0]:
alpha = K.variable(value=0.95, dtype="float32", name="alpha") # A1 in paper
beta = K.variable(value=0.05, dtype="float32", name="beta") # A2 in paper

model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              loss_weights=[alpha, beta],
              metrics=['accuracy'])

In [0]:
tb_cb = TensorBoard(log_dir='drive/TCC-ITAU/fashion-mnist/tensorboard/adaptative-split-baseline-net-simple', histogram_freq=0)
clr_cb = ReduceLROnPlateau(monitor='val_predictions_fine_loss', factor=0.5, patience=10, verbose=1)
time_cb = TimingCallback()
csv_cb = CSVLogger('drive/TCC-ITAU/fashion-mnist/training-data/split-baseline-net/adaptative-split-baseline-net-simple.csv', separator=',', append=False)
change_lw = AdaptativeLossWeightsModifier2Vars(alpha, beta, decay_rate=0.5)

cbks = [tb_cb, clr_cb, time_cb, change_lw, csv_cb]

In [32]:
epochs = 50
start_time = time()
training = model.fit(train_features, [y_train_c_cat, y_train_cat],
                     validation_data=(vali_features, [y_val_c_cat, y_val_cat]),
                     epochs=epochs,
                     batch_size=512, 
                     callbacks=cbks,
                     verbose=1)
training_time = time() - start_time

Train on 48000 samples, validate on 12000 samples
Epoch 1/50
Epoch 2/50
Changing loss weights to: coarse = 0.9200916886329651, fine = 0.07990828901529312
Epoch 3/50
Changing loss weights to: coarse = 0.8547783493995667, fine = 0.14522168040275574
Epoch 4/50
Changing loss weights to: coarse = 0.7855736613273621, fine = 0.21442635357379913
Epoch 5/50
Changing loss weights to: coarse = 0.7205458879470825, fine = 0.2794541120529175
Epoch 6/50
Changing loss weights to: coarse = 0.6553766131401062, fine = 0.3446233570575714
Epoch 7/50
Changing loss weights to: coarse = 0.5973042249679565, fine = 0.40269580483436584
Epoch 8/50
Changing loss weights to: coarse = 0.543657660484314, fine = 0.45634233951568604
Epoch 9/50
Changing loss weights to: coarse = 0.4979146718978882, fine = 0.5020853281021118
Epoch 10/50
Changing loss weights to: coarse = 0.45907294750213623, fine = 0.5409270524978638
Epoch 11/50
Changing loss weights to: coarse = 0.42424076795578003, fine = 0.57575923204422
Epoch 12/50
C

In [33]:
model.evaluate(test_features, [y_test_c_cat, y_test_cat])



[0.38073392303846776, 0.08281451321828062, 0.3807366230931133, 0.9779, 0.9158]

In [34]:
model.save('drive/TCC-ITAU/fashion-mnist/models/adaptative-split-baseline-net-simple.h5')

TypeError: ignored

In [35]:
training_time

209.4751055240631