In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [14]:
"""Trains a ResNet on the CIFAR10 dataset.

ResNet v1
[a] Deep Residual Learning for Image Recognition
https://arxiv.org/pdf/1512.03385.pdf

ResNet v2
[b] Identity Mappings in Deep Residual Networks
https://arxiv.org/pdf/1603.05027.pdf
"""

from __future__ import print_function
import keras
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import LambdaCallback
from keras.callbacks import CSVLogger
from keras.callbacks import Callback
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model
from keras.datasets import cifar10
import numpy as np
import pickle
import math
from collections import defaultdict
import os

dirpath = os.getcwd()

seed = 22
resample = True
save_dir = '/home/ubuntu/Projects/hybrid-ensemble/model/run_200'
datafile = '/home/ubuntu/Projects/hybrid-ensemble/data/cifar10_balance/DS3'
top_k = 1

# Set random seed
if seed is not None:
    import tensorflow as tf
    tf.set_random_seed(seed)
    np.random.seed(seed)

# Training parameters
batch_size = 128  # orig paper trained all networks with batch_size=128
epochs = 5 # orig paper epochs = 200
data_augmentation = True
num_classes = 10
initial_lr = 1e-3
snapshot_window_size = int(math.ceil(epochs/top_k))

# Subtracting pixel mean improves accuracy
subtract_pixel_mean = True

# Model parameter
# ----------------------------------------------------------------------------
#           |      | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
# Model     |  n   | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
#           |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
# ----------------------------------------------------------------------------
# ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
# ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
# ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
# ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
# ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
# ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
# ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
# ---------------------------------------------------------------------------
n = 3

# Model version
# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
version = 1

# Computed depth from supplied model parameter n
if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2

# Model name, depth and version
model_type = 'ResNet%dv%d' % (depth, version)

# Load the CIFAR10 data.
print('Loading data...')
with open(datafile, 'rb') as f:
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = pickle.load(f)

# Resample the training data set from training+validating data set with the same class distribution with the loaded ones
if resample:
    print('Resampling training and validating data sets...')
    x_tv = np.concatenate((x_train, x_valid), axis=0)
    y_tv = np.concatenate((y_train, y_valid), axis=0)
    index_dict = defaultdict(list)
    for i in range(len(y_tv)):
        index_dict[y_tv[i][0]].append(i)
    valid_index_dict = defaultdict(list)
    for i in range(len(y_valid)):
        valid_index_dict[y_valid[i][0]].append(i)
    valid_index = []
    for c in valid_index_dict.keys():
        valid_index.extend(np.random.choice(index_dict[c], size=len(valid_index_dict[c]), replace=False))
    train_index = np.setdiff1d(range(len(y_tv)), valid_index)

    x_train, y_train = x_tv[train_index], y_tv[train_index]
    x_valid, y_valid = x_tv[valid_index], y_tv[valid_index]
    
# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
x_valid = x_valid.astype('float32') / 255

# If subtract pixel mean is enabled
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean
    x_valid -= x_train_mean

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)
print(x_valid.shape[0], 'valid samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
y_valid = keras.utils.to_categorical(y_valid, num_classes)

def next_run_dir(path):
    """
    Naive (slow) version of next_path
    """
    i = 1
    while os.path.exists('{}_{}'.format(path, i)):
        i += 1
    return '{}_{}'.format(path, i)

def cyclic_cosine_anneal_schedule(initial_lr=1e-3, update_window_size=40):
    '''
    Wrapper function to create a LearningRateScheduler with cosine annealing schedule.
    '''
    def lr_schedule(epoch):
        """Learning Rate Schedule

        Learning rate is scheduled to be updated per epoch with a cosine function per epoch. 
        Learning rate is raised to initial_lr every snapshot_window_size.

        # Arguments
            epoch (int): The number of epochs

        # Returns
            lr (float32): learning rate
        """
        lr = initial_lr / 2 * (math.cos(math.pi * ((epoch % update_window_size) / update_window_size)) + 1)
        print('Learning rate: ', lr)
        return lr
    
    return LearningRateScheduler(lr_schedule)

class MyCallback(Callback):
    def on_batch_begin(self, batch, logs=None):
        iteration = self.model.optimizer.iterations   
        update_window_size = self.params.update_window_size
        lr = initial_lr / 2 * (math.cos(math.pi * ((K.eval(iteration) % update_window_size) / update_window_size)) + 1)
        K.set_value(self.model.optimizer.lr, lr)
        print('\n batch {}, lr {}, iterations {}'.format(batch, K.eval(lr), K.eval(iteration)))
        
def cyclic_cosine_anneal_schedule_itr(batch_logs, initial_lr=1e-3, update_window_size=15600):
    '''
    Wrapper function to create a LearningRateScheduler with cosine annealing schedule per iteration.
    '''
    def lr_schedule(batch, logs):
        """Learning Rate Schedule

        Learning rate is scheduled to be updated per epoch with a cosine function per iteration. 
        Learning rate is raised to initial_lr every snapshot_window_size.

        # Arguments
            epoch (int): The number of epochs

        # Returns
            lr (float32): learning rate
        """
        iteration = model.optimizer.iterations
        print('\n Optimizer iteration {}, batch {}'.format(K.eval(iteration), batch))
        lr = initial_lr / 2 * (math.cos(math.pi * ((K.eval(iteration) % update_window_size) / update_window_size)) + 1)
        K.set_value(model.optimizer.lr, lr)
        print('\n Learning rate {}, Model learning rate {}'.format(lr, K.eval(model.optimizer.lr)))
    
    def batch_log(batch, logs):
        batch_logs['iteration'].append(K.eval(model.optimizer.iterations))
        batch_logs['lr'].append(K.eval(model.optimizer.lr))
        batch_logs['loss'].append(logs['loss'])
        batch_logs['acc'].append(logs['acc'])
        
    
    return LambdaCallback(on_batch_begin=lr_schedule, on_batch_end=batch_log)

def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    """2D Convolution-Batch Normalization-Activation stack builder

    # Arguments
        inputs (tensor): input tensor from input image or previous layer
        num_filters (int): Conv2D number of filters
        kernel_size (int): Conv2D square kernel dimensions
        strides (int): Conv2D square stride dimensions
        activation (string): activation name
        batch_normalization (bool): whether to include batch normalization
        conv_first (bool): conv-bn-activation (True) or
            activation-bn-conv (False)

    # Returns
        x (tensor): tensor as input to the next layer
    """
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)
    return x


def resnet_v1(input_shape, depth, num_classes=10):
    """ResNet Version 1 Model builder [a]

    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
    Last ReLU is after the shortcut connection.
    At the beginning of each stage, the feature map size is halved (downsampled)
    by a convolutional layer with strides=2, while the number of filters is
    doubled. Within each stage, the layers have the same number filters and the
    same number of filters.
    Features maps sizes:
    stage 0: 32x32, 16
    stage 1: 16x16, 32
    stage 2:  8x8,  64
    The Number of parameters is approx the same as Table 6 of [a]:
    ResNet20 0.27M
    ResNet32 0.46M
    ResNet44 0.66M
    ResNet56 0.85M
    ResNet110 1.7M

    # Arguments
        input_shape (tensor): shape of input image tensor
        depth (int): number of core convolutional layers
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance
    """
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
    # Start model definition.
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)
    # Instantiate the stack of residual units
    for stack in range(3):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample
            y = resnet_layer(inputs=x,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters,
                             activation=None)
            if stack > 0 and res_block == 0:  # first layer but not first stack
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # Add classifier on top.
    # v1 does not use BN after last shortcut connection-ReLU
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model


def resnet_v2(input_shape, depth, num_classes=10):
    """ResNet Version 2 Model builder [b]

    Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as
    bottleneck layer
    First shortcut connection per layer is 1 x 1 Conv2D.
    Second and onwards shortcut connection is identity.
    At the beginning of each stage, the feature map size is halved (downsampled)
    by a convolutional layer with strides=2, while the number of filter maps is
    doubled. Within each stage, the layers have the same number filters and the
    same filter map sizes.
    Features maps sizes:
    conv1  : 32x32,  16
    stage 0: 32x32,  64
    stage 1: 16x16, 128
    stage 2:  8x8,  256

    # Arguments
        input_shape (tensor): shape of input image tensor
        depth (int): number of core convolutional layers
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance
    """
    if (depth - 2) % 9 != 0:
        raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
    # Start model definition.
    num_filters_in = 16
    num_res_blocks = int((depth - 2) / 9)

    inputs = Input(shape=input_shape)
    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in,
                     conv_first=True)

    # Instantiate the stack of residual units
    for stage in range(3):
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0:
                num_filters_out = num_filters_in * 4
                if res_block == 0:  # first layer and first stage
                    activation = None
                    batch_normalization = False
            else:
                num_filters_out = num_filters_in * 2
                if res_block == 0:  # first layer but not first stage
                    strides = 2    # downsample

            # bottleneck residual unit
            y = resnet_layer(inputs=x,
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides,
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_out,
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0:
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])

        num_filters_in = num_filters_out

    # Add classifier on top.
    # v2 has BN-ReLU before Pooling
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model

print('Building model...')
if version == 2:
    model = resnet_v2(input_shape=input_shape, depth=depth)
else:
    model = resnet_v1(input_shape=input_shape, depth=depth)


model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=initial_lr),
              metrics=['accuracy'])
# model.summary()
print(model_type)

# Prepare model model saving directory.
if not save_dir:
    save_dir = next_run_dir('{}/../model/run'.format(dirpath))
os.makedirs(save_dir)
model_name = 'cifar10_%s_model-{epoch:04d}.h5' % model_type
# model_name = 'cifar10_%s_model-{epoch:04d}-{val_acc:.5f}.h5' % model_type
# model_name = 'cifar10_{}_model.h5'.format(model_type)
filepath = os.path.join(save_dir, model_name)

print('Preparing callbacks...')
# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=False,
                             mode='max')
# Learning rate updater
batch_num = int(x_train.shape[0]/batch_size)
update_window_size = int(math.ceil(epochs*batch_num/top_k))
batch_logs = {'iteration':[], 'lr':[], 'loss':[], 'acc':[]}
lr_scheduler = cyclic_cosine_anneal_schedule_itr(initial_lr=initial_lr, 
                                                 update_window_size=update_window_size, batch_logs=batch_logs)

# Training log writer
csvlog = CSVLogger('callback_training_log.csv', separator=',', append=False)


callbacks = [checkpoint, lr_scheduler, csvlog]

# callbacks = [MyCallback()]
        
# Run training, with or without data augmentation.
if not data_augmentation:
    print('Not using data augmentation.')
    print('Training...')
    history = model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_valid, y_valid),
              shuffle=True,
              callbacks=callbacks)
else:
    print('Using real-time data augmentation.')
    print('Training...')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        # set input mean to 0 over the dataset
        featurewise_center=False,
        # set each sample mean to 0
        samplewise_center=False,
        # divide inputs by std of dataset
        featurewise_std_normalization=False,
        # divide each input by its std
        samplewise_std_normalization=False,
        # apply ZCA whitening
        zca_whitening=False,
        # randomly rotate images in the range (deg 0 to 180)
        rotation_range=0,
        # randomly shift images horizontally
        width_shift_range=0.1,
        # randomly shift images vertically
        height_shift_range=0.1,
        # randomly flip images
        horizontal_flip=True,
        # randomly flip images
        vertical_flip=False)

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    steps_per_epoch = math.ceil(len(x_train) / batch_size)
    history = model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        validation_data=(x_valid, y_valid),
                        epochs=epochs, verbose=1, workers=4,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=callbacks)

# Score trained model.
# scores = model.evaluate(x_test, y_test, verbose=1)
# print('Test loss:', scores[0])
# print('Test accuracy:', scores[1])

# Save training log
print('Saving epoch training log...')
train_error = history.history['loss']
valid_accuracy = history.history['val_acc']
logfile = '{}/training_log.csv'.format(save_dir)
f = open(logfile, 'w')
f.write('current_epoch,total_epochs,train_loss,validation_accuracy\n')
for i in range(len(train_error)):
    f.write('{},{},{},{}\n'.format(i+1, epochs, train_error[i], valid_accuracy[i]))
f.close()

print('Saving batch training log...')
f = open('batch_training_log.csv', 'w')
f.write('current_iteration,total_iteration,learning_rate,train_loss,train_accuracy\n')
total_iteration = int(math.ceil(x_train.shape[0]/batch_size)*epochs)
for i in range(len(batch_logs['iteration'])):
    f.write('{},{},{},{},{}\n'.format(batch_logs['iteration'][i], 
                                   total_iteration, 
                                   batch_logs['lr'][i], 
                                   batch_logs['loss'][i], 
                                   batch_logs['acc'][i]))
f.close()

import csv
with open('batch_training_log.csv', 'w') as f:  
    w = csv.DictWriter(f, batch_logs.keys())
    w.writeheader()
    w.writerow(batch_logs)

# Save index for combination
print('Writing index file and predict files...')
indexfile = '{}/index.csv'.format(save_dir)
f = open(indexfile, 'w')
window_size = int(epochs/top_k)
top_x = []
for i in range(0, top_k):
    top_x.append(np.argmax(valid_accuracy[i*snapshot_window_size:(i+1)*snapshot_window_size]) + i*snapshot_window_size)
top_v = [valid_accuracy[i] for i in top_x]
for x,v in zip(top_x, top_v):
    name = 'cifar10_{}_model-{:04d}.h5'.format(model_type, x+1)
    weight = v
    f.write('{},{}\n'.format(name, weight))
    # predicting
    filepath = os.path.join(save_dir, name)
    model.load_weights(filepath)
    predicts = model.predict(x_test)
    # Save predicts
    predictfile = '{}/prediction_{:04d}.csv'.format(save_dir, x+1)
    f1 = open(predictfile,'w')
    header = '0,1,2,3,4,5,6,7,8,9\n'
    f1.write(header)
    np.savetxt(f1, predicts, delimiter=",")
    f1.close()
f.close()

# Delete unwanted model files
print('Deleting unwanted model files...')
no_top_x = range(len(valid_accuracy))
no_top_x = list(set(no_top_x) - set(top_x))
for no_top_x_index in no_top_x:
    remove_filename = '{}/cifar10_{}_model-{:04d}.h5'.format(save_dir, model_type, no_top_x_index+1)
    os.remove(remove_filename)

# Save targets
print('Saving target file...')
targetfile = '{}/target.csv'.format(save_dir)
f2 = open(targetfile,'w')
header = '0,1,2,3,4,5,6,7,8,9\n'
f2.write(header)
np.savetxt(f2, y_test, delimiter=",")
f2.close()

Loading data...
Resampling training and validating data sets...
x_train shape: (50000, 32, 32, 3)
50000 train samples
4500 test samples
y_train shape: (50000, 1)
4500 valid samples
Building model...
ResNet20v1
Preparing callbacks...
Using real-time data augmentation.
Training...
Epoch 1/5

 Optimizer iteration 0, batch 0

 Learning rate 0.001, Model learning rate 0.0010000000474974513
  1/391 [..............................] - ETA: 1:09:31 - loss: 5.1286 - acc: 0.0703
 Optimizer iteration 1, batch 1

 Learning rate 0.0009999993511109622, Model learning rate 0.0009999993490055203

 Optimizer iteration 2, batch 2

 Learning rate 0.0009999974044455327, Model learning rate 0.0009999973699450493
  3/391 [..............................] - ETA: 23:13 - loss: 4.2290 - acc: 0.0807  
 Optimizer iteration 3, batch 3

 Learning rate 0.0009999941600087644, Model learning rate 0.0009999941103160381

 Optimizer iteration 4, batch 4

 Learning rate 0.0009999896178090784, Model learning rate 0.00099998

 45/391 [==>...........................] - ETA: 1:43 - loss: 2.3157 - acc: 0.2694
 Optimizer iteration 45, batch 45

 Learning rate 0.0009986865748457456, Model learning rate 0.000998686533421278

 Optimizer iteration 46, batch 46

 Learning rate 0.000998627578805444, Model learning rate 0.0009986276272684336
 47/391 [==>...........................] - ETA: 1:39 - loss: 2.2950 - acc: 0.2753
 Optimizer iteration 47, batch 47

 Learning rate 0.0009985672885492634, Model learning rate 0.0009985673241317272
 48/391 [==>...........................] - ETA: 1:37 - loss: 2.2860 - acc: 0.2772
 Optimizer iteration 48, batch 48

 Learning rate 0.0009985057042336898, Model learning rate 0.0009985057404264808
 49/391 [==>...........................] - ETA: 1:35 - loss: 2.2795 - acc: 0.2803
 Optimizer iteration 49, batch 49

 Learning rate 0.000998442826018569, Model learning rate 0.0009984428761526942
 50/391 [==>...........................] - ETA: 1:33 - loss: 2.2721 - acc: 0.2819
 Optimizer iterat

 90/391 [=====>........................] - ETA: 56s - loss: 2.0758 - acc: 0.3299
 Optimizer iteration 90, batch 90

 Learning rate 0.0009947531997255255, Model learning rate 0.0009947532089427114

 Optimizer iteration 91, batch 91

 Learning rate 0.0009946361664814943, Model learning rate 0.0009946362115442753
 Optimizer iteration 92, batch 92

 Learning rate 0.0009945178493815181, Model learning rate 0.0009945178171619773
 Optimizer iteration 93, batch 93

 Learning rate 0.000994398248732696, Model learning rate 0.000994398258626461
 Optimizer iteration 94, batch 94

 Learning rate 0.000994277364845458, Model learning rate 0.0009942774195224047

 Optimizer iteration 95, batch 95

 Learning rate 0.0009941551980335653, Model learning rate 0.0009941551834344864
 Optimizer iteration 96, batch 96

 Learning rate 0.0009940317486141082, Model learning rate 0.0009940317831933498
 Optimizer iteration 97, batch 97

 Learning rate 0.0009939070169075071, Model learning rate 0.0009939069859683514


 Optimizer iteration 186, batch 186

 Learning rate 0.0009777185133998268, Model learning rate 0.0009777185041457415

 Optimizer iteration 187, batch 187

 Learning rate 0.0009774801031602629, Model learning rate 0.00097748008556664
 Optimizer iteration 188, batch 188

 Learning rate 0.00097724045359428, Model learning rate 0.00097724050283432

 Optimizer iteration 189, batch 189

 Learning rate 0.0009769995653239022, Model learning rate 0.0009769995231181383
 Optimizer iteration 190, batch 190

 Learning rate 0.0009767574389743681, Model learning rate 0.0009767574956640601

 Optimizer iteration 191, batch 191

 Learning rate 0.0009765140751741306, Model learning rate 0.00097651407122612
 Optimizer iteration 192, batch 192

 Learning rate 0.000976269474554854, Model learning rate 0.0009762694826349616

 Optimizer iteration 193, batch 193

 Learning rate 0.0009760236377514128, Model learning rate 0.0009760236134752631
 Optimizer iteration 194, batch 194

 Learning rate 0.000975776565401

 Optimizer iteration 234, batch 234

 Learning rate 0.0009648882429441257, Model learning rate 0.0009648882551118731

 Optimizer iteration 235, batch 235

 Learning rate 0.0009645911019582466, Model learning rate 0.0009645911050029099
 Optimizer iteration 236, batch 236

 Learning rate 0.0009642927551000749, Model learning rate 0.0009642927325330675

 Optimizer iteration 237, batch 237

 Learning rate 0.0009639932031439866, Model learning rate 0.0009639931959100068
 Optimizer iteration 238, batch 238

 Learning rate 0.0009636924468674854, Model learning rate 0.0009636924369260669

 Optimizer iteration 239, batch 239

 Learning rate 0.0009633904870512015, Model learning rate 0.0009633905137889087
 Optimizer iteration 240, batch 240

 Learning rate 0.0009630873244788883, Model learning rate 0.0009630873100832105
 Optimizer iteration 241, batch 241

 Learning rate 0.0009627829599374214, Model learning rate 0.000962782942224294

 Optimizer iteration 242, batch 242

 Learning rate 0.0009624


 Optimizer iteration 281, batch 281

 Learning rate 0.0009496321798920731, Model learning rate 0.0009496322018094361
 Optimizer iteration 282, batch 282

 Learning rate 0.0009492792503554695, Model learning rate 0.0009492792305536568
 Optimizer iteration 283, batch 283

 Learning rate 0.000948925154689344, Model learning rate 0.0009489251533523202
 Optimizer iteration 284, batch 284

 Learning rate 0.0009485698938127715, Model learning rate 0.0009485699119977653
 Optimizer iteration 285, batch 285

 Learning rate 0.0009482134686478518, Model learning rate 0.0009482134482823312
 Optimizer iteration 286, batch 286

 Learning rate 0.0009478558801197064, Model learning rate 0.0009478558786213398
 Optimizer iteration 287, batch 287

 Learning rate 0.0009474971291564764, Model learning rate 0.0009474971448071301
 Optimizer iteration 288, batch 288

 Learning rate 0.0009471372166893198, Model learning rate 0.0009471371886320412

 Optimizer iteration 289, batch 289

 Learning rate 0.000946776

 Optimizer iteration 371, batch 371

 Learning rate 0.0009133137636305345, Model learning rate 0.0009133137646131217
 Optimizer iteration 372, batch 372

 Learning rate 0.0009128599121797621, Model learning rate 0.0009128599194809794

 Optimizer iteration 373, batch 373

 Learning rate 0.000912404989127905, Model learning rate 0.0009124049684032798
 Optimizer iteration 374, batch 374

 Learning rate 0.0009119489956557415, Model learning rate 0.0009119489695876837
 Optimizer iteration 375, batch 375

 Learning rate 0.0009114919329468282, Model learning rate 0.0009114919230341911
 Optimizer iteration 376, batch 376

 Learning rate 0.000911033802187497, Model learning rate 0.0009110338287428021

 Optimizer iteration 377, batch 377

 Learning rate 0.000910574604566852, Model learning rate 0.0009105746285058558
 Optimizer iteration 378, batch 378

 Learning rate 0.0009101143412767665, Model learning rate 0.0009101143223233521

 Optimizer iteration 379, batch 379

 Learning rate 0.0009096530

 27/391 [=>............................] - ETA: 19s - loss: 1.3615 - acc: 0.5732
 Optimizer iteration 418, batch 27

 Learning rate 0.0008908439817609514, Model learning rate 0.0008908439776860178
 28/391 [=>............................] - ETA: 19s - loss: 1.3598 - acc: 0.5728
 Optimizer iteration 419, batch 28

 Learning rate 0.0008903410859723847, Model learning rate 0.0008903410634957254
 29/391 [=>............................] - ETA: 20s - loss: 1.3592 - acc: 0.5722
 Optimizer iteration 420, batch 29

 Learning rate 0.0008898371770316111, Model learning rate 0.0008898371597751975
 30/391 [=>............................] - ETA: 20s - loss: 1.3595 - acc: 0.5716
 Optimizer iteration 421, batch 30

 Learning rate 0.0008893322562465546, Model learning rate 0.0008893322665244341
 31/391 [=>............................] - ETA: 20s - loss: 1.3601 - acc: 0.5716
 Optimizer iteration 422, batch 31

 Learning rate 0.0008888263249277656, Model learning rate 0.0008888263255357742
 32/391 [=>....

 70/391 [====>.........................] - ETA: 21s - loss: 1.3464 - acc: 0.5715
 Optimizer iteration 461, batch 70

 Learning rate 0.0008683210482951527, Model learning rate 0.0008683210471644998
 71/391 [====>.........................] - ETA: 21s - loss: 1.3472 - acc: 0.5714
 Optimizer iteration 462, batch 71

 Learning rate 0.0008677758000018776, Model learning rate 0.0008677758160047233
 72/391 [====>.........................] - ETA: 21s - loss: 1.3478 - acc: 0.5709
 Optimizer iteration 463, batch 72

 Learning rate 0.0008672295971258625, Model learning rate 0.0008672295953147113
 73/391 [====>.........................] - ETA: 21s - loss: 1.3460 - acc: 0.5718
 Optimizer iteration 464, batch 73

 Learning rate 0.0008666824410848075, Model learning rate 0.0008666824433021247
 74/391 [====>.........................] - ETA: 21s - loss: 1.3476 - acc: 0.5716
 Optimizer iteration 465, batch 74

 Learning rate 0.0008661343332988868, Model learning rate 0.0008661343599669635
 75/391 [====>.

 Optimizer iteration 505, batch 114

 Learning rate 0.0008434461959398376, Model learning rate 0.0008434461778961122
 Optimizer iteration 506, batch 115

 Learning rate 0.0008428603194086966, Model learning rate 0.0008428603177890182

 Optimizer iteration 507, batch 116

 Learning rate 0.0008422735529643444, Model learning rate 0.0008422735263593495
 Optimizer iteration 508, batch 117

 Learning rate 0.0008416858981297663, Model learning rate 0.000841685920022428
 Optimizer iteration 509, batch 118

 Learning rate 0.0008410973564302533, Model learning rate 0.000841097382362932

 Optimizer iteration 510, batch 119

 Learning rate 0.0008405079293933986, Model learning rate 0.0008405079133808613
 Optimizer iteration 511, batch 120

 Learning rate 0.000839917618549093, Model learning rate 0.0008399176294915378
 Optimizer iteration 512, batch 121

 Learning rate 0.0008393264254295217, Model learning rate 0.0008393264142796397
 Optimizer iteration 513, batch 122

 Learning rate 0.00083873435

 Optimizer iteration 551, batch 160

 Learning rate 0.0008155983050506122, Model learning rate 0.0008155982941389084
 Optimizer iteration 552, batch 161

 Learning rate 0.0008149731031504135, Model learning rate 0.0008149730856530368
 Optimizer iteration 553, batch 162

 Learning rate 0.0008143470837198393, Model learning rate 0.0008143470622599125

 Optimizer iteration 554, batch 163

 Learning rate 0.0008137202483837583, Model learning rate 0.0008137202239595354
 Optimizer iteration 555, batch 164

 Learning rate 0.0008130925987691568, Model learning rate 0.0008130925707519054
 Optimizer iteration 556, batch 165

 Learning rate 0.0008124641365051346, Model learning rate 0.0008124641608446836
 Optimizer iteration 557, batch 166

 Learning rate 0.0008118348632229007, Model learning rate 0.0008118348778225482
 Optimizer iteration 558, batch 167

 Learning rate 0.0008112047805557692, Model learning rate 0.0008112047798931599

 Optimizer iteration 559, batch 168

 Learning rate 0.00081057

 Optimizer iteration 598, batch 207

 Learning rate 0.000785356783842216, Model learning rate 0.000785356794949621
 Optimizer iteration 599, batch 208

 Learning rate 0.0007846949480747588, Model learning rate 0.0007846949738450348
 Optimizer iteration 600, batch 209

 Learning rate 0.0007840323733655779, Model learning rate 0.0007840323960408568
 Optimizer iteration 601, batch 210

 Learning rate 0.0007833690614344231, Model learning rate 0.000783369061537087
 Optimizer iteration 602, batch 211

 Learning rate 0.0007827050140029577, Model learning rate 0.0007827050285413861

 Optimizer iteration 603, batch 212

 Learning rate 0.0007820402327947542, Model learning rate 0.0007820402388460934
 Optimizer iteration 604, batch 213

 Learning rate 0.0007813747195352895, Model learning rate 0.0007813746924512088

 Optimizer iteration 605, batch 214

 Learning rate 0.0007807084759519405, Model learning rate 0.0007807084475643933
 Optimizer iteration 606, batch 215

 Learning rate 0.00078004150

 Optimizer iteration 687, batch 296

 Learning rate 0.0007237594901927699, Model learning rate 0.0007237594691105187
 Optimizer iteration 688, batch 297

 Learning rate 0.0007230388293475197, Model learning rate 0.00072303885826841
 Optimizer iteration 689, batch 298

 Learning rate 0.0007223175895924637, Model learning rate 0.0007223176071420312
 Optimizer iteration 690, batch 299

 Learning rate 0.0007215957727996207, Model learning rate 0.0007215957739390433
 Optimizer iteration 691, batch 300

 Learning rate 0.0007208733808425063, Model learning rate 0.0007208733586594462
 Optimizer iteration 692, batch 301

 Learning rate 0.0007201504155961296, Model learning rate 0.000720150419510901
 Optimizer iteration 693, batch 302

 Learning rate 0.0007194268789369874, Model learning rate 0.0007194268982857466

 Optimizer iteration 694, batch 303

 Learning rate 0.00071870277274306, Model learning rate 0.000718702794983983
 Optimizer iteration 695, batch 304

 Learning rate 0.000717978098893

 Optimizer iteration 732, batch 341

 Learning rate 0.0006907834125711476, Model learning rate 0.0006907834322191775
 Optimizer iteration 733, batch 342

 Learning rate 0.0006900385746064268, Model learning rate 0.0006900385487824678
 Optimizer iteration 734, batch 343

 Learning rate 0.0006892932433859147, Model learning rate 0.0006892932578921318
 Optimizer iteration 735, batch 344

 Learning rate 0.0006885474208441603, Model learning rate 0.0006885474431328475

 Optimizer iteration 736, batch 345

 Learning rate 0.0006878011089169878, Model learning rate 0.0006878011045046151
 Optimizer iteration 737, batch 346

 Learning rate 0.0006870543095414918, Model learning rate 0.0006870543002150953

 Optimizer iteration 738, batch 347

 Learning rate 0.0006863070246560319, Model learning rate 0.0006863070302642882
 Optimizer iteration 739, batch 348

 Learning rate 0.0006855592562002281, Model learning rate 0.0006855592364445329
 Optimizer iteration 740, batch 349

 Learning rate 0.00068481

 Optimizer iteration 779, batch 388

 Learning rate 0.0006552744071497918, Model learning rate 0.0006552744307555258
 Optimizer iteration 780, batch 389

 Learning rate 0.0006545084971874737, Model learning rate 0.0006545084761455655
 Optimizer iteration 781, batch 390

 Learning rate 0.0006537421861896752, Model learning rate 0.0006537421722896397

Epoch 00002: saving model to /home/ubuntu/Projects/hybrid-ensemble/model/run_200/cifar10_ResNet20v1_model-0002.h5
Epoch 3/5

 Optimizer iteration 782, batch 0

 Learning rate 0.0006529754761453999, Model learning rate 0.0006529754609800875
  1/391 [..............................] - ETA: 15s - loss: 1.1736 - acc: 0.6094
 Optimizer iteration 783, batch 1

 Learning rate 0.0006522083690446862, Model learning rate 0.0006522083422169089

 Optimizer iteration 784, batch 2

 Learning rate 0.0006514408668786038, Model learning rate 0.0006514408742077649
  3/391 [..............................] - ETA: 16s - loss: 1.1745 - acc: 0.6406
 Optimizer iter

 85/391 [=====>........................] - ETA: 21s - loss: 1.1587 - acc: 0.6437
 Optimizer iteration 867, batch 85

 Learning rate 0.0005865596434372857, Model learning rate 0.0005865596467629075
 86/391 [=====>........................] - ETA: 21s - loss: 1.1567 - acc: 0.6451
 Optimizer iteration 868, batch 86

 Learning rate 0.0005857661577735372, Model learning rate 0.0005857661599293351
 87/391 [=====>........................] - ETA: 21s - loss: 1.1578 - acc: 0.6448
 Optimizer iteration 869, batch 87

 Learning rate 0.0005849724494989103, Model learning rate 0.0005849724402651191
 88/391 [=====>........................] - ETA: 21s - loss: 1.1593 - acc: 0.6441
 Optimizer iteration 870, batch 88

 Learning rate 0.0005841785206735191, Model learning rate 0.0005841785459779203
 89/391 [=====>........................] - ETA: 21s - loss: 1.1582 - acc: 0.6444
 Optimizer iteration 871, batch 89

 Learning rate 0.0005833843733580511, Model learning rate 0.0005833843606524169
 90/391 [=====>

 Optimizer iteration 953, batch 171

 Learning rate 0.0005177180944443821, Model learning rate 0.0005177180864848197
 Optimizer iteration 954, batch 172

 Learning rate 0.0005169130411451083, Model learning rate 0.0005169130163267255
 Optimizer iteration 955, batch 173

 Learning rate 0.0005161079439470866, Model learning rate 0.0005161079461686313
 Optimizer iteration 956, batch 174

 Learning rate 0.0005153028049399916, Model learning rate 0.0005153028178028762
 Optimizer iteration 957, batch 175

 Learning rate 0.0005144976262136073, Model learning rate 0.0005144976312294602
 Optimizer iteration 958, batch 176

 Learning rate 0.00051369240985782, Model learning rate 0.0005136923864483833
 Optimizer iteration 959, batch 177

 Learning rate 0.0005128871579626142, Model learning rate 0.0005128871416673064

 Optimizer iteration 960, batch 178

 Learning rate 0.0005120818726180662, Model learning rate 0.0005120818968862295
 Optimizer iteration 961, batch 179

 Learning rate 0.00051127655

 Optimizer iteration 996, batch 214

 Learning rate 0.0004830869588548918, Model learning rate 0.0004830869729630649
 Optimizer iteration 997, batch 215

 Learning rate 0.0004822819055556179, Model learning rate 0.00048228190280497074
 Optimizer iteration 998, batch 216

 Learning rate 0.00048147689824465313, Model learning rate 0.0004814768908545375

 Optimizer iteration 999, batch 217

 Learning rate 0.00048067193901143887, Model learning rate 0.00048067193711176515
 Optimizer iteration 1000, batch 218

 Learning rate 0.0004798670299452926, Model learning rate 0.0004798670415766537
 Optimizer iteration 1001, batch 219

 Learning rate 0.0004790621731354003, Model learning rate 0.00047906217514537275
 Optimizer iteration 1002, batch 220

 Learning rate 0.00047825737067081327, Model learning rate 0.0004782573669217527
 Optimizer iteration 1003, batch 221

 Learning rate 0.00047745262464044165, Model learning rate 0.00047745261690579355
 Optimizer iteration 1004, batch 222

 Learning rat

 Optimizer iteration 1040, batch 258

 Learning rate 0.00044773576836617336, Model learning rate 0.00044773577246814966

 Optimizer iteration 1041, batch 259

 Learning rate 0.0004469347127759222, Model learning rate 0.0004469347186386585
 Optimizer iteration 1042, batch 260

 Learning rate 0.0004461337949196036, Model learning rate 0.0004461337812244892

 Optimizer iteration 1043, batch 261

 Learning rate 0.0004453330168760451, Model learning rate 0.00044533301843330264
 Optimizer iteration 1044, batch 262

 Learning rate 0.00044453238072371116, Model learning rate 0.0004445323720574379
 Optimizer iteration 1045, batch 263

 Learning rate 0.0004437318885406973, Model learning rate 0.0004437319003045559
 Optimizer iteration 1046, batch 264

 Learning rate 0.0004429315424047263, Model learning rate 0.0004429315449669957
 Optimizer iteration 1047, batch 265

 Learning rate 0.0004421313443931416, Model learning rate 0.0004421313351485878

 Optimizer iteration 1048, batch 266

 Learning r

 Optimizer iteration 1084, batch 302

 Learning rate 0.0004126470955693806, Model learning rate 0.00041264708852395415

 Optimizer iteration 1085, batch 303

 Learning rate 0.0004118540613054155, Model learning rate 0.0004118540673516691
 Optimizer iteration 1086, batch 304

 Learning rate 0.00041106125582918385, Model learning rate 0.0004110612499061972
 Optimizer iteration 1087, batch 305

 Learning rate 0.0004102686811984568, Model learning rate 0.0004102686943951994
 Optimizer iteration 1088, batch 306

 Learning rate 0.00040947633947040614, Model learning rate 0.00040947634261101484

 Optimizer iteration 1089, batch 307

 Learning rate 0.00040868423270159945, Model learning rate 0.0004086842236574739
 Optimizer iteration 1090, batch 308

 Learning rate 0.0004078923629479943, Model learning rate 0.0004078923666384071
 Optimizer iteration 1091, batch 309

 Learning rate 0.00040710073226493307, Model learning rate 0.00040710074244998395

 Optimizer iteration 1092, batch 310

 Learnin

 Optimizer iteration 1130, batch 348

 Learning rate 0.0003764354466171652, Model learning rate 0.0003764354332815856
 Optimizer iteration 1131, batch 349

 Learning rate 0.0003756550564175727, Model learning rate 0.0003756550431717187

 Optimizer iteration 1132, batch 350

 Learning rate 0.00037487498896226335, Model learning rate 0.00037487500230781734
 Optimizer iteration 1133, batch 351

 Learning rate 0.00037409524627594605, Model learning rate 0.00037409525248222053
 Optimizer iteration 1134, batch 352

 Learning rate 0.0003733158303824868, Model learning rate 0.00037331582279875875
 Optimizer iteration 1135, batch 353

 Learning rate 0.0003725367433049033, Model learning rate 0.00037253674236126244
 Optimizer iteration 1136, batch 354

 Learning rate 0.0003717579870653601, Model learning rate 0.00037175798206590116
 Optimizer iteration 1137, batch 355

 Learning rate 0.0003709795636851622, Model learning rate 0.00037097957101650536

 Optimizer iteration 1138, batch 356

 Learnin

  3/391 [..............................] - ETA: 16s - loss: 1.0563 - acc: 0.6979
 Optimizer iteration 1176, batch 3

 Learning rate 0.0003409021281127835, Model learning rate 0.00034090213011950254
  4/391 [..............................] - ETA: 17s - loss: 1.0520 - acc: 0.6875
 Optimizer iteration 1177, batch 4

 Learning rate 0.00034013866607376307, Model learning rate 0.0003401386784389615

 Optimizer iteration 1178, batch 5

 Learning rate 0.00033937561896381146, Model learning rate 0.0003393756051082164
  6/391 [..............................] - ETA: 17s - loss: 1.0165 - acc: 0.6901
 Optimizer iteration 1179, batch 6

 Learning rate 0.0003386129887634601, Model learning rate 0.0003386129974387586
  7/391 [..............................] - ETA: 18s - loss: 1.0468 - acc: 0.6763
 Optimizer iteration 1180, batch 7

 Learning rate 0.0003378507774521587, Model learning rate 0.00033785076811909676
  8/391 [..............................] - ETA: 18s - loss: 1.0429 - acc: 0.6787
 Optimizer

 48/391 [==>...........................] - ETA: 22s - loss: 1.0365 - acc: 0.6865
 Optimizer iteration 1221, batch 48

 Learning rate 0.00030698505239569424, Model learning rate 0.0003069850499741733
 49/391 [==>...........................] - ETA: 22s - loss: 1.0368 - acc: 0.6873
 Optimizer iteration 1222, batch 49

 Learning rate 0.0003062422067739485, Model learning rate 0.00030624220380559564

 Optimizer iteration 1223, batch 50

 Learning rate 0.00030549986406143496, Model learning rate 0.00030549985240213573
 51/391 [==>...........................] - ETA: 22s - loss: 1.0393 - acc: 0.6869
 Optimizer iteration 1224, batch 51

 Learning rate 0.0003047580261849456, Model learning rate 0.00030475802486762404
 52/391 [==>...........................] - ETA: 22s - loss: 1.0405 - acc: 0.6863
 Optimizer iteration 1225, batch 52

 Learning rate 0.0003040166950699625, Model learning rate 0.0003040166920982301
 53/391 [===>..........................] - ETA: 22s - loss: 1.0398 - acc: 0.6860
 Opt

 Optimizer iteration 1315, batch 142

 Learning rate 0.00023960978786741877, Model learning rate 0.00023960978433024138
 Optimizer iteration 1316, batch 143

 Learning rate 0.00023892244703764342, Model learning rate 0.00023892245371825993

 Optimizer iteration 1317, batch 144

 Learning rate 0.00023823578384931632, Model learning rate 0.00023823577794246376
 Optimizer iteration 1318, batch 145

 Learning rate 0.0002375498000847107, Model learning rate 0.00023754980065859854

 Optimizer iteration 1319, batch 146

 Learning rate 0.00023686449752433614, Model learning rate 0.00023686449276283383
 Optimizer iteration 1320, batch 147

 Learning rate 0.00023617987794693357, Model learning rate 0.0002361798833590001
 Optimizer iteration 1321, batch 148

 Learning rate 0.00023549594312947188, Model learning rate 0.00023549594334326684

 Optimizer iteration 1322, batch 149

 Learning rate 0.00023481269484714208, Model learning rate 0.00023481270181946456
 Optimizer iteration 1323, batch 150

 

 Optimizer iteration 1360, batch 187

 Learning rate 0.00020937533765518184, Model learning rate 0.00020937534281983972

 Optimizer iteration 1361, batch 188

 Learning rate 0.0002087202294340494, Model learning rate 0.00020872023014817387
 Optimizer iteration 1362, batch 189

 Learning rate 0.00020806587724591725, Model learning rate 0.0002080658741760999

 Optimizer iteration 1363, batch 190

 Learning rate 0.00020741228278919343, Model learning rate 0.00020741228945553303
 Optimizer iteration 1364, batch 191

 Learning rate 0.00020675944776031875, Model learning rate 0.0002067594468826428

 Optimizer iteration 1365, batch 192

 Learning rate 0.00020610737385376348, Model learning rate 0.0002061073755612597
 Optimizer iteration 1366, batch 193

 Learning rate 0.0002054560627620219, Model learning rate 0.00020545606093946844

 Optimizer iteration 1367, batch 194

 Learning rate 0.00020480551617560832, Model learning rate 0.0002048055175691843
 Optimizer iteration 1368, batch 195

 Lea

 Optimizer iteration 1408, batch 235

 Learning rate 0.00017881193376411818, Model learning rate 0.0001788119407137856
 Optimizer iteration 1409, batch 236

 Learning rate 0.00017819499606653772, Model learning rate 0.00017819499771576375
 Optimizer iteration 1410, batch 237

 Learning rate 0.00017757889363191482, Model learning rate 0.00017757889872882515

 Optimizer iteration 1411, batch 238

 Learning rate 0.00017696362805937776, Model learning rate 0.00017696362920105457
 Optimizer iteration 1412, batch 239

 Learning rate 0.00017634920094588308, Model learning rate 0.00017634920368436724
 Optimizer iteration 1413, batch 240

 Learning rate 0.00017573561388621101, Model learning rate 0.00017573560762684792
 Optimizer iteration 1414, batch 241

 Learning rate 0.00017512286847296105, Model learning rate 0.00017512287013232708
 Optimizer iteration 1415, batch 242

 Learning rate 0.0001745109662965481, Model learning rate 0.00017451096209697425
 Optimizer iteration 1416, batch 243

 Le

 Optimizer iteration 1453, batch 280

 Learning rate 0.00015189901068080535, Model learning rate 0.00015189901751000434

 Optimizer iteration 1454, batch 281

 Learning rate 0.00015132121152048117, Model learning rate 0.00015132120461203158
 Optimizer iteration 1455, batch 282

 Learning rate 0.00015074431737553158, Model learning rate 0.00015074432303663343
 Optimizer iteration 1456, batch 283

 Learning rate 0.00015016832974331724, Model learning rate 0.00015016832912806422
 Optimizer iteration 1457, batch 284

 Learning rate 0.00014959325011884683, Model learning rate 0.00014959325199015439
 Optimizer iteration 1458, batch 285

 Learning rate 0.00014901907999477165, Model learning rate 0.00014901907707098871
 Optimizer iteration 1459, batch 286

 Learning rate 0.00014844582086138232, Model learning rate 0.00014844581892248243
 Optimizer iteration 1460, batch 287

 Learning rate 0.0001478734742066054, Model learning rate 0.00014787347754463553
 Optimizer iteration 1461, batch 288

 L

 Optimizer iteration 1543, batch 370

 Learning rate 0.00010369140403763638, Model learning rate 0.00010369140363764018
 Optimizer iteration 1544, batch 371

 Learning rate 0.00010320076713303467, Model learning rate 0.00010320076398784295
 Optimizer iteration 1545, batch 372

 Learning rate 0.00010271116014312292, Model learning rate 0.00010271115752402693
 Optimizer iteration 1546, batch 373

 Learning rate 0.00010222258433870341, Model learning rate 0.00010222258424619213
 Optimizer iteration 1547, batch 374

 Learning rate 0.00010173504098790188, Model learning rate 0.00010173504415433854
 Optimizer iteration 1548, batch 375

 Learning rate 0.00010124853135616475, Model learning rate 0.00010124852997250855
 Optimizer iteration 1549, batch 376

 Learning rate 0.00010076305670625507, Model learning rate 0.00010076305625261739

 Optimizer iteration 1550, batch 377

 Learning rate 0.00010027861829824952, Model learning rate 0.00010027861571870744
 Optimizer iteration 1551, batch 378

 

 24/391 [>.............................] - ETA: 20s - loss: 0.9338 - acc: 0.7314
 Optimizer iteration 1588, batch 24

 Learning rate 8.264998881054659e-05, Model learning rate 8.264998905360699e-05
 25/391 [>.............................] - ETA: 20s - loss: 0.9334 - acc: 0.7309
 Optimizer iteration 1589, batch 25

 Learning rate 8.220691770001421e-05, Model learning rate 8.220691961469129e-05

 Optimizer iteration 1590, batch 26

 Learning rate 8.176493099488664e-05, Model learning rate 8.17649342934601e-05
 27/391 [=>............................] - ETA: 20s - loss: 0.9394 - acc: 0.7295
 Optimizer iteration 1591, batch 27

 Learning rate 8.132402984236531e-05, Model learning rate 8.132403308991343e-05
 28/391 [=>............................] - ETA: 21s - loss: 0.9408 - acc: 0.7291
 Optimizer iteration 1592, batch 28

 Learning rate 8.088421538683377e-05, Model learning rate 8.088421600405127e-05
 29/391 [=>............................] - ETA: 21s - loss: 0.9383 - acc: 0.7303
 Optimizer

 Optimizer iteration 1680, batch 116

 Learning rate 4.65628195747273e-05, Model learning rate 4.656281817005947e-05

 Optimizer iteration 1681, batch 117

 Learning rate 4.6223954176149606e-05, Model learning rate 4.622395499609411e-05
 Optimizer iteration 1682, batch 118

 Learning rate 4.588626657877898e-05, Model learning rate 4.5886266889283434e-05
 Optimizer iteration 1683, batch 119

 Learning rate 4.5549757659102795e-05, Model learning rate 4.554975748760626e-05
 Optimizer iteration 1684, batch 120

 Learning rate 4.521442829054856e-05, Model learning rate 4.521442679106258e-05
 Optimizer iteration 1685, batch 121

 Learning rate 4.488027934348271e-05, Model learning rate 4.4880278437631205e-05
 Optimizer iteration 1686, batch 122

 Learning rate 4.4547311685207536e-05, Model learning rate 4.4547312427312136e-05
 Optimizer iteration 1687, batch 123

 Learning rate 4.4215526179959165e-05, Model learning rate 4.4215525122126564e-05
 Optimizer iteration 1688, batch 124

 Learning 

 Optimizer iteration 1727, batch 163

 Learning rate 3.192301211703952e-05, Model learning rate 3.192301301169209e-05
 Optimizer iteration 1728, batch 164

 Learning rate 3.164040085872755e-05, Model learning rate 3.164040026604198e-05
 Optimizer iteration 1729, batch 165

 Learning rate 3.1359005254054274e-05, Model learning rate 3.135900624329224e-05

 Optimizer iteration 1730, batch 166

 Learning rate 3.107882603339785e-05, Model learning rate 3.1078827305464074e-05
 Optimizer iteration 1731, batch 167

 Learning rate 3.079986392397899e-05, Model learning rate 3.0799863452557474e-05
 Optimizer iteration 1732, batch 168

 Learning rate 3.052211964985974e-05, Model learning rate 3.052211832255125e-05
 Optimizer iteration 1733, batch 169

 Learning rate 3.024559393194076e-05, Model learning rate 3.0245593734434806e-05
 Optimizer iteration 1734, batch 170

 Learning rate 2.9970287487960158e-05, Model learning rate 2.9970287869218737e-05

 Optimizer iteration 1735, batch 171

 Learning 

 Optimizer iteration 1817, batch 253

 Learning rate 1.1434351466087178e-05, Model learning rate 1.1434351108619012e-05
 Optimizer iteration 1818, batch 254

 Learning rate 1.1263698832695512e-05, Model learning rate 1.126369897974655e-05
 Optimizer iteration 1819, batch 255

 Learning rate 1.1094314741816935e-05, Model learning rate 1.109431468648836e-05
 Optimizer iteration 1820, batch 256

 Learning rate 1.0926199633097156e-05, Model learning rate 1.0926200047833845e-05
 Optimizer iteration 1821, batch 257

 Learning rate 1.0759353942888573e-05, Model learning rate 1.0759354154288303e-05

 Optimizer iteration 1822, batch 258

 Learning rate 1.0593778104248441e-05, Model learning rate 1.0593777915346436e-05
 Optimizer iteration 1823, batch 259

 Learning rate 1.0429472546938157e-05, Model learning rate 1.0429472240502946e-05
 Optimizer iteration 1824, batch 260

 Learning rate 1.0266437697422026e-05, Model learning rate 1.0266438039252535e-05
 Optimizer iteration 1825, batch 261

 Le

 Optimizer iteration 1864, batch 300

 Learning rate 4.791511883121713e-06, Model learning rate 4.791511855728459e-06

 Optimizer iteration 1865, batch 301

 Learning rate 4.680902408635335e-06, Model learning rate 4.680902293330291e-06
 Optimizer iteration 1866, batch 302

 Learning rate 4.571578562679757e-06, Model learning rate 4.571578756440431e-06
 Optimizer iteration 1867, batch 303

 Learning rate 4.463540629010998e-06, Model learning rate 4.4635407903115265e-06
 Optimizer iteration 1868, batch 304

 Learning rate 4.356788888047747e-06, Model learning rate 4.356788849690929e-06
 Optimizer iteration 1869, batch 305

 Learning rate 4.2513236168700845e-06, Model learning rate 4.25132384407334e-06
 Optimizer iteration 1870, batch 306

 Learning rate 4.147145089218984e-06, Model learning rate 4.1471448639640585e-06
 Optimizer iteration 1871, batch 307

 Learning rate 4.04425357549576e-06, Model learning rate 4.044253728352487e-06
 Optimizer iteration 1872, batch 308

 Learning rate 3

 Optimizer iteration 1908, batch 344

 Learning rate 1.1442038431044854e-06, Model learning rate 1.1442037930464721e-06
 Optimizer iteration 1909, batch 345

 Learning rate 1.0903861639985914e-06, Model learning rate 1.090386149371625e-06
 Optimizer iteration 1910, batch 346

 Learning rate 1.0378634328099267e-06, Model learning rate 1.037863398778427e-06
 Optimizer iteration 1911, batch 347

 Learning rate 9.866357858642206e-07, Model learning rate 9.866357686405536e-07
 Optimizer iteration 1912, batch 348

 Learning rate 9.367033561257233e-07, Model learning rate 9.367033726448426e-07
 Optimizer iteration 1913, batch 349

 Learning rate 8.880662731968747e-07, Model learning rate 8.880662676347129e-07

 Optimizer iteration 1914, batch 350

 Learning rate 8.4072466331786e-07, Model learning rate 8.407246809838398e-07
 Optimizer iteration 1915, batch 351

 Learning rate 7.946786493666647e-07, Model learning rate 7.946786695356423e-07
 Optimizer iteration 1916, batch 352

 Learning rate 

 Optimizer iteration 1953, batch 389

 Learning rate 0.0009999941600087644, Model learning rate 0.0009999941103160381

 Optimizer iteration 1954, batch 390

 Learning rate 0.0009999896178090784, Model learning rate 0.0009999895701184869

Epoch 00005: saving model to /home/ubuntu/Projects/hybrid-ensemble/model/run_200/cifar10_ResNet20v1_model-0005.h5
Saving epoch training log...
Saving batch training log...
Writing index file and predict files...
Deleting unwanted model files...
Saving target file...


In [9]:
logfile = '../model/run_100/training_log.csv'
df = pd.read_csv(logfile, header=0)
valid_accuracy = df['validation_accuracy'].values.tolist()
top_x = []
for i in range(0, 10):
    top_x.append(np.argmax(valid_accuracy[i*snapshot_window_size:(i+1)*snapshot_window_size]) + i*snapshot_window_size)


if version == 2:
    model = resnet_v2(input_shape=input_shape, depth=depth)
else:
    model = resnet_v1(input_shape=input_shape, depth=depth)

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=initial_lr),
              metrics=['accuracy'])
test_scores = []
for k in top_x:
    saved_model = '../model/run_100/cifar10_ResNet20v1_model-{:04d}.h5'.format(k+1)
    model.load_weights(saved_model)
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
    test_scores.append(scores[1])
test_scores_1 = test_scores


Test loss: 0.6096683431466421
Test accuracy: 0.8351111112170749
Test loss: 0.5120277782943514
Test accuracy: 0.8691111110051473
Test loss: 0.48847858993212384
Test accuracy: 0.8855555554495917
Test loss: 0.4810516203906801
Test accuracy: 0.8931111111111111
Test loss: 0.48305441366301644
Test accuracy: 0.8942222222222223
Test loss: 0.48185164595974816
Test accuracy: 0.9008888888888889
Test loss: 0.47659654211997987
Test accuracy: 0.9022222222222223
Test loss: 0.49599758842256336
Test accuracy: 0.8997777777777778
Test loss: 0.4874440699285931
Test accuracy: 0.9046666666666666
Test loss: 0.48776051804754467
Test accuracy: 0.9044444444444445


In [10]:
print('Building model...')
if version == 2:
    model = resnet_v2(input_shape=input_shape, depth=depth)
else:
    model = resnet_v1(input_shape=input_shape, depth=depth)


model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=initial_lr),
              metrics=['accuracy'])

logfile = '../model/run_200/training_log.csv'
df = pd.read_csv(logfile, header=0)
valid_accuracy = df['validation_accuracy'].values.tolist()
version = 1
snapshot_window_size = 20
top_x = []
for i in range(0, 10):
    top_x.append(np.argmax(valid_accuracy[i*snapshot_window_size:(i+1)*snapshot_window_size]) + i*snapshot_window_size)


if version == 2:
    model = resnet_v2(input_shape=input_shape, depth=depth)
else:
    model = resnet_v1(input_shape=input_shape, depth=depth)

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=initial_lr),
              metrics=['accuracy'])
test_scores = []
for k in top_x:
    saved_model = '../model/run_200/cifar10_ResNet20v1_model-{:04d}.h5'.format(k+1)
    model.load_weights(saved_model)
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
    test_scores.append(scores[1])
test_scores_2 = test_scores

Building model...
Test loss: 0.623283628516727
Test accuracy: 0.8286666666136847
Test loss: 0.5248715903229184
Test accuracy: 0.864
Test loss: 0.4969917231135898
Test accuracy: 0.8771111111111111
Test loss: 0.4798820976946089
Test accuracy: 0.8871111111111111
Test loss: 0.49182780093616907
Test accuracy: 0.8908888888888888
Test loss: 0.4949813829925325
Test accuracy: 0.8993333333333333
Test loss: 0.500278210149871
Test accuracy: 0.9004444444444445
Test loss: 0.5235393349462085
Test accuracy: 0.8928888888888888
Test loss: 0.4998899468051063
Test accuracy: 0.9042222222222223
Test loss: 0.5158649944994185
Test accuracy: 0.9002222222222223


In [4]:
import common_functions as cf
logfile = '../model/run_200/training_log.csv'
df = pd.read_csv(logfile, header=0)
valid_accuracy = df['validation_accuracy'].values.tolist()
version = 1
snapshot_window_size = 20
top_x = []
for i in range(1, 10):
    top_x.append(np.argmax(valid_accuracy[i*snapshot_window_size:(i+1)*snapshot_window_size]) + i*snapshot_window_size)

groundfile = '../model/run_200/target.csv' 
df_g = pd.read_csv(groundfile,header=0)
test_scores = []
for k in top_x:
    predfile = '../model/run_200/prediction_{:04d}.csv'.format(k+1)  
    df_p = pd.read_csv(predfile,header=0)     
    # compute confusion matrix
    cm = cf.confusion_matrix(df_g, df_p)
    tp, total = 0, 0
    for i in range(len(cm)):
        tp += cm[i,i]
        total += np.sum(cm[i])
    ea = tp/total*100
    print('Test accuracy:', ea)
    test_scores.append(ea)
test_scores_3 = test_scores

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return getattr(obj, method)(*args, **kwds)


Test accuracy: 86.4
Test accuracy: 87.71111111111111
Test accuracy: 88.71111111111111
Test accuracy: 89.08888888888889
Test accuracy: 89.93333333333334
Test accuracy: 90.04444444444445
Test accuracy: 89.28888888888889
Test accuracy: 90.42222222222223
Test accuracy: 90.02222222222223
