In [1]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras import optimizers
import numpy as np
from keras.layers.core import Lambda
from keras import backend as K
from keras import regularizers
import AdamW
from tool import *

Using TensorFlow backend.


In [2]:
# Load the Fashion-MNIST dataset through Keras; load_data() yields a
# (train, test) pair, each of which unpacks into (images, labels).
(
    (fm_x_train, fm_y_train),
    (fm_x_test, fm_y_test),
) = keras.datasets.fashion_mnist.load_data()

In [3]:
from collections import Counter

In [4]:
# Re-shape the Fashion-MNIST splits into the CIFAR-style layout the network expects.
# NOTE(review): MNIST_To_CIFAR_FORM presumably comes from `from tool import *` and
# presumably yields 32x32x3 images (the model's x_shape) -- confirm against tool.py.
fm_train_RGB_x, fm_train_y, fm_test_RGB_x, fm_test_y = MNIST_To_CIFAR_FORM(
    fm_x_train, fm_y_train, fm_x_test, fm_y_test
)

# CIFAR-10 splits, loaded alongside for use elsewhere in the notebook.
(
    (C_x_train, C_y_train),
    (C_x_test, C_y_test),
) = cifar10.load_data()

In [10]:
class fmvgg:
    """VGG-style convolutional classifier with 10 output classes.

    The network is built by ``build_model``. ``train`` fits it on the
    notebook-level arrays ``fm_train_RGB_x`` / ``fm_train_y`` (validated on
    ``fm_test_RGB_x`` / ``fm_test_y``) and writes the weights to 'fmvgg.h5'.

    Attributes:
        num_classes: number of output classes (10).
        weight_decay: L2 regularization factor applied to conv/dense kernels.
        x_shape: expected input shape, [32, 32, 3].
        mean, std: normalization constants captured by ``normalize`` during
            training; ``None`` until ``normalize`` has been called.
        model: the Keras ``Sequential`` model.
    """

    def __init__(self,train=True):
        """Build the network, then either train it or load saved weights.

        Args:
            train: if True, train from scratch (also saves 'fmvgg.h5');
                if False, load previously saved weights from 'fmvgg.h5'.
        """
        self.num_classes = 10
        self.weight_decay = 0.0005
        self.x_shape = [32,32,3]
        # Filled in by normalize(); reused by normalize_production() so that
        # inference sees the same input scaling as training/validation did.
        self.mean = None
        self.std = None

        self.model = self.build_model()
        if train:
            self.model = self.train(self.model)
        else:
            # NOTE: the weight file does not carry the normalization constants,
            # so in this path normalize_production() falls back to the
            # statistics of whatever array it is given.
            self.model.load_weights('fmvgg.h5')


    def _add_conv_unit(self, model, filters, dropout=None, **conv_kwargs):
        """Append Conv2D(3x3, same) -> ReLU -> BatchNorm [-> Dropout] to ``model``.

        Args:
            model: the Sequential model being assembled.
            filters: number of convolution filters.
            dropout: dropout rate to append after batch-norm, or None for no dropout.
            **conv_kwargs: extra Conv2D keyword arguments (e.g. ``input_shape``).
        """
        model.add(Conv2D(filters, (3, 3), padding='same',
                         kernel_regularizer=regularizers.l2(self.weight_decay),
                         **conv_kwargs))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        if dropout is not None:
            model.add(Dropout(dropout))

    def build_model(self):
        """Assemble the VGG-style network for 10 classes with heavy dropout and weight decay.

        Returns:
            An uncompiled Keras ``Sequential`` model ending in a softmax over
            ``self.num_classes`` classes.
        """
        model = Sequential()

        # One entry per stage: (filters, dropout rate after each conv unit).
        # Each stage is closed by a 2x2 max-pool. The layer order is kept
        # exactly as before so weights saved in 'fmvgg.h5' still load.
        stages = [
            (64,  (0.3, None)),
            (128, (0.4, None)),
            (256, (0.4, 0.4, None)),
            (512, (0.4, 0.4, None)),
            (512, (0.4, 0.4, None)),
        ]

        is_first_layer = True
        for filters, dropout_rates in stages:
            for rate in dropout_rates:
                # Only the very first layer declares the input shape.
                extra = {'input_shape': self.x_shape} if is_first_layer else {}
                self._add_conv_unit(model, filters, rate, **extra)
                is_first_layer = False
            model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Dropout(0.5))

        # Classifier head.
        model.add(Flatten())
        model.add(Dense(512,kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(Dropout(0.5))
        model.add(Dense(self.num_classes))
        model.add(Activation('softmax'))
        return model


    def normalize(self,X_train,X_test):
        """Normalize both sets to zero mean / unit variance using training-set statistics.

        The scalar mean and standard deviation of ``X_train`` (taken over all
        four axes) are stored on the instance as ``self.mean`` / ``self.std``
        so that ``normalize_production`` can apply the same scaling later.

        Args:
            X_train: training inputs (4-D array).
            X_test: test inputs, scaled with the training statistics.

        Returns:
            Tuple ``(X_train, X_test)`` of normalized arrays.
        """
        mean = np.mean(X_train,axis=(0,1,2,3))
        std = np.std(X_train, axis=(0, 1, 2, 3))
        self.mean = mean
        self.std = std
        X_train = (X_train-mean)/(std+1e-7)
        X_test = (X_test-mean)/(std+1e-7)
        return X_train, X_test

    def normalize_production(self,x):
        """Normalize inputs at inference time.

        Uses the training-set statistics captured by ``normalize`` when they
        are available, so predictions are made on inputs scaled the same way
        as during training. If no statistics have been captured (for example
        the weights were loaded with ``train=False``), falls back to the mean
        and standard deviation of ``x`` itself.

        Args:
            x: array of inputs to normalize.

        Returns:
            The normalized array.
        """
        mean = getattr(self, 'mean', None)
        std = getattr(self, 'std', None)
        if mean is None or std is None:
            # No saved training statistics: use the batch's own statistics.
            mean = np.mean(x)
            std = np.std(x)
        return (x-mean)/(std+1e-7)

    def predict(self,x,normalize=True,batch_size=50):
        """Return the model's class probabilities for ``x``.

        Args:
            x: input array.
            normalize: if True, pass ``x`` through ``normalize_production`` first.
            batch_size: batch size handed to the Keras ``predict`` call.
        """
        if normalize:
            x = self.normalize_production(x)
        return self.model.predict(x,batch_size)

    def train(self,model):
        """Compile and fit ``model`` with data augmentation, then save its weights.

        Reads the notebook-level arrays ``fm_train_RGB_x``, ``fm_train_y``,
        ``fm_test_RGB_x`` and ``fm_test_y``; the test split is used as
        validation data. Writes the trained weights to 'fmvgg.h5'.

        Args:
            model: the uncompiled Keras model to train.

        Returns:
            The trained model.
        """
        #training parameters
        batch_size = 128
        maxepoches = 300
        learning_rate = 0.001
        lr_drop = 20

        # Data comes from notebook globals prepared in an earlier cell.
        x_train, y_train, x_test, y_test = fm_train_RGB_x, fm_train_y, fm_test_RGB_x, fm_test_y
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train, x_test = self.normalize(x_train, x_test)

        y_train = keras.utils.to_categorical(y_train, self.num_classes)
        y_test = keras.utils.to_categorical(y_test, self.num_classes)

        def lr_scheduler(epoch):
            # Halve the learning rate every `lr_drop` epochs.
            return learning_rate * (0.5 ** (epoch // lr_drop))
        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)

        #data augmentation
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images
        # datagen.fit() is intentionally not called: it only computes statistics
        # for featurewise centering/std-normalization/ZCA, all disabled above.

        #optimization details
        optimizer = optimizers.Adam(lr=learning_rate)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer,metrics=['accuracy'])

        # Single fit call; the learning-rate drops are handled by the scheduler callback.
        model.fit_generator(datagen.flow(x_train, y_train,
                                         batch_size=batch_size),
                            steps_per_epoch=x_train.shape[0] // batch_size,
                            epochs=maxepoches,
                            validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)
        model.save_weights('fmvgg.h5')
        return model

if __name__ == '__main__':
    # Train the network and report its misclassification rate on the test split.

    # Data comes from the notebook-level arrays prepared in an earlier cell.
    x_train, y_train, x_test, y_test = fm_train_RGB_x, fm_train_y, fm_test_RGB_x, fm_test_y
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # One-hot encode the labels for the 10 classes.
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)

    model = fmvgg(train = True)
    predicted_x = model.predict(x_test)
    # True where the predicted class differs from the labelled class.
    residuals = np.argmax(predicted_x,1)!=np.argmax(y_test,1)

    # np.mean gives the fraction of misclassified samples with true division on
    # every Python version (the builtin sum()/len() form truncates to an integer
    # under Python 2) and avoids a slow Python-level loop over the array.
    loss = np.mean(residuals)
    print("the validation 0/1 loss is: ",loss)

Epoch 1/300
 - 56s - loss: 3.3012 - acc: 0.6112 - val_loss: 2.9684 - val_acc: 0.6947
Epoch 2/300
 - 49s - loss: 2.2025 - acc: 0.7595 - val_loss: 1.9424 - val_acc: 0.7304
Epoch 3/300
 - 48s - loss: 1.5978 - acc: 0.7927 - val_loss: 1.5471 - val_acc: 0.7714
Epoch 4/300
 - 48s - loss: 1.2164 - acc: 0.8208 - val_loss: 3.3513 - val_acc: 0.4877
Epoch 5/300
 - 48s - loss: 1.0283 - acc: 0.8329 - val_loss: 0.9721 - val_acc: 0.8406
Epoch 6/300
 - 48s - loss: 0.9306 - acc: 0.8428 - val_loss: 0.9374 - val_acc: 0.8322
Epoch 7/300
 - 48s - loss: 0.8896 - acc: 0.8503 - val_loss: 0.8651 - val_acc: 0.8638
Epoch 8/300
 - 48s - loss: 0.8863 - acc: 0.8500 - val_loss: 0.8639 - val_acc: 0.8634
Epoch 9/300
 - 48s - loss: 0.8862 - acc: 0.8512 - val_loss: 2.4835 - val_acc: 0.6277
Epoch 10/300
 - 48s - loss: 0.9120 - acc: 0.8502 - val_loss: 1.0276 - val_acc: 0.8190
Epoch 11/300
 - 48s - loss: 0.9323 - acc: 0.8494 - val_loss: 0.9157 - val_acc: 0.8655
Epoch 12/300
 - 56s - loss: 0.9507 - acc: 0.8556 - val_loss: 1.

Epoch 97/300
 - 48s - loss: 0.2658 - acc: 0.9425 - val_loss: 0.2721 - val_acc: 0.9433
Epoch 98/300
 - 48s - loss: 0.2596 - acc: 0.9458 - val_loss: 0.2711 - val_acc: 0.9434
Epoch 99/300
 - 48s - loss: 0.2601 - acc: 0.9447 - val_loss: 0.2771 - val_acc: 0.9406
Epoch 100/300
 - 48s - loss: 0.2567 - acc: 0.9457 - val_loss: 0.2740 - val_acc: 0.9420
Epoch 101/300
 - 48s - loss: 0.2462 - acc: 0.9485 - val_loss: 0.2669 - val_acc: 0.9443
Epoch 102/300
 - 48s - loss: 0.2436 - acc: 0.9486 - val_loss: 0.2618 - val_acc: 0.9463
Epoch 103/300
 - 48s - loss: 0.2420 - acc: 0.9489 - val_loss: 0.2613 - val_acc: 0.9451
Epoch 104/300
 - 48s - loss: 0.2405 - acc: 0.9488 - val_loss: 0.2647 - val_acc: 0.9435
Epoch 105/300
 - 48s - loss: 0.2381 - acc: 0.9503 - val_loss: 0.2590 - val_acc: 0.9465
Epoch 106/300
 - 48s - loss: 0.2373 - acc: 0.9500 - val_loss: 0.2580 - val_acc: 0.9448
Epoch 107/300
 - 48s - loss: 0.2348 - acc: 0.9502 - val_loss: 0.2605 - val_acc: 0.9449
Epoch 108/300
 - 48s - loss: 0.2347 - acc: 0.9

Epoch 192/300
 - 48s - loss: 0.1790 - acc: 0.9615 - val_loss: 0.2405 - val_acc: 0.9469
Epoch 193/300
 - 48s - loss: 0.1761 - acc: 0.9624 - val_loss: 0.2416 - val_acc: 0.9466
Epoch 194/300
 - 48s - loss: 0.1790 - acc: 0.9623 - val_loss: 0.2415 - val_acc: 0.9473
Epoch 195/300
 - 48s - loss: 0.1789 - acc: 0.9629 - val_loss: 0.2411 - val_acc: 0.9466
Epoch 196/300
 - 48s - loss: 0.1778 - acc: 0.9624 - val_loss: 0.2413 - val_acc: 0.9461
Epoch 197/300
 - 48s - loss: 0.1755 - acc: 0.9630 - val_loss: 0.2420 - val_acc: 0.9465
Epoch 198/300
 - 48s - loss: 0.1765 - acc: 0.9626 - val_loss: 0.2413 - val_acc: 0.9470
Epoch 199/300
 - 48s - loss: 0.1762 - acc: 0.9635 - val_loss: 0.2417 - val_acc: 0.9463
Epoch 200/300
 - 48s - loss: 0.1773 - acc: 0.9620 - val_loss: 0.2421 - val_acc: 0.9465
Epoch 201/300
 - 49s - loss: 0.1770 - acc: 0.9627 - val_loss: 0.2423 - val_acc: 0.9458
Epoch 202/300
 - 48s - loss: 0.1767 - acc: 0.9627 - val_loss: 0.2419 - val_acc: 0.9461
Epoch 203/300
 - 48s - loss: 0.1760 - acc: 

Epoch 287/300
 - 48s - loss: 0.1734 - acc: 0.9631 - val_loss: 0.2407 - val_acc: 0.9468
Epoch 288/300
 - 48s - loss: 0.1714 - acc: 0.9645 - val_loss: 0.2408 - val_acc: 0.9468
Epoch 289/300
 - 48s - loss: 0.1762 - acc: 0.9628 - val_loss: 0.2405 - val_acc: 0.9472
Epoch 290/300
 - 48s - loss: 0.1746 - acc: 0.9641 - val_loss: 0.2410 - val_acc: 0.9471
Epoch 291/300
 - 48s - loss: 0.1727 - acc: 0.9636 - val_loss: 0.2409 - val_acc: 0.9469
Epoch 292/300
 - 48s - loss: 0.1748 - acc: 0.9625 - val_loss: 0.2404 - val_acc: 0.9467
Epoch 293/300
 - 48s - loss: 0.1736 - acc: 0.9635 - val_loss: 0.2408 - val_acc: 0.9468
Epoch 294/300
 - 48s - loss: 0.1755 - acc: 0.9634 - val_loss: 0.2406 - val_acc: 0.9470
Epoch 295/300
 - 48s - loss: 0.1782 - acc: 0.9626 - val_loss: 0.2406 - val_acc: 0.9467
Epoch 296/300
 - 48s - loss: 0.1730 - acc: 0.9634 - val_loss: 0.2407 - val_acc: 0.9465
Epoch 297/300
 - 48s - loss: 0.1744 - acc: 0.9632 - val_loss: 0.2407 - val_acc: 0.9464
Epoch 298/300
 - 48s - loss: 0.1738 - acc: 

In [11]:
import pickle
# Serialize the trained fmvgg wrapper to disk. The context manager guarantees
# the file handle is flushed and closed even if pickling raises.
# NOTE(review): only unpickle this file from a trusted source -- pickle.load can
# execute arbitrary code. The trained weights are also saved separately in
# 'fmvgg.h5' by fmvgg.train().
with open("FM-VGG-3.pkl", "wb") as pkl_file:
    pickle.dump(model, pkl_file)

In [7]:
# Keep a second reference to the trained wrapper. This is an alias, not a copy:
# model1 and model refer to the same object.
# NOTE(review): this cell's execution count (In [7]) is lower than the cells above
# it, so it was run out of order -- confirm it still works after Restart & Run All.
model1 = model