In [1]:
%matplotlib inline
import numpy as np
import time
import h5py
import keras
import pandas as pd
import math
import joblib
import matplotlib.pyplot as plt

from fuel.datasets.hdf5 import H5PYDataset

from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit

from IPython.display import display

from keras.layers import Input, Dense, Lambda, Flatten, Reshape, BatchNormalization, Activation, Dropout
from keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, UpSampling2D

from keras.callbacks import EarlyStopping
from keras.optimizers import RMSprop, Adam, SGD
from keras.models import Model, Sequential
from keras.utils import np_utils
from keras import backend as K
from keras_tqdm import TQDMNotebookCallback

from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K


Using TensorFlow backend.


In [2]:
ftrain = H5PYDataset("../../data/cifar10/cifar10.hdf5", which_sets=('train',))
X_train, y_train = ftrain.get_data(ftrain.open(), slice(0, ftrain.num_examples))
X_train = np.moveaxis(X_train[:], 1, 3) / 255.

ftest = H5PYDataset("../../data/cifar10/cifar10.hdf5", which_sets=('test',))
X_test, y_test = ftest.get_data(ftest.open(), slice(0, ftest.num_examples))
X_test = np.moveaxis(X_test[:], 1, 3) / 255.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(50000, 32, 32, 3) (50000, 1)
(10000, 32, 32, 3) (10000, 1)


In [3]:
# input image dimensions
img_rows, img_cols, img_chns = 32, 32, 3

# number of convolutional filters to use
num_classes = 10

batch_size = 100
original_img_size = (img_rows, img_cols, img_chns)
epochs = 1000

In [4]:
def create_model():
    x = Input(shape=(32, 32, 3))
    up = UpSampling2D(size=7)(x)
    
    base_model = InceptionV3(input_tensor=up, weights='imagenet', include_top=False)
    avg_pool = GlobalAveragePooling2D()(base_model.output)
    dense_1 = Dense(512, activation='relu')(avg_pool)
    dropout_1 = Dropout(0.5)(dense_1)
    y = Dense(10, activation='softmax')(dropout_1)
    
    model = Model(inputs=x, outputs=y)
    
    for layer in base_model.layers:
        layer.trainable = False
    opt = RMSprop(lr=0.0001, decay=1e-6)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

create_model().summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 32, 32, 3)     0                                            
____________________________________________________________________________________________________
up_sampling2d_1 (UpSampling2D)   (None, 224, 224, 3)   0           input_1[0][0]                    
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, 111, 111, 32)  864         up_sampling2d_1[0][0]            
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, 111, 111, 32)  96          conv2d_1[0][0]                   
___________________________________________________________________________________________

In [5]:
results = []
for sample_size in [1000, 2000, 5000, 10000, 25000, 50000]:
    start = time.time()
    print('Fitting with sample_size: {}'.format(sample_size))
   
    if sample_size < len(X_train):
        sss = StratifiedShuffleSplit(n_splits=2, test_size=sample_size / len(X_train), random_state=0)
        _, index = sss.split(X_train, y_train)
        X, y = X_train[index[1]], y_train[index[1]]
    else:
        X, y = X_train, y_train
   
    y = np_utils.to_categorical(y)
    model = create_model()
    model.fit(X, y, shuffle=True, 
              epochs=epochs,
              batch_size=batch_size,
              verbose=0,
              callbacks=[TQDMNotebookCallback(), 
                         EarlyStopping(monitor='loss', min_delta=0.01, patience=50)])
    
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    score = accuracy_score(y_test, y_pred)
    
    end = time.time()
    elapsed = end - start
    print(' * Accuracy: %.1f %%' % (100. * score))
    print(' * Fit time elapsed: %.1fs' % elapsed)
    results.append({'sample_size': sample_size, 'accuracy': score, 'time': elapsed})

Fitting with sample_size: 1000

 * Accuracy: 66.1 %
 * Fit time elapsed: 1039.5s
Fitting with sample_size: 2000

 * Accuracy: 68.4 %
 * Fit time elapsed: 1838.2s
Fitting with sample_size: 5000

 * Accuracy: 72.8 %
 * Fit time elapsed: 5468.5s
Fitting with sample_size: 10000

 * Accuracy: 75.1 %
 * Fit time elapsed: 11303.0s
Fitting with sample_size: 25000

 * Accuracy: 77.3 %
 * Fit time elapsed: 25340.2s
Fitting with sample_size: 50000

 * Accuracy: 78.6 %
 * Fit time elapsed: 58643.2s


In [6]:
df = pd.DataFrame(results)
display(df)
df.to_csv('inception_results.csv', index=False)

Unnamed: 0,accuracy,sample_size,time
0,0.6612,1000,1039.488296
1,0.684,2000,1838.208698
2,0.7278,5000,5468.546321
3,0.7506,10000,11303.025796
4,0.7728,25000,25340.225941
5,0.7862,50000,58643.150445
