# Cifar10 - Aumento de dados

Repetir o treinamento realizado na semana passada com as 2000 amostras de treinamento, porém agora utilizando a técnica de aumento de dados (data augmentation) mostrada em aula.

In [71]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
import keras
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
%matplotlib inline

In [72]:
# Absolute, server-specific path of the cifar10-redux .npz archive; adjust it
# when running the notebook on another machine.
DATASET_PATH = '/etc/jupyterhub/ia368z_2s2017/datasets/cifar10-redux.npz'
data = np.load(DATASET_PATH)

In [73]:
# Pull the four arrays out of the archive in one statement; the archive keys
# are identical to the variable names they are bound to.
X_train, y_train, X_test, y_test = (
    data[key] for key in ('X_train', 'y_train', 'X_test', 'y_test')
)

In [74]:
# Sanity check: display the shape of every array that was just loaded.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((2000, 3, 32, 32), (2000,), (500, 3, 32, 32), (500,))

In [75]:
num_classes = 3
# Offset subtracted from the stored labels so that they start at 0, which is
# what to_categorical expects (presumably the archive holds classes 3, 4 and
# 5 -- TODO confirm against the dataset).
LABEL_OFFSET = 3

# Encode from the untouched arrays in `data` instead of from y_train / y_test
# themselves: the original `y_train = f(y_train)` form shifted and one-hot
# encoded the labels a second time whenever this cell was re-run.
y_test_categorical = data['y_test'] - LABEL_OFFSET  # integer labels, compared with argmax later
y_train = keras.utils.to_categorical(data['y_train'] - LABEL_OFFSET, num_classes)
y_test = keras.utils.to_categorical(y_test_categorical, num_classes)

In [76]:
def SmallCNN(nclasses, input_shape, data_format="channels_first"):
    """Build an (uncompiled) small convolutional classifier.

    Architecture: two convolutional stages (32 then 64 filters), each made of
    conv(3x3, 'same') -> ReLU -> conv(3x3) -> ReLU -> 2x2 max-pooling, with
    dropout 0.3 after the second stage, followed by Flatten, Dense(120) +
    ReLU, dropout 0.2 and a softmax output layer.

    Args:
        nclasses: number of output classes (units of the final Dense layer).
        input_shape: shape of one input sample, without the batch dimension,
            laid out according to `data_format`.
        data_format: "channels_first" (default, as in the original first
            layer) or "channels_last". It is passed to every convolution and
            pooling layer.

    Returns:
        A keras `Sequential` model; the caller is responsible for compiling it.
    """
    model = Sequential()

    # Stage 1 -- 32 filters.
    # Every Conv2D / MaxPooling2D receives data_format explicitly. Previously
    # only the first layer did, so the remaining layers silently used the
    # global Keras default and could treat the channel axis as a spatial one.
    model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape,
                     data_format=data_format))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3), data_format=data_format))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=data_format))

    # Stage 2 -- 64 filters, followed by dropout.
    model.add(Conv2D(64, (3, 3), padding='same', data_format=data_format))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), data_format=data_format))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=data_format))
    model.add(Dropout(0.3))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(120))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(nclasses))
    model.add(Activation('softmax'))
    return model


In [77]:
# Main cell: build and compile the model, normalise the images, hold out a
# validation split and configure the augmentation generator.
# NOTE: this cell rebinds X_train / y_train / X_test, so it must be executed
# exactly once after the loading cells (see the guard below).

# Number of leading samples kept for training; the remainder is validation.
N_TRAIN = 1500

# Fail loudly instead of silently rescaling / re-splitting when the cell is
# re-run on already split data (or when the dataset is too small to split).
if X_train.shape[0] <= N_TRAIN:
    raise RuntimeError(
        'X_train has %d samples, which leaves no validation data after keeping '
        '%d for training; re-run the data loading cells before this one.'
        % (X_train.shape[0], N_TRAIN))

model = SmallCNN(num_classes, X_train.shape[1:])

# Adam optimizer with its default hyper-parameters.
opt = keras.optimizers.Adam()

# Early stopping: abort training once val_loss has not improved for 10 epochs.
callbacks = [
    EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto'),
]

model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# Data normalization: convert to float32 and scale pixel values by 1/255.
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

# Split X_train into training and validation parts.
# The original slice `[0:1499]` dropped the sample at index 1499; `[:N_TRAIN]`
# keeps all N_TRAIN samples and stays adjacent to the validation slice.
X_val, y_val = X_train[N_TRAIN:], y_train[N_TRAIN:]
X_train, y_train = X_train[:N_TRAIN], y_train[:N_TRAIN]

# The datagen generates randomly transformed variants of the training images.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False,  # no vertical flips
        data_format="channels_first")  # arrays are passed with the channel axis first

# fit() computes dataset statistics for the featurewise / ZCA options; they are
# all disabled above, so this call is kept only for parity with the original.
datagen.fit(X_train)


In [None]:
# Train on augmented batches.
# Batch size and steps per epoch are derived from the same constant so that one
# epoch is one pass over the training set. The original mixed batch_size=30
# with steps computed for a batch size of 16, which made every "epoch" draw
# almost twice as many samples as the training set contains.
BATCH_SIZE = 30
steps_per_epoch = int(np.ceil(X_train.shape[0] / float(BATCH_SIZE)))

# Keep the returned History object so the loss / accuracy curves can be inspected.
history = model.fit_generator(
    datagen.flow(X_train, y_train, batch_size=BATCH_SIZE),
    steps_per_epoch=steps_per_epoch,
    epochs=25,
    validation_data=(X_val, y_val),
    callbacks=callbacks)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25

In [62]:
# Evaluate the trained model on the held-out test set.
# `evaluation` holds the loss first and then the compiled metrics, so index 1
# is the accuracy requested in model.compile().
evaluation = model.evaluate(X_test, y_test)
test_accuracy = evaluation[1]
print(test_accuracy)

0.622000000954


In [None]:
# Find the correct classifications that have the lowest prediction probability.
# The model ends in a softmax layer, so predict() already returns the class
# probabilities. The separate predict_proba() call repeated the same forward
# pass (and that method no longer exists in recent Keras releases), so the
# result is computed once and shared under both names used further down.
prediction = model.predict(X_test)
prediction_proba = prediction

In [None]:
# Boolean mask: True where the most probable class equals the true label.
predicted_labels = np.argmax(prediction, axis=1)
prediction_correct = (predicted_labels == y_test_categorical)

# For every correctly classified test sample keep (winning probability, sample index).
prediction_class_index = [
    (prediction_proba[position].max(), position)
    for position, is_correct in enumerate(prediction_correct)
    if is_correct
]


In [None]:
# Structured dtype so the (probability, index) pairs can be sorted by field name.
dtype = np.dtype([('prob', float), ('index', int)])

# Sort ascending by probability and keep the five least confident correct predictions.
correct_records = np.array(prediction_class_index, dtype=dtype)
sorted_records = np.sort(correct_records, order="prob")
less_prob_predictions = sorted_records[:5]

In [None]:
# Plot the least confident correct predictions side by side and save the figure.
# Each title reads "true label:predicted label:probability".

# The predicted classes do not change inside the loop, so compute them once.
predicted_labels = np.argmax(prediction, axis=1)

# Iterate over the entries that actually exist: the selection holds at most 5
# records, but fewer when the model got fewer than 5 test samples right, and a
# fixed range(5) would then raise an IndexError.
for i in range(len(less_prob_predictions)):
    prob = less_prob_predictions[i]['prob']
    idx = less_prob_predictions[i]['index']
    plt.subplot(1, 5, i + 1)
    # Images are stored channel-first; imshow needs (height, width, channels).
    plt.imshow(X_test[idx].transpose((1, 2, 0)))
    plt.title('{}:{}:{:0.3f}'.format(y_test_categorical[idx], predicted_labels[idx], prob))
    plt.axis('off')
plt.savefig('cifar_fig.png')