## Train and Evaluate Classifier

In [None]:
import pathlib
import warnings
import pickle
import sys

from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

sys.path.append('..')  # dirty
from cats_and_dogs_utils import construct_model, plot_roc

%matplotlib inline

In [None]:
# Build the Keras directory iterators that feed training, validation and test.

data_dir = pathlib.Path.cwd().parent / 'data'
batch_size = 128

# Random augmentation is applied to the training images only; validation and
# test images are merely rescaled to [0, 1].
_augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1./255, **_augmentation)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three splits.
_flow_options = dict(
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(
    data_dir / 'train', **_flow_options)

# Validation and test keep the on-disk order (shuffle=False).
val_generator = val_datagen.flow_from_directory(
    data_dir / 'val', shuffle=False, **_flow_options)

test_generator = test_datagen.flow_from_directory(
    data_dir / 'test', shuffle=False, **_flow_options)

In [None]:
# Build the classifier. The architecture itself lives in
# cats_and_dogs_utils.construct_model (imported above), not in this notebook.

model = construct_model()

In [None]:
# Train the model, then persist the training history and the trained model.

epochs = 200

# One epoch is one full pass over each directory. len() of a Keras iterator
# is its number of batches per pass, so the step counts follow the data on
# disk instead of hard-coded sample counts (which would also run one extra,
# wrapped-around batch whenever the count is a multiple of batch_size).
train_steps = len(train_generator)
val_steps = len(val_generator)

with warnings.catch_warnings():
    # Silence EXIF-related warnings (presumably emitted while the images are
    # decoded); the filter is limited to the fit call.
    warnings.filterwarnings("ignore", message=".*EXIF")

    history = model.fit_generator(
        train_generator,
        steps_per_epoch=train_steps,
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=val_steps)

# Store the per-epoch metrics so the plots can be redrawn without retraining.
with (data_dir / f'history_{epochs}.pickle').open('wb') as file:
    pickle.dump(history.history, file)

model.save(str(data_dir / f'cats_and_dogs_{epochs}.h5'))

In [None]:
# Plot the accuracy and loss curves recorded during training.

epochs = 200
history_path = data_dir / f'history_{epochs}.pickle'
with history_path.open('rb') as file:
    training_history = pickle.load(file)

acc, val_acc = training_history['acc'], training_history['val_acc']
loss, val_loss = training_history['loss'], training_history['val_loss']


def _draw_curves(val_series, train_series, quantity):
    """Draw validation and training curves for *quantity* on the current figure."""
    plt.plot(val_series, color='green', label=f'Validation {quantity}')
    plt.plot(train_series, color='blue', label=f'Training {quantity}')
    plt.title(f'Training and validation {quantity}')
    plt.xlabel('Training epoch')
    plt.legend()


_draw_curves(val_acc, acc, 'accuracy')

# The loss curves go on a separate figure.
plt.figure()
_draw_curves(val_loss, loss, 'loss')
plt.show()

In [None]:
# Evaluate the trained model on the test set and plot its ROC curve.

epochs = 200
# Pass the path as str, exactly as it was passed to model.save(); older
# Keras/h5py releases may not accept pathlib.Path objects.
model.load_weights(str(data_dir / f'cats_and_dogs_{epochs}.h5'))

test_data = test_generator
# Take the sizes from the generator rather than hard-coding them, so the loop
# makes exactly one pass over the data even when the sample count is a
# multiple of batch_size (where `count // batch_size` as the last batch index
# would run one batch too many and wrap around to the first images).
test_data_len = test_data.samples
n_test_batches = len(test_data)  # batches needed for one full pass

# The iterator cycles endlessly and remembers its position between runs;
# rewind it so a re-run of this cell starts at the first batch.
test_data.reset()

# Collect the batches in lists and concatenate once at the end; concatenating
# inside the loop would re-copy every image seen so far on each iteration.
img_batches, label_batches, pred_batches = [], [], []
for _, (imgs_batch, labels_batch) in zip(range(n_test_batches), test_data):
    img_batches.append(imgs_batch)
    label_batches.append(labels_batch)
    # The model output has one column; keep it as a flat score vector.
    pred_batches.append(model.predict(imgs_batch)[:, 0])

imgs = np.concatenate(img_batches, axis=0)
labels = np.concatenate(label_batches)
preds = np.concatenate(pred_batches)

plot_roc('', labels, preds)