In [1]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
import numpy as np
from glob import glob
from keras.preprocessing import image                  
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
from __future__ import absolute_import, division, print_function
import os 

# Working directory that the relative 'GoT-images/...' and 'saved_models/...'
# paths used further down are resolved against. It defaults to the location the
# notebook was originally run from; set the GOT_ROOT environment variable to
# point the notebook at a different checkout without editing this cell.
os.chdir(os.environ.get('GOT_ROOT', '/home/aind2/root/GoT'))

In [3]:
# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=2):
    """Collect image file paths and one-hot labels from a dataset folder.

    Args:
        path: Folder handed to sklearn's ``load_files``, which derives the
            labels from its sub-folders.
        num_classes: Number of columns of the one-hot label matrix. Defaults
            to 2, the value that used to be hard-coded here.

    Returns:
        Tuple ``(GoT_files, GoT_targets)``: a NumPy array of file paths and the
        one-hot encoded labels produced by ``np_utils.to_categorical``.
    """
    # Only 'filenames' and 'target' are used, so tell load_files not to read
    # the raw bytes of every file into memory (its default behaviour).
    data = load_files(path, load_content=False)
    GoT_files = np.array(data['filenames'])
    GoT_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return GoT_files, GoT_targets

# Build the training and validation splits from their image folders
train_files, train_targets = load_dataset(path='GoT-images/train')
valid_files, valid_targets = load_dataset(path='GoT-images/valid')
# TODO: there is no test split yet -- the IG Discover page still has to be scraped for one
# test_files, test_targets = load_dataset(path='GoT-images/test')

In [4]:
def path_to_tensor(img_path):
    """Read a single image file into a batch-of-one array.

    The file is opened with ``image.load_img`` using a 224x224 target size,
    converted with ``image.img_to_array`` and given a new leading axis, so a
    (224, 224, 3) image comes back with shape (1, 224, 224, 3).
    """
    pil_image = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_image)
    # prepend the batch axis: (H, W, C) -> (1, H, W, C)
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Load every image in ``img_paths`` and stack the results into one array.

    Each path goes through ``path_to_tensor``; the per-image arrays are joined
    along the first axis with ``np.vstack``. Progress is reported by tqdm.
    """
    per_image = []
    for single_path in tqdm(img_paths):
        per_image.append(path_to_tensor(single_path))
    return np.vstack(per_image)

In [5]:
from PIL import ImageFile                            
# allow PIL to load image files that are truncated
ImageFile.LOAD_TRUNCATED_IMAGES = True

# turn the image paths into float32 tensors and divide the pixel values by 255
train_tensors = paths_to_tensor(train_files).astype('float32') / 255.0
valid_tensors = paths_to_tensor(valid_files).astype('float32') / 255.0
# test_tensors = paths_to_tensor(test_files).astype('float32') / 255.0

100%|██████████| 37661/37661 [12:00<00:00, 52.27it/s]
100%|██████████| 7617/7617 [02:31<00:00, 50.35it/s]


In [7]:
from keras.layers import Conv2D, Dropout, Flatten, Dense, Activation
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization 

model = Sequential()

# Convolutional trunk: five identical stages of
#   3x3 stride-2 'same' convolution -> batch normalisation -> ReLU
# with 16, 32, 64, 128 and 256 filters. Every stage halves the spatial size
# (224 -> 112 -> 56 -> 28 -> ...), as the summary printed below shows.
first_layer_kwargs = {'input_shape': (224, 224, 3)}
for n_filters in (16, 32, 64, 128, 256):
    model.add(Conv2D(filters=n_filters, kernel_size=3, strides=2, padding='same',
                     **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # only the very first layer declares the input shape
    first_layer_kwargs = {}

model.add(Flatten())

# Classifier head: 1000-unit dense layer (batch norm, ReLU, 50% dropout)
# followed by a 2-way softmax output.
for head_layer in (Dense(1000),
                   BatchNormalization(),
                   Activation('relu'),
                   Dropout(0.5),
                   Dense(2, activation='softmax')):
    model.add(head_layer)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 112, 112, 16)      448       
_________________________________________________________________
batch_normalization_2 (Batch (None, 112, 112, 16)      64        
_________________________________________________________________
activation_1 (Activation)    (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 56, 56, 32)        4640      
_________________________________________________________________
batch_normalization_3 (Batch (None, 56, 56, 32)        128       
_________________________________________________________________
activation_2 (Activation)    (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 28, 28, 64)        18496     
__________

In [8]:
from keras.optimizers import Adam

# Configure training: the 'adam' optimizer selected by name, categorical
# cross-entropy loss, and accuracy reported as an extra metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Warm-start from the benchmark checkpoint when an earlier run has written one.
# On a fresh run the file does not exist yet (it is only created by the
# training cells below), so skip loading instead of crashing.
if os.path.isfile('saved_models/benchmark_model.hdf5'):
    model.load_weights('saved_models/benchmark_model.hdf5')
else:
    print('No checkpoint at saved_models/benchmark_model.hdf5 - starting from freshly initialised weights')

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

#benchmark model

# upper bound on the number of epochs; early stopping may end training sooner
epochs = 50

# Make sure the checkpoint folder exists up front, so that the first checkpoint
# write cannot fail on a missing directory after a full epoch of training.
if not os.path.isdir('saved_models'):
    os.makedirs('saved_models')

# write the model to disk only when the monitored quantity improves
checkpointer = ModelCheckpoint(filepath='saved_models/benchmark_model.hdf5', 
                               verbose=1, save_best_only=True)

# stop once the validation loss has not improved for 5 consecutive epochs
early_stop = EarlyStopping(monitor='val_loss', patience=5)

model.fit(train_tensors, train_targets, validation_data=(valid_tensors, valid_targets), epochs=epochs, batch_size=32, callbacks=[checkpointer,early_stop], verbose=1)



Train on 37661 samples, validate on 7617 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50

In [9]:
from keras.callbacks import ModelCheckpoint, EarlyStopping


#benchmark model resume training (AWS instance connection was dropped)

# upper bound on the number of additional epochs; early stopping may end sooner
epochs = 40

# write the model to disk only when the monitored validation loss improves
checkpointer = ModelCheckpoint(filepath='saved_models/benchmark_model.hdf5', 
                               verbose=1, save_best_only=True)

# A freshly created ModelCheckpoint knows nothing about earlier runs: its
# best-so-far value starts at +inf, so the first resumed epoch would always
# overwrite the existing checkpoint, even with a worse model. Seed it with the
# validation loss of the weights currently in the model (evaluate returns
# [loss, accuracy] for the metrics compiled above).
checkpointer.best = model.evaluate(valid_tensors, valid_targets, batch_size=32, verbose=0)[0]

# stop once the validation loss has not improved for 5 consecutive epochs
early_stop = EarlyStopping(monitor='val_loss', patience=5)

model.fit(train_tensors, train_targets, validation_data=(valid_tensors, valid_targets), epochs=epochs, batch_size=32, callbacks=[checkpointer,early_stop], verbose=1)

Train on 37661 samples, validate on 7617 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40


<keras.callbacks.History at 0x7f2cdcab7ef0>