In [None]:
from keras.preprocessing import image
from keras.applications import resnet50
%matplotlib inline
import os, math
import numpy as np
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Activation
#following added by JMS 
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.core import Dense, Flatten
from keras.optimizers import SGD, Adam
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.models import load_model
from keras.layers.convolutional import *
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
import itertools
import matplotlib.pyplot as plt
from keras.models import load_model
import h5py

In [None]:
def label_names(label_vector, classes):
    """Return the class names whose entry in ``label_vector`` equals 1.

    ``label_vector`` must expose ``.size`` (e.g. a NumPy array) and hold exactly
    one entry per name in ``classes``; an ``AssertionError`` is raised otherwise.
    The names are returned in the order they appear in ``classes``.
    """
    assert label_vector.size == len(classes)
    return [classes[idx] for idx, flag in enumerate(label_vector) if flag == 1]

In [None]:
# Code from fastai
def plots(ims, figsize=(12,6), rows=1, interp=False, titles=None, classes=None):
    """Show a sequence of images in a grid of subplots.

    ims:     sequence of images. When the elements are NumPy arrays they are
             cast to uint8, and anything whose last axis is not of length 3 is
             transposed from (N, C, H, W) to (N, H, W, C).
    figsize: size of the matplotlib figure.
    rows:    number of grid rows; the number of columns is derived from it.
    interp:  if falsy, images are drawn with interpolation 'none'.
    titles:  optional per-image titles. With ``classes`` given, each title is a
             label vector that is converted to names via ``label_names``;
             without ``classes`` the title is used as-is.
    classes: optional list of class names matching the label vectors.
    """
    if type(ims[0]) is np.ndarray:
        ims = np.array(ims).astype(np.uint8)
        if (ims.shape[-1] != 3):
            ims = ims.transpose((0,2,3,1))
    f = plt.figure(figsize=figsize)
    n_images = len(ims)
    # Ceiling division so that rows * cols always has room for every image.
    # (The previous test on `len(ims) % 2` could yield a grid that was too
    # small, e.g. 6 images in 4 rows -> 1 column, making add_subplot fail.)
    cols = (n_images + rows - 1) // rows
    for i in range(n_images):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')
        if titles is not None:
            # label_names needs the class list; fall back to the raw title
            # instead of crashing when no classes were supplied.
            title = titles[i] if classes is None else label_names(titles[i], classes)
            sp.set_title(title, fontsize=8)
        # Draw on the subplot that was just created rather than on pyplot's
        # implicit "current axes".
        sp.imshow(ims[i], interpolation=None if interp else 'none')

In [None]:
# Code from StackOverflow post
def zca_whitening_matrix(X, epsilon=1e-5):
    """
    Function to compute ZCA whitening matrix (aka Mahalanobis whitening).
    INPUT:  X: [M x N] matrix.
        Rows: Variables
        Columns: Observations
            epsilon: small constant added to the eigenvalues before the
                     inverse square root; prevents division by zero.
    OUTPUT: ZCAMatrix: [M x M] matrix
    """
    # Covariance between the rows (variables) of X, estimated over its columns.
    # np.cov collapses the single-variable case to a 0-d array, which svd
    # rejects; atleast_2d restores the [M x M] shape for M == 1.
    sigma = np.atleast_2d(np.cov(X, rowvar=True)) # [M x M]
    # Singular Value Decomposition of the symmetric matrix sigma:
    #   U: [M x M] eigenvectors of sigma.
    #   S: [M]     eigenvalues of sigma.
    # The third factor (transpose of U) is not needed.
    U, S, _ = np.linalg.svd(sigma)
    # ZCA Whitening matrix: U * diag(1/sqrt(S + epsilon)) * U'.
    # Scaling the columns of U by broadcasting is the same product without
    # materialising the diagonal matrix.
    ZCAMatrix = np.dot(U * (1.0 / np.sqrt(S + epsilon)), U.T) # [M x M]
    return ZCAMatrix

In [None]:
def whiten_batch(X):
    """Scale a batch by 1/255 and project it through a ZCA whitening matrix.

    The last three axes of X are flattened into one feature axis, the matrix
    returned by ``zca_whitening_matrix`` for that 2-D array is applied from the
    left, and the result is reshaped back to ``X.shape``.
    """
    scaled = np.multiply(X, 1.0/255.0, casting="unsafe")
    features_per_sample = np.prod(scaled.shape[-3:])
    flat = scaled.reshape((-1, features_per_sample))  # one row per sample
    # NOTE(review): zca_whitening_matrix documents its input as
    # rows = variables, columns = observations, but `flat` has one row per
    # sample. The matrix is therefore [samples x samples] and mixes images of
    # the batch rather than decorrelating pixels -- confirm this is intended.
    whitening = zca_whitening_matrix(flat)
    return np.dot(whitening, flat).reshape(X.shape)

<h2>Loading Data</h2>
<br>

In [None]:
# Number of samples per mini-batch requested from the training data generator;
# also used to derive the number of training steps.
TRAIN_BATCH_SIZE = 32


In [None]:
# HDF5 files holding the training and validation splits, given relative to the
# notebook's working directory. Each one is read through its 'images' and
# 'labels' datasets.
train_path = "train.h5"
valid_path = "validate.h5"

In [None]:
# Read both splits completely into memory; `[:]` copies each HDF5 dataset out
# of the file so the arrays stay usable after the file is closed.
with h5py.File(train_path, 'r') as hf:
    X_train = hf['images'][:]
    y_train = hf['labels'][:]

with h5py.File(valid_path, 'r') as hf:
    X_valid = hf['images'][:]
    y_valid = hf['labels'][:]
        

In [None]:
# X_valid = scaler.fit_transform(X_valid)

In [None]:
# Training-time generator: dataset-wide normalisation plus random augmentation.
# The featurewise_* options rely on statistics computed by datagen.fit().
datagen = ImageDataGenerator(
    featurewise_center=True,             # subtract the dataset mean
    featurewise_std_normalization=True,  # divide by the dataset std
    rotation_range=20,                   # random rotations, in degrees
    width_shift_range=0.2,               # random horizontal shifts (fraction of width)
    height_shift_range=0.2,              # random vertical shifts (fraction of height)
    horizontal_flip=True)                # random left/right flips

In [None]:
# Compute the dataset-wide statistics needed by the featurewise_* options of
# `datagen` from the training images.
datagen.fit(X_train)

In [None]:
# Venue categories. label_names() maps position i of a label vector to
# classes[i], so the order of this list matters.
classes = [
    'backyard',
    'ballroom',
    'barn_inside',
    'barn_outside',
    'beach',
    'castle',
    'country_club',
    'cruise',
    'estate',
    'farm_+_ranch_outside',
    'garden',
    'historic_venue',
    'hotel_outside',
    'library',
    'mountain',
    'museum_inside',
    'park',
    'restaurant',
    'rooftop',
    'tented',
    'vineyard',
    'waterfront',
]

<h2>Creating/Loading Pre-Trained ResNet Model</h2>
<br>

In [None]:
# Load Keras' ResNet50 model that was pre-trained against the ImageNet database.
# It is created with default arguments, i.e. including its original final
# classification layer, which is replaced by a custom head further down.
model = resnet50.ResNet50()

In [None]:
# Every layer except ResNet50's original classifier (the final layer).
# Slicing a copy replaces the former `model.layers.pop()`: it leaves the model
# object untouched and does not depend on `model.layers` being a mutable list.
# On Keras releases where `layers` returns a fresh list, pop() is a silent
# no-op and the new head would be stacked on top of the old classifier.
backbone_layers = model.layers[:-1]

# Freeze the pre-trained layers (all but the last two of the backbone).
for layer in backbone_layers[:-2]:
    layer.trainable=False

# New classification head with one output per entry in `classes`, fed by the
# output of the last remaining backbone layer.
# NOTE(review): softmax yields a single-label distribution, while
# label_names()/hamming_score() treat targets as multi-label -- confirm whether
# a sigmoid head (with a matching loss) was intended.
last = backbone_layers[-1].output
x = Dense(len(classes), activation="softmax")(last)

In [None]:
#following taken from https://github.com/sebastianbk/finetuned-resnet50-keras/blob/master/resnet50_train.py

In [None]:
# Number of training images (length of the first axis of X_train); the number
# of training steps is derived from it.
num_train_samples = len(X_train)
num_train_samples


In [None]:
# Whole batches that fit into one pass over the training set (a trailing
# partial batch is not counted).
num_train_steps = num_train_samples // TRAIN_BATCH_SIZE
num_train_steps

<h2>Compile and Train model</h2>
<br>

In [None]:
def hamming_score(y_true, y_pred):
    '''
    Mean per-sample label-set overlap for the multi-label case
    (called "Hamming score" in the post this was taken from).
    https://stackoverflow.com/q/32239577/395857

    For every row, the indices of the non-zero entries of y_true and y_pred
    form two label sets; the row's score is |intersection| / |union|, or 1 when
    both sets are empty. Returns the mean of the row scores.
    '''
    def _row_score(row_true, row_pred):
        # Label sets = positions of the non-zero entries.
        truth = set(np.where(row_true)[0])
        guess = set(np.where(row_pred)[0])
        if not truth and not guess:
            # Nothing to predict and nothing predicted: perfect match.
            return 1
        return len(truth & guess) / float(len(truth | guess))

    scores = [_row_score(y_true[k], y_pred[k]) for k in range(y_true.shape[0])]
    return np.mean(scores)

In [None]:
#finetune model
# NOTE(review): hamming_score() and label_names() treat the targets as
# multi-label, but a softmax head trained with categorical_crossentropy models
# exactly one class per image, so rounding its outputs can flag at most one
# label. Confirm whether sigmoid outputs with binary_crossentropy were intended.
finetuned_model = Model(model.input, x)
finetuned_model.compile(optimizer=Adam(lr=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE: EarlyStopping / ModelCheckpoint are imported but not used by this manual
# loop; the model is only saved once, after the final epoch.
epochs = 40

# Batches needed to show every training image once per epoch (the last batch
# may be partial). Derived from TRAIN_BATCH_SIZE so it cannot drift from the
# batch size passed to datagen.flow().
steps_per_epoch = math.ceil(len(X_train) / TRAIN_BATCH_SIZE)

# datagen.flow() centers/scales every training batch with the statistics
# learned by datagen.fit(); the validation images need the same treatment or
# the model is evaluated on differently distributed inputs. standardize()
# works in place, so it is given a float copy and X_valid stays untouched.
X_valid_standardized = datagen.standardize(np.array(X_valid, dtype=K.floatx()))

for e in range(epochs):
    print('Epoch', e)
    batches = 0
    for X_batch, y_batch in datagen.flow(X_train, y_train, batch_size=TRAIN_BATCH_SIZE):
        finetuned_model.fit(X_batch, y_batch)
        batches += 1
        print("batch:", batches)
        if batches >= steps_per_epoch:
            # we need to break the loop by hand because
            # the generator loops indefinitely
            break

    # Keyword arguments on purpose: the second positional parameter of
    # predict() is batch_size, so the former `predict(X_valid, True)` ran the
    # whole validation set with batch_size=1.
    y_valid_probs = finetuned_model.predict(X_valid_standardized,
                                            batch_size=TRAIN_BATCH_SIZE,
                                            verbose=1)
    # Threshold the scores at 0.5 to obtain 0/1 label vectors.
    y_valid_pred = np.around(y_valid_probs, decimals=0)
    accuracy = hamming_score(y_valid, y_valid_pred)
    print("Accuracy",accuracy)

finetuned_model.save('resnet50_team1_take1.h5')

<h2>Testing the Model</h2>