In [1]:
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import TopKCategoricalAccuracy

In [2]:
# Directory with one sub-folder per class; both subsets are read from it.
TRAIN_DIR = '../input/bigdata/bigDataCopy/train'

# Training images: rescaled to [0, 1] and randomly augmented
# (shear, zoom, horizontal flip). 20% of the files are held out.
train_datagen = image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2)

# Validation images: rescaling only. Evaluating on randomly augmented
# images would make the validation metrics (and the confusion matrix built
# from this generator) noisy and non-reproducible.
# validation_split must stay equal to the one used by train_datagen so the
# two generators partition the same files.
# NOTE(review): this relies on Keras taking the split by file position
# within each class folder - confirm against the installed Keras version.
validation_datagen = image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(224, 224),
    batch_size=100,
    class_mode='categorical',
    subset='training')  # the 80% training part

validation_generator = validation_datagen.flow_from_directory(
    TRAIN_DIR,  # same directory as the training data
    target_size=(224, 224),
    batch_size=100,
    class_mode='categorical',
    subset='validation',  # the 20% held-out part
    shuffle=False)  # fixed order so predictions line up with .classes


In [4]:
# Convolutional base: VGG16 with ImageNet weights, without its dense top.
conv_model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))

# Freeze every layer of the base so only the new head below is trained.
for layer in conv_model.layers:
    layer.trainable = False

# Classification head: flatten the feature maps, then three 100-unit ReLU
# layers with light dropout after the second and third.
head_layers = [
    Flatten(),
    Dense(100, activation='relu'),
    Dense(100, activation='relu'),
    Dropout(0.05),
    Dense(100, activation='relu'),
    Dropout(0.05),
]
x = conv_model.output
for head_layer in head_layers:
    x = head_layer(x)

# 16-way softmax output (one unit per class).
predictions = Dense(16, activation='softmax')(x)

full_model = Model(inputs=conv_model.input, outputs=predictions)
full_model.summary()

In [8]:
# Track plain accuracy ('acc') and top-3 accuracy; the latter is logged under
# the metric's default name, which plot_history reads back.
full_model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=0.001),
                  metrics=['acc', TopKCategoricalAccuracy(k=3)])

# Model.fit accepts generators directly; fit_generator is deprecated in
# TF 2.x and no longer available in newer Keras releases.
# NOTE(review): the `workers` argument is only accepted by TF 2.x `fit`;
# drop it if this notebook is moved to Keras 3.
history = full_model.fit(
    train_generator,
    validation_data=validation_generator,
    workers=10,
    epochs=20
)

In [31]:
import matplotlib.pyplot as plt

def plot_history(history, yrange, metric='top_k_categorical_accuracy'):
    '''Plot a training metric and the loss as a function of the epoch.

    Two figures are drawn, one for `metric` and one for the loss. Each shows
    the training curve and, when the run recorded it, the validation curve.

    Parameters
    ----------
    history : object with a `.history` dict mapping a metric name to a list
        of per-epoch values (e.g. the object returned by `Model.fit`).
    yrange : (low, high) tuple or None
        y-axis limits for the metric figure; None keeps the automatic limits.
        The loss figure always uses automatic limits.
    metric : str, optional
        Key of the metric to plot. Its validation counterpart is looked up
        under 'val_' + metric. Defaults to the top-k accuracy key.

    Raises
    ------
    KeyError
        If `metric` or 'loss' is missing from `history.history`.
    '''
    logs = history.history

    # One x position per recorded epoch (0-based).
    epochs = range(len(logs[metric]))

    # (history key, figure title, y-limits) for each figure to draw.
    panels = (
        (metric, 'Training and validation ' + metric.replace('_', ' '), yrange),
        ('loss', 'Training and validation loss', None),
    )

    for key, title, limits in panels:
        # A fresh figure per panel, so nothing is drawn onto a figure left
        # open by earlier code.
        _, ax = plt.subplots()
        ax.plot(epochs, logs[key], label="Training")
        # Validation values are absent when fit ran without validation data.
        if 'val_' + key in logs:
            ax.plot(epochs, logs['val_' + key], label="Validation")
        ax.set(title=title, xlabel='Epoch', ylabel=key)
        if limits is not None:
            ax.set_ylim(limits)
        ax.legend()

    plt.show()
    

# Draw the curves for the training run, limiting the metric axis to [0.5, 1].
plot_history(history, yrange=(0.5, 1))

In [17]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Integer class index of every validation image, as exposed by the generator.
true_labels = validation_generator.classes

# Per-class probabilities for every validation image. Model.predict accepts
# the generator directly; predict_generator is deprecated in TF 2.x.
# The generator was created with shuffle=False, so row i of `predictions`
# corresponds to true_labels[i].
predictions = full_model.predict(validation_generator)

y_true = true_labels
# Predicted class = index of the highest probability in each row.
y_pred = np.argmax(predictions, axis=1)

In [38]:
# Large figure so the normalized cell values and the tick labels fit.
plt.rcParams["figure.figsize"] = (18,18)

# Take the tick labels from the generator instead of a hand-typed list: the
# class names come from the same mapping that produced the integer indices in
# y_true / y_pred, so the label order cannot drift from the data.
class_indices = validation_generator.class_indices
cities = sorted(class_indices, key=class_indices.get)

# normalize="true" scales each row to sum to 1, i.e. each cell is the
# fraction of the images of that true class given that predicted label.
# Passing `labels` keeps the matrix at one row/column per class even if some
# class never occurs in y_true or y_pred, so display_labels always matches.
disp = ConfusionMatrixDisplay.from_predictions(
    y_true,
    y_pred,
    labels=np.arange(len(cities)),
    normalize="true",
    display_labels=cities,
    xticks_rotation=45)
plt.show()

In [25]:
# Persist the trained model as two files:
#   model.json - the architecture, serialized to JSON
#   model.h5   - the weights
model_json = full_model.to_json()
with open("model.json", mode="w") as architecture_file:
    architecture_file.write(model_json)

full_model.save_weights("model.h5")
