<h1> Define the train/test split and data-loading functions </h1>

In [2]:
import os
import cv2
import keras
import pickle
import numpy as np
import pandas as pd

def train_test_split(rd,train_ratio,seed=None):
    """Randomly partition the entries of directory ``rd`` into train and test sets.

    Parameters
    ----------
    rd : str
        Directory whose entries are listed with ``os.listdir``; the
        '.DS_Store' entry is ignored.
    train_ratio : float
        Fraction of the entries assigned to the training set. The training
        size is rounded up with ``np.ceil``.
    seed : int, optional
        When given, the shuffle uses ``np.random.default_rng(seed)`` so the
        split is reproducible. When omitted, the global NumPy random state is
        used, exactly as before.

    Returns
    -------
    train, test : np.ndarray
        Two disjoint arrays of entry names that together cover every listed
        entry.
    """
    fls = np.asarray([fn for fn in os.listdir(rd) if fn != '.DS_Store'], dtype=str)

    if seed is None:
        np.random.shuffle(fls)
    else:
        np.random.default_rng(seed).shuffle(fls)

    train_length = int(np.ceil(train_ratio * len(fls)))

    # Split at a single index. Slicing the test set as fls[-test_length:]
    # returns the WHOLE array when test_length is 0 (since -0 == 0), which
    # would leak every training sample into the test set.
    train = fls[:train_length]
    test = fls[train_length:]

    return train, test

def load(rd,file_names):
    """Load one image and one integer label per sample directory.

    For every name in ``file_names`` the image ``<rd>/<name>/gsv_0.jpg`` is
    read with ``cv2.imread``, scaled by 1/255 and stored in ``X``. The label is
    looked up from the text before the first space in the directory name.

    Parameters
    ----------
    rd : str
        Root directory containing the sample sub-directories.
    file_names : sequence of str
        Sub-directory names; each must start with one of the keys of
        ``label_map`` followed by a space.

    Returns
    -------
    X : np.ndarray, shape (len(file_names), 150, 150, 3), dtype float16
    Y : np.ndarray, shape (len(file_names),), dtype int

    Raises
    ------
    FileNotFoundError
        If an image is missing or cannot be decoded.
    ValueError
        If an image is not 150x150 with 3 channels.
    KeyError
        If a directory name does not start with a known label.
    """
    label_map = {
        'Georgia':0,
        'Idaho':1,
        'Maine':2,
        'Utah':3 }

    X = np.zeros(shape = (len(file_names),150,150,3),dtype=np.float16)
    Y = np.zeros(shape=len(file_names), dtype=int)

    expected_shape = X.shape[1:]

    for idx,sub_dir in enumerate(file_names):

        im_path = os.path.join(rd,sub_dir,"gsv_0.jpg")
        im = cv2.imread(im_path)

        # cv2.imread signals failure by returning None instead of raising;
        # without this check the failure shows up as an opaque TypeError on
        # the division below.
        if im is None:
            raise FileNotFoundError("Could not read image: " + im_path)

        # Reject unexpected sizes explicitly so a broadcast-compatible shape
        # can never be silently stretched across the 150x150x3 slot.
        if im.shape != expected_shape:
            raise ValueError(
                "Image " + im_path + " has shape " + str(im.shape) +
                ", expected " + str(expected_shape))

        X[idx,:,:,:] = im / 255
        Y[idx] = label_map[sub_dir.split(" ")[0]]

    return X, Y

def load_set(fp):
    """Return the object stored in the pickle file at path ``fp``."""
    # NOTE: unpickling can execute arbitrary code; only use on trusted files.
    with open(fp, mode='rb') as handle:
        restored = pickle.load(handle)
    return restored

def save_set(fp,l):
    """Pickle the object ``l`` into the file at path ``fp``, overwriting it."""
    with open(fp, mode='wb') as handle:
        writer = pickle.Pickler(handle)
        writer.dump(l)

<h1> Setup Data Storage and Fetching </h1>

In [3]:
# Fetch the project repository. Later cells read the images from
# /content/Geoguesser/extracted_data (assumes the working directory is /content).
!git clone https://github.com/gpoulsen1775/Geoguesser.git

Cloning into 'Geoguesser'...
remote: Enumerating objects: 12912, done.[K
remote: Counting objects: 100% (12912/12912), done.[K
remote: Compressing objects: 100% (12907/12907), done.[K
remote: Total 12912 (delta 3), reused 12912 (delta 3), pack-reused 0
Receiving objects: 100% (12912/12912), 26.30 MiB | 12.57 MiB/s, done.
Resolving deltas: 100% (3/3), done.
Updating files: 100% (9742/9742), done.


In [4]:
# Mount Google Drive at /content/drive; later cells keep the cached datasets,
# model checkpoints and training log under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


<h1> Load in train-test data </h1>

In [5]:
# The decoded train/test arrays are cached on Drive as pickles so the images
# only have to be read once; later runs simply load the cache.
rd = '/content/drive/MyDrive/Colab Notebooks/Geoguesser/'
fns = ['X_train','Y_train','X_test','Y_test']

try: #Try to Load

  X_train,Y_train,X_test,Y_test = [load_set(rd + fn) for fn in fns]

# Only a missing or unreadable cache triggers a rebuild. A bare `except:` would
# also hide KeyboardInterrupt and genuine bugs behind a silent re-split.
except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError, IndexError) as err: #Create & Save

  print("Cached data set unavailable (" + repr(err) + "); rebuilding from images.")

  extracted_dir = '/content/Geoguesser/extracted_data'

  trn, tst = train_test_split(extracted_dir,.8) #Break Directories into a train and test set
  X_train,Y_train = load(extracted_dir,trn) #Load the training set
  X_test,Y_test = load(extracted_dir,tst) #Load the test set

  data = [X_train,Y_train,X_test,Y_test]

  os.makedirs(rd, exist_ok=True) #The Drive folder may not exist on a first run

  for fn,arr in zip(fns,data):

    save_set(rd+fn,arr)

<h1> Define CNN Model (VGG16-inspired) </h1>

In [None]:
# #VGG16 MAX 60%
# from keras.layers.serialization import activation
# from keras.models import Sequential
# from keras import regularizers
# from keras import initializers
# import keras.layers as kl

# model =  Sequential()

# model.add(kl.Conv2D(input_shape=(150,150,3), filters=8, kernel_size=(5,5), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Input
# model.add(kl.Conv2D(filters=8, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 1
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2))) #Pool 2
# model.add(kl.BatchNormalization())

# model.add(kl.Conv2D(filters=16, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 3
# model.add(kl.Conv2D(filters=16, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 4
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2))) #Pool 5
# model.add(kl.BatchNormalization())

# model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 6
# model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 7
# model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 8
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2))) #Pool 9
# model.add(kl.BatchNormalization())

# model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 10
# model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 11
# model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 12
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2))) #Pool 13
# model.add(kl.BatchNormalization())

# model.add(kl.Flatten()) #Flatten output
# model.add(kl.Dropout(0.4)) #Add Dropout to Layer
# model.add(kl.Dense(units=514, activation='relu', kernel_initializer=initializers.HeNormal())) #Fully Connected 14 #DOUBLED -> Works slightly faster
# model.add(kl.Dropout(0.4)) #Add Dropout to Layer
# model.add(kl.Dense(units=514, activation='relu', kernel_initializer=initializers.HeNormal())) #Fully Connected 15 #DOUBLED -> Works slightly faster
# model.add(kl.Dense(units=4, activation='softmax')) #Softmax Output

# from keras.optimizers import Adam

# opt = Adam(learning_rate=0.0003)
# model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# #69% By 13
# from keras.layers.serialization import activation
# from keras.models import Sequential
# from keras import regularizers
# from keras import initializers
# import keras.layers as kl

# model =  Sequential()

# #Layer 1
# model.add(kl.Conv2D(input_shape=(150,150,3), filters=16, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
# model.add(kl.MaxPool2D(pool_size=(4,4),strides=(4,4)))
# model.add(kl.BatchNormalization())

# #Layer 2
# model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
# model.add(kl.MaxPool2D(pool_size=(3,3),strides=(3,3)))
# model.add(kl.BatchNormalization())

# #Layer 3
# model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2)))
# model.add(kl.BatchNormalization())

# #Layer 4
# model.add(kl.Flatten())
# model.add(kl.Dropout(0.4))
# model.add(kl.Dense(units=32, activation='relu', kernel_initializer=initializers.HeNormal()))
# model.add(kl.Dense(units=4, activation='softmax')) #Softmax Output

# from keras.optimizers import Adam

# opt = Adam(learning_rate=0.0005)
# model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [6]:
#72% By 30. Slower, but needs less training accuracy to reach as high a val_accuracy
#74 By 75
# (Run notes kept from the original cell; presumably accuracy percentages by
# epoch number - TODO confirm which dropout setting they refer to.)
#
# Removed the unused `from keras.layers.serialization import activation`:
# nothing in this cell uses `activation`, and it is imported from a private
# module path.
from keras.models import Sequential
from keras.optimizers import Adam
from keras import regularizers
from keras import initializers
import keras.layers as kl

# (filters, pool size) for each Conv -> MaxPool -> BatchNorm stage.
conv_stages = [(32, 4), (64, 3), (128, 2)]

model =  Sequential()

#Layers 1-3: convolutional feature extractor
for stage_idx, (n_filters, pool) in enumerate(conv_stages):

    # Only the first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (150,150,3)} if stage_idx == 0 else {}

    model.add(kl.Conv2D(filters=n_filters, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01), **first_layer_kwargs))
    model.add(kl.MaxPool2D(pool_size=(pool,pool),strides=(pool,pool)))
    model.add(kl.BatchNormalization())

#Layer 4: classifier head
model.add(kl.Flatten())
model.add(kl.Dropout(0.7))
model.add(kl.Dense(units=32, activation='relu', kernel_initializer=initializers.HeNormal()))
model.add(kl.Dense(units=4, activation='softmax')) #Softmax Output

# Labels are integer class ids, hence the sparse categorical loss.
opt = Adam(learning_rate=0.0005)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [8]:
#72% By 30. Slower, but needs less training accuracy to reach as high a val_accuracy
#74 By 75
# (Run notes kept from the original cell; presumably accuracy percentages by
# epoch number - TODO confirm which dropout setting they refer to.)
#
# This cell rebinds `model`, so it replaces any model built by an earlier cell.
# Dropout before the dense head is 0.5 here.
#
# Removed the unused `from keras.layers.serialization import activation`:
# nothing in this cell uses `activation`, and it is imported from a private
# module path.
from keras.models import Sequential
from keras.optimizers import Adam
from keras import regularizers
from keras import initializers
import keras.layers as kl

# (filters, pool size) for each Conv -> MaxPool -> BatchNorm stage.
conv_stages = [(32, 4), (64, 3), (128, 2)]

model =  Sequential()

#Layers 1-3: convolutional feature extractor
for stage_idx, (n_filters, pool) in enumerate(conv_stages):

    # Only the first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (150,150,3)} if stage_idx == 0 else {}

    model.add(kl.Conv2D(filters=n_filters, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01), **first_layer_kwargs))
    model.add(kl.MaxPool2D(pool_size=(pool,pool),strides=(pool,pool)))
    model.add(kl.BatchNormalization())

#Layer 4: classifier head
model.add(kl.Flatten())
model.add(kl.Dropout(0.5))
model.add(kl.Dense(units=32, activation='relu', kernel_initializer=initializers.HeNormal()))
model.add(kl.Dense(units=4, activation='softmax')) #Softmax Output

# Labels are integer class ids, hence the sparse categorical loss.
opt = Adam(learning_rate=0.0005)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [25]:
# https://arxiv.org/pdf/2003.12843.pdf

In [9]:
# Print each layer's output shape and parameter count for the current `model`.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 150, 150, 32)      896       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 37, 37, 32)       0         
 2D)                                                             
                                                                 
 batch_normalization_6 (Batc  (None, 37, 37, 32)       128       
 hNormalization)                                                 
                                                                 
 conv2d_7 (Conv2D)           (None, 37, 37, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 12, 12, 64)       0         
 2D)                                                             
                                                      

<h1> Train Model </h1>

In [10]:
from keras.models import load_model

EPOCHS = 400
checkpoint_filepath = '/content/drive/MyDrive/Colab Notebooks/Geoguesser/best_model.hdf5'
history_file = '/content/drive/MyDrive/Colab Notebooks/Geoguesser/log.csv'

#Make History Saver (append=True keeps the rows written by earlier sessions)
history_callback = keras.callbacks.CSVLogger(history_file, separator=",", append=True)

#Make Checkpoint: keep only the full model with the lowest val_loss seen
checkpoint_callback = keras.callbacks.ModelCheckpoint(

    filepath=checkpoint_filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False)

#Make Early Stop: stop after 10 epochs without val_loss improvement
early_stop_callback = keras.callbacks.EarlyStopping(

    monitor='val_loss',
    patience=10,
    restore_best_weights=True)

#Number of epochs already recorded in the log by previous sessions
completed_epochs = 0

#Resume from the saved model if one exists; otherwise keep the freshly built model
if os.path.exists(checkpoint_filepath):

  try:
    model = load_model(checkpoint_filepath)
    log = pd.read_csv(history_file)

  # Report a broken checkpoint/log instead of silently swallowing every error.
  # Pandas' empty-file and parser errors are ValueError subclasses.
  except (OSError, ValueError) as err:
    print("Could not fully restore previous training state: " + repr(err))

  else:
    with pd.option_context('display.max_rows', None,
                         'display.max_columns', None,
                         'display.precision', 3,):
      print(log)

    completed_epochs = len(log)

    # EPOCHS is now the number of epochs still to run; never negative.
    EPOCHS = max(EPOCHS - completed_epochs, 0)

# NOTE(review): the checkpoint callback starts each session with no memory of
# the previous best val_loss, so the first resumed epoch always overwrites the
# saved model - confirm whether that is acceptable.

# The model is saved at the end of every epoch, if it's the best seen so far.
# initial_epoch continues the epoch numbering where the log left off, so the
# appended CSV rows do not restart at epoch 0. With initial_epoch set, `epochs`
# is the index of the final epoch, not a count.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=completed_epochs + EPOCHS,
    initial_epoch=completed_epochs,
    callbacks=[checkpoint_callback,early_stop_callback,history_callback],
    validation_split=.1)

#Try Fit Generator if Data Augmentation is needed

    epoch  accuracy   loss  val_accuracy  val_loss
0       0     0.441  1.922         0.354     1.820
1       1     0.546  1.508         0.415     2.031
2       2     0.573  1.414         0.403     1.799
3       3     0.627  1.284         0.449     1.542
4       4     0.639  1.207         0.479     1.562
5       5     0.645  1.150         0.567     1.286
6       6     0.658  1.100         0.559     1.369
7       7     0.675  1.031         0.582     1.188
8       8     0.696  0.994         0.592     1.276
9       9     0.706  0.938         0.621     1.159
10     10     0.715  0.891         0.585     1.312
11     11     0.744  0.848         0.613     1.166
12     12     0.752  0.807         0.595     1.396
13     13     0.741  0.803         0.597     1.235
14     14     0.764  0.769         0.597     1.165
15     15     0.769  0.758         0.605     1.260
16     16     0.775  0.726         0.651     1.147
17     17     0.798  0.682         0.562     1.523
18     18     0.797  0.677     

KeyboardInterrupt: ignored

In [24]:
# Evaluate the best saved model on the held-out test set.
model = load_model('/content/drive/MyDrive/Colab Notebooks/Geoguesser/Best So Far/best_model.hdf5')
op = model.predict(X_test)

# Predicted class = index of the largest softmax output in each row.
preds = np.argmax(op, axis=1)

correct = int(np.sum(preds == Y_test))

# Guard against an empty test set instead of raising ZeroDivisionError.
accuracy = correct / len(preds) if len(preds) else float('nan')

# This is measured on X_test, so it is the test accuracy, not training accuracy.
print( "Test Accuracy Evaluates to: " + str(accuracy) )

Training Accuracy Evaluates to: 0.7183967112024666
