<h1> Define the train/test split, image loading, and pickle caching helpers </h1>

In [None]:
import os
import cv2
import keras
import pickle
import numpy as np
import pandas as pd

def train_test_split(rd,train_ratio,seed=None):
    """Randomly partition the entries of a directory into train and test names.

    Parameters
    ----------
    rd : str
        Directory whose entries are listed with os.listdir (one entry per sample).
    train_ratio : float
        Fraction of the entries assigned to the train split. The train count
        is ceil(train_ratio * number_of_entries).
    seed : int or None, optional
        When given, the shuffle uses its own seeded generator so the split is
        reproducible. When None (default) the global NumPy random state is
        used, exactly as before.

    Returns
    -------
    train, test : np.ndarray
        Disjoint arrays of entry names. Together they contain every entry of
        `rd` except '.DS_Store'.
    """
    # Drop the '.DS_Store' entry; sorting makes a seeded shuffle independent
    # of the arbitrary order returned by os.listdir
    fls = np.asarray(sorted(f for f in os.listdir(rd) if f != '.DS_Store'))

    if seed is None:
        np.random.shuffle(fls)
    else:
        np.random.RandomState(seed).shuffle(fls)

    train_length = int(np.ceil(train_ratio * len(fls)))

    # Slice from train_length onward. The previous fls[-test_length:] returned
    # the WHOLE array when test_length was 0 (because -0 == 0), which put every
    # sample into the test split.
    train = fls[:train_length]
    test = fls[train_length:]

    return train, test

def load(rd,file_names):
    """Load the image 'gsv_0.jpg' of every sample directory plus its label.

    Parameters
    ----------
    rd : str
        Root directory containing one sub-directory per sample.
    file_names : sequence of str
        Sub-directory names under `rd`. The text before the first space of
        each name must be one of 'Georgia', 'Idaho', 'Maine', 'Utah'.

    Returns
    -------
    X : np.ndarray, shape (len(file_names), 150, 150, 3), dtype float16
        Image arrays divided by 255.
    Y : np.ndarray, shape (len(file_names),), dtype int
        Integer class label of each sample (see label_map).

    Raises
    ------
    OSError
        If an image file is missing or cannot be decoded.
    KeyError
        If a directory name does not start with a known label.
    """
    label_map = {
        'Georgia':0,
        'Idaho':1,
        'Maine':2,
        'Utah':3 }

    n_samples = len(file_names)
    X = np.zeros(shape = (n_samples,150,150,3),dtype=np.float16)
    Y = np.zeros(shape=n_samples, dtype=int)

    for idx,sub_dir in enumerate(file_names):

        im_path = os.path.join(rd,sub_dir,"gsv_0.jpg")
        im = cv2.imread(im_path)

        # cv2.imread signals failure by returning None instead of raising;
        # without this check the division below fails with an opaque TypeError
        if im is None:
            raise OSError("Could not read image (missing or unreadable): " + im_path)

        X[idx] = im / 255
        Y[idx] = label_map[sub_dir.split(" ")[0]]

    return X, Y

def load_set(fp):
  """Return the single object stored in the pickle file at path `fp`.

  NOTE: unpickling can execute arbitrary code, so only use this on files
  written by this notebook (see save_set).
  """
  with open(fp,'rb') as handle:
    stored = pickle.load(handle)

  return stored

def save_set(fp,l):
  """Pickle the object `l` to the file at path `fp`, overwriting any existing file.

  Returns None.
  """
  with open(fp,'wb') as handle:
    pickle.Pickler(handle).dump(l)

<h1> Setup Data Storage and Fetching </h1>

In [None]:
!git clone https://github.com/gpoulsen1775/Geoguesser.git

fatal: destination path 'Geoguesser' already exists and is not an empty directory.


In [None]:
# Mount Google Drive at /content/drive (Colab-only module). Other cells in this
# notebook read and write their cache and checkpoint files under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<h1> Load in train-test data </h1>

In [None]:
rd = '/content/drive/MyDrive/Colab Notebooks/Geoguesser/'
fns = ['X_train','Y_train','X_test','Y_test']

try: #Try to load the cached split from Drive

  rs = []
  for fn in fns:

    rs.append(load_set(rd + fn))

  X_train,Y_train,X_test,Y_test = rs

# Only a missing or unreadable cache should trigger a rebuild. A bare `except:`
# would also swallow KeyboardInterrupt and genuine bugs.
except (OSError, EOFError, pickle.UnpicklingError) as err: #Create & Save

  print('Cached split not usable (' + repr(err) + '); rebuilding it from the images')

  extracted_dir = '/content/Geoguesser/extracted_data'

  trn, tst = train_test_split(extracted_dir,.8) #Break directories into a train and test set
  X_train,Y_train = load(extracted_dir,trn) #Load the train set
  X_test,Y_test = load(extracted_dir,tst) #Load the test set

  data = [X_train,Y_train,X_test,Y_test]

  #Cache every array under its name so the next run can skip the rebuild
  for i,fn in enumerate(fns):

    save_set(rd+fn,data[i])

<h1> Define CNN Model (earlier VGG16-style attempt kept commented out below) </h1>

In [None]:
# #VGG16 MAX 60%
# from keras.layers.serialization import activation
# from keras.models import Sequential
# from keras import regularizers
# from keras import initializers
# import keras.layers as kl

# model =  Sequential()

# model.add(kl.Conv2D(input_shape=(150,150,3), filters=8, kernel_size=(5,5), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Input
# model.add(kl.Conv2D(filters=8, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 1
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2))) #Pool 2
# model.add(kl.BatchNormalization())

# model.add(kl.Conv2D(filters=16, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 3
# model.add(kl.Conv2D(filters=16, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 4
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2))) #Pool 5
# model.add(kl.BatchNormalization())

# model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 6
# model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 7
# model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 8
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2))) #Pool 9
# model.add(kl.BatchNormalization())

# model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 10
# model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 11
# model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01))) #Convolution 12
# model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2))) #Pool 13
# model.add(kl.BatchNormalization())

# model.add(kl.Flatten()) #Flatten output
# model.add(kl.Dropout(0.4)) #Add Dropout to Layer
# model.add(kl.Dense(units=514, activation='relu', kernel_initializer=initializers.HeNormal())) #Fully Connected 14 #DOUBLED -> Works slightly faster
# model.add(kl.Dropout(0.4)) #Add Dropout to Layer
# model.add(kl.Dense(units=514, activation='relu', kernel_initializer=initializers.HeNormal())) #Fully Connected 15 #DOUBLED -> Works slightly faster
# model.add(kl.Dense(units=4, activation='softmax')) #Softmax Output

# from keras.optimizers import Adam

# opt = Adam(learning_rate=0.0003)
# model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
#Best Complex ~4 CNN Layer with all the defaults starting at 16

In [None]:
#68% By 13 -- NOTE(review): presumably ~68% accuracy by epoch 13; confirm
# The unused import `from keras.layers.serialization import activation` was
# removed: nothing in this cell uses it and it points at a private module path.
from keras.models import Sequential
from keras.optimizers import Adam
from keras import regularizers
from keras import initializers
import keras.layers as kl

model =  Sequential()

#Conv block 1: 16 filters, 4x4 max-pool
model.add(kl.Conv2D(input_shape=(150,150,3), filters=16, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
model.add(kl.MaxPool2D(pool_size=(4,4),strides=(4,4)))
model.add(kl.BatchNormalization())

#Conv block 2: 32 filters, 3x3 max-pool
model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
model.add(kl.MaxPool2D(pool_size=(3,3),strides=(3,3)))
model.add(kl.BatchNormalization())

#Conv block 3: 64 filters, 2x2 max-pool
model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(kl.BatchNormalization())

#Classifier head: flatten -> dropout -> dense(32) -> softmax over the 4 classes
model.add(kl.Flatten())
model.add(kl.Dropout(0.4))
model.add(kl.Dense(units=32, activation='relu', kernel_initializer=initializers.HeNormal()))
model.add(kl.Dense(units=4, activation='softmax')) #Softmax Output

#Labels are integer class ids, hence the sparse categorical loss
opt = Adam(learning_rate=0.0005)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
#TEST: 3-conv-block CNN compiled with Adam at learning rate 0.0007.
#Running this cell replaces any `model` defined earlier in the session.
# The unused import `from keras.layers.serialization import activation` was
# removed: nothing in this cell uses it and it points at a private module path.
from keras.models import Sequential
from keras.optimizers import Adam
from keras import regularizers
from keras import initializers
import keras.layers as kl

model =  Sequential()

#Conv block 1: 16 filters, 4x4 max-pool
model.add(kl.Conv2D(input_shape=(150,150,3), filters=16, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
model.add(kl.MaxPool2D(pool_size=(4,4),strides=(4,4)))
model.add(kl.BatchNormalization())

#Conv block 2: 32 filters, 3x3 max-pool
model.add(kl.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
model.add(kl.MaxPool2D(pool_size=(3,3),strides=(3,3)))
model.add(kl.BatchNormalization())

#Conv block 3: 64 filters, 2x2 max-pool
model.add(kl.Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(l=0.01)))
model.add(kl.MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(kl.BatchNormalization())

#Classifier head: flatten -> dropout -> dense(32) -> softmax over the 4 classes
model.add(kl.Flatten())
model.add(kl.Dropout(0.4))
model.add(kl.Dense(units=32, activation='relu', kernel_initializer=initializers.HeNormal()))
model.add(kl.Dense(units=4, activation='softmax')) #Softmax Output

#Labels are integer class ids, hence the sparse categorical loss
opt = Adam(learning_rate=0.0007)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# https://arxiv.org/pdf/2003.12843.pdf

In [None]:
# Print the layers of the current `model` with their output shapes and parameter counts
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 150, 150, 16)      448       
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 37, 37, 16)       0         
 2D)                                                             
                                                                 
 batch_normalization_9 (Batc  (None, 37, 37, 16)       64        
 hNormalization)                                                 
                                                                 
 conv2d_10 (Conv2D)          (None, 37, 37, 32)        4640      
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 12, 12, 32)       0         
 g2D)                                                            
                                                      

<h1> Train Model </h1>

In [None]:
from keras.models import load_model

EPOCHS = 400
checkpoint_filepath = '/content/drive/MyDrive/Colab Notebooks/Geoguesser/best_model.hdf5'
history_file = '/content/drive/MyDrive/Colab Notebooks/Geoguesser/log.csv'

#Make History Saver (append=True: the log accumulates one row per epoch across runs)
history_callback = keras.callbacks.CSVLogger(history_file, separator=",", append=True)

#Make Checkpoint: keep only the full model with the lowest validation loss
checkpoint_callback = keras.callbacks.ModelCheckpoint(

    filepath=checkpoint_filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False)

#Make Early Stop: stop after 10 epochs without val_loss improvement
early_stop_callback = keras.callbacks.EarlyStopping(

    monitor='val_loss',
    patience=10,
    restore_best_weights=True)

#Resume from saved progress when a checkpoint exists. Explicit existence checks
#replace the former bare `except: pass`, which also hid genuine failures
#(e.g. a corrupt checkpoint) and KeyboardInterrupt.
completed_epochs = 0

if os.path.exists(checkpoint_filepath):

  #NOTE: this replaces whatever `model` was built in the cells above
  model = load_model(checkpoint_filepath)

  try:
    log = pd.read_csv(history_file)

  except (FileNotFoundError, pd.errors.EmptyDataError):
    pass #No usable history yet: keep the full epoch budget

  else:
    with pd.option_context('display.max_rows', None,
                         'display.max_columns', None,
                         'display.precision', 3,):
      print(log)

    #One log row per trained epoch; never let the remaining budget go negative
    completed_epochs = len(log)
    EPOCHS = max(EPOCHS - completed_epochs, 0)

# The best model seen so far (lowest val_loss) is saved at the end of an epoch.
# initial_epoch keeps the epoch numbers in the appended CSV log continuous
# instead of restarting at 0 on every resumed run.
# NOTE(review): the fresh ModelCheckpoint does not know the previous best
# val_loss, so the first resumed epoch overwrites the checkpoint -- confirm
# whether that is acceptable.
history = model.fit(x=X_train, y=Y_train, epochs=completed_epochs + EPOCHS, initial_epoch=completed_epochs, callbacks=[checkpoint_callback,early_stop_callback,history_callback], validation_split=.1)

#Try Fit Generator if Data Augmentation is needed

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400

In [None]:
# Reload the model stored at checkpoint_filepath, i.e. the file ModelCheckpoint
# writes when val_loss improves (save_best_only=True, full model)
model = load_model(checkpoint_filepath)


In [None]:
rd = '/content/drive/MyDrive/Colab Notebooks/Geoguesser/'
fns = ['X_train','Y_train','X_test','Y_test']

#Reload the four cached arrays from Drive, in the order given by fns
rs = []
for fn in fns:
  rs += [load_set(rd + fn)]

X_train, Y_train, X_test, Y_test = rs