In [None]:
## Load the required modules
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split, PredefinedSplit, cross_validate

import tensorflow as tf
from tensorflow import keras
!pip install keras-tuner --upgrade
import keras_tuner

from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
!pip install scikit-optimize --upgrade
from skopt import BayesSearchCV
from sklearn.metrics import precision_score, recall_score
import pickle as pkl
import collections

Collecting keras-tuner
  Downloading keras_tuner-1.1.2-py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 7.0 MB/s 
Collecting kt-legacy
  Downloading kt_legacy-1.0.4-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.1.2 kt-legacy-1.0.4
Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[K     |████████████████████████████████| 100 kB 3.7 MB/s 
Collecting pyaml>=16.9
  Downloading pyaml-21.10.1-py2.py3-none-any.whl (24 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-21.10.1 scikit-optimize-0.9.0


In [None]:
## Mount Google Drive in the Colab runtime under /content/drive/
from google.colab import drive
drive.mount('/content/drive/')

## Change the working directory to the data folder so that the relative file
## names used by the later cells resolve against it.
## Prerequisite: first create a shortcut to the "streetviews2/arrs_pkl" folder
## in your own Drive; otherwise the path below does not exist.
%cd /content/drive/MyDrive/streetviews2/arrs_pkl/

Mounted at /content/drive/
/content/drive/.shortcut-targets-by-id/1gnVV0eOEygqj9_wnneoTbAKcSf-W4nJS/streetviews2/arrs_pkl


## **Load the imaging Dataset**

In [None]:
## Pickled image arrays and their matching label arrays, listed in the same order
## (the files carry a .csv extension but are read with pickle below)
img_arr_filename_lst = ['img0_arr.csv', 'img1_arr.csv', 'img2_arr.csv', 'img3_arr.csv', 'img4_arr.csv', 'img5_arr.csv']
pci_arr_filename_lst = ['pci0_arr.csv', 'pci1_arr.csv', 'pci2_arr.csv', 'pci3_arr.csv', 'pci4_arr.csv', 'pci5_arr.csv']

## Load every (image, label) file pair.
## The loaded objects are kept as NumPy arrays rather than being expanded into
## nested Python lists with .tolist(): the list round-trip is slow, uses far
## more memory for image data, and discards the stored dtype.
## NOTE: pickle.load can execute arbitrary code -- only load trusted files.
img = []
label = []

for img_filename, pci_filename in zip(img_arr_filename_lst, pci_arr_filename_lst):
  with open(img_filename, 'rb') as f:
    img.append(np.asarray(pkl.load(f)))

  with open(pci_filename, 'rb') as f1:
    label.append(np.asarray(pkl.load(f1)))

## Stack the per-file arrays along the sample axis
train_images = np.concatenate(img, axis=0)
train_labels = np.concatenate(label, axis=0)

## NOTE(review): manual override of the label at index 155; the reason is not
## documented in this notebook -- confirm it is still wanted.
train_labels[155] = 5

X = train_images
y = train_labels
assert len(X) == len(y), "images and labels must contain the same number of samples"

## Split the dataset into training, validation, and test sets:
## one third is held out for testing, the remainder is halved into train / validation
X, X_test, y, y_test = train_test_split(X, y, test_size=1/3, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=1/2, random_state=0)

In [None]:
## Read the pickled array from 'img_first_1000_arr.csv' and store its list form
## as the single element of img_test
with open('img_first_1000_arr.csv', 'rb') as f:
  img_test = [pkl.load(f).tolist()]

## img_test wraps the loaded data in an outer list, so this always evaluates to 1
len(img_test)

1

## Model Collection

In [None]:
## Containers for the validation-set and test-set predictions of the selected
## models; both start empty and are filled by the model cells
validation_predictions, test_predictions = dict(), dict()

In [None]:
## Model1: Fully-Connected Neural Network
def build_model_FC(hp, input_shape=(133, 133, 3), n_classes=6):
  """Build and compile a tunable fully-connected classifier.

  Args:
    hp: keras-tuner hyperparameter container used to sample the number of
      dense layers (1-5), the units of each layer, the dropout rate and the
      Adam learning rate.
    input_shape: shape of a single input sample; defaults to (133, 133, 3).
    n_classes: number of softmax output units; defaults to 6.

  Returns:
    The compiled keras.Sequential model. Its metrics are, in order, AUC,
    recall and precision.
  """
  model = keras.Sequential()
  model.add(keras.layers.InputLayer(input_shape=input_shape))
  model.add(keras.layers.Rescaling(1./255))  # multiply inputs by 1/255
  model.add(keras.layers.Flatten())

  n_layers = hp.Int("num_layers", 1, 5)
  for i in range(n_layers):
    units = hp.Choice("units_" + str(i), values=[16, 64, 256, 1024])
    model.add(keras.layers.Dense(units=units, activation="relu"))

  dropout_rate = hp.Float('dropout', min_value=0.0, max_value=0.99, default=0.3, step=0.05)
  model.add(keras.layers.Dropout(dropout_rate))

  model.add(keras.layers.Dense(n_classes, activation='softmax'))  # multiclass classification

  learning_rate = hp.Float("learning_rate", min_value=1e-5, max_value=1, sampling="log", default=1e-2)
  # Metric names are pinned explicitly: the tuner objective and the
  # EarlyStopping callback look up "val_auc", so the name must not depend on
  # Keras' automatic (and potentially suffixed) metric naming.
  model.compile(optimizer=keras.optimizers.Adam(learning_rate),
      loss="categorical_crossentropy",
      metrics=[keras.metrics.AUC(name="auc"),
               keras.metrics.Recall(name="recall"),
               keras.metrics.Precision(name="precision")])
  return model

## Random Search tuner: 10 trials, one execution each, maximising validation AUC
tuner = keras_tuner.RandomSearch(build_model_FC, objective=keras_tuner.Objective("val_auc", direction="max"),
                                 max_trials=10, executions_per_trial=1, overwrite=True)

## One-hot targets. num_classes is fixed to 6 so the target width always matches
## the 6-unit softmax output, even if a split happens to miss the highest label
## (to_categorical otherwise infers the width from the largest label present).
y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=6)
y_val_onehot = tf.keras.utils.to_categorical(y_val, num_classes=6)

tuner.search(X_train, y_train_onehot,
             batch_size=64, epochs=20, validation_data=(X_val, y_val_onehot),
             # AUC is better when larger, so the direction is stated explicitly
             callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=10)])

## # of models we try
n_model = 2
best_models_FC = tuner.get_best_models(num_models=n_model)
best_hyperparamters_FC = tuner.get_best_hyperparameters(num_trials=n_model)

## Store predictions and build one summary column (hyperparameters + validation
## loss / AUC) per kept model; the columns are joined once after the loop
summary_frames = []
for i, (model, best_hp) in enumerate(zip(best_models_FC, best_hyperparamters_FC)):
    tag = 'FC_{}'.format(i)
    validation_predictions[tag] = model.predict(X_val)
    test_predictions[tag] = model.predict(X_test)

    d = collections.OrderedDict(sorted(best_hp.values.items()))
    df = pd.DataFrame.from_dict(d, orient='index', columns=[tag])
    # evaluate returns the loss followed by the compiled metrics; only the
    # loss and the first metric (AUC) are recorded
    scores = model.evaluate(X_val, y_val_onehot, verbose=0)
    df.loc['Loss'], df.loc['AUC'] = scores[0], scores[1]
    summary_frames.append(df)

df_FC = pd.concat(summary_frames, axis=1)
df_FC.to_csv('df_FC.csv')

Trial 10 Complete [00h 00m 20s]
val_auc: 0.5539094805717468

Best val_auc So Far: 0.5656721591949463
Total elapsed time: 00h 02m 05s
INFO:tensorflow:Oracle triggered exit


In [None]:
### Model2: Convolutional Neural Network

def build_model_CN(hp, input_shape=(133, 133, 3), n_classes=6):
  """Build and compile a tunable convolutional classifier.

  Each of the 1-4 sampled convolution stages is Conv2D -> BatchNormalization
  -> 2x2 MaxPooling -> Dropout, with the filter count, kernel size and dropout
  rate sampled per stage.

  Args:
    hp: keras-tuner hyperparameter container used to sample the architecture
      and the Adam learning rate.
    input_shape: shape of a single input sample; defaults to (133, 133, 3).
    n_classes: number of softmax output units; defaults to 6.

  Returns:
    The compiled keras.Sequential model. Its metrics are, in order, AUC,
    recall and precision.
  """
  model = keras.Sequential()
  model.add(keras.layers.InputLayer(input_shape=input_shape))
  model.add(keras.layers.Rescaling(1./255))  # multiply inputs by 1/255

  n_conv_layers = hp.Int("num_conv_layers", 1, 4)
  for i in range(n_conv_layers):
    filters = hp.Choice("filters_" + str(i), values=[16, 32, 64])
    kernel_size = hp.Choice("kernel_size_" + str(i), values=[3, 5])
    model.add(keras.layers.Conv2D(filters=filters, kernel_size=kernel_size,
                                  activation='relu', padding='same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    dropout_rate = hp.Float('dropout_' + str(i), min_value=0.0, max_value=0.99, default=0.3, step=0.05)
    model.add(keras.layers.Dropout(dropout_rate))

  model.add(keras.layers.Flatten())
  model.add(keras.layers.Dense(n_classes, activation='softmax'))

  learning_rate = hp.Float("learning_rate", min_value=1e-5, max_value=1, sampling="log", default=1e-2)
  # Metric names are pinned explicitly: the tuner objective and the
  # EarlyStopping callback look up "val_auc", so the name must not depend on
  # Keras' automatic (and potentially suffixed) metric naming.
  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate),
      loss="categorical_crossentropy",
      metrics=[
               keras.metrics.AUC(name="auc"),
               keras.metrics.Recall(name="recall"),
               keras.metrics.Precision(name="precision")
              ])
  return model

## Random Search tuner: 10 trials, one execution each, maximising validation AUC
tuner = keras_tuner.RandomSearch(
    build_model_CN,
    objective=keras_tuner.Objective("val_auc", direction="max"),
    max_trials=10,
    executions_per_trial=1,
    overwrite=True
)

## One-hot targets. num_classes is fixed to 6 so the target width always matches
## the 6-unit softmax output, even if a split happens to miss the highest label
## (to_categorical otherwise infers the width from the largest label present).
y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=6)
y_val_onehot = tf.keras.utils.to_categorical(y_val, num_classes=6)

tuner.search(
             X_train, y_train_onehot,
             batch_size=64,
             epochs=20,
             validation_data=(X_val, y_val_onehot),
             # AUC is better when larger, so the direction is stated explicitly
             callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=10)]
             )

## The number of models we keep
n_model = 5
best_models_CN = tuner.get_best_models(num_models=n_model)
best_hyperparamters_CN = tuner.get_best_hyperparameters(num_trials=n_model)

## Store the class-probability predictions and build one summary column
## (hyperparameters + validation loss / AUC) per kept model; the columns are
## joined once after the loop
summary_frames = []
for i, (model, best_hp) in enumerate(zip(best_models_CN, best_hyperparamters_CN)):
    tag = 'CN_{}'.format(i)
    validation_predictions[tag] = model.predict(X_val)
    test_predictions[tag] = model.predict(X_test)

    d = collections.OrderedDict(sorted(best_hp.values.items()))
    df = pd.DataFrame.from_dict(d, orient='index', columns=[tag])
    # evaluate returns the loss followed by the compiled metrics; only the
    # loss and the first metric (AUC) are recorded
    scores = model.evaluate(X_val, y_val_onehot, verbose=0)
    df.loc['Loss'], df.loc['AUC'] = scores[0], scores[1]
    summary_frames.append(df)

df_CN = pd.concat(summary_frames, axis=1)
df_CN.to_csv('df_CN.csv')

Trial 10 Complete [00h 00m 05s]
val_auc: 0.5569959282875061

Best val_auc So Far: 0.5691357851028442
Total elapsed time: 00h 01m 16s
INFO:tensorflow:Oracle triggered exit


In [None]:
## Save the validation and test predictions, plus the validation labels, as
## .pckl files. Each file is written inside a `with` block so the handle is
## closed even if pickling fails part-way.
with open('validation_predictions.pckl', 'wb') as f:
  pickle.dump(validation_predictions, f)

with open('test_predictions.pckl', 'wb') as f:
  pickle.dump(test_predictions, f)

with open('y_val.pckl', 'wb') as f:
  pickle.dump(y_val, f)