In [1]:
import keras
from keras.models import load_model

import cv2
import numpy as np
import pandas as pd

from __future__ import division
import os, os.path
from collections import defaultdict

Using TensorFlow backend.


### Load Test Set Images & Turn into DataFrame

In [2]:
# Walk the test-set folder: each immediate sub-directory is one zoning class
# and contains that class's image files.
main_path = "../02_Data/06_Test_Train_Val/test"
image_dict = defaultdict()

val_path, val_dirs, val_files = next(os.walk(main_path))
for zoning_type in val_dirs:
    zoning_path, zoning_dirs, zoning_files = next(os.walk(main_path + "/" + zoning_type))
    # Key = integer parsed from the six characters that precede the last four
    # characters of the file name (presumably the extension, e.g. ".png" — verify).
    # Value = [zoning class, path to the image]. A repeated key overwrites the
    # earlier entry.
    image_dict.update(
        (int(image_name[-10:-4]), [zoning_type, zoning_path + "/" + image_name])
        for image_name in zoning_files
    )

In [4]:
# One row per image id; the two list entries of each dict value become the
# "zoning" (true class) and "filepath" columns.
image_columns = ["zoning","filepath"]
image_df = pd.DataFrame.from_dict(image_dict, orient='index')
image_df.columns = image_columns
image_df.head()

Unnamed: 0,zoning,filepath
143182,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...
163183,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...
126053,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...
158142,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...
63081,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...


### Load Final CNN Models for Ensembling

In [32]:
# Load the five saved versions (v1-v5) of the final CNN from disk, in order.
model_files = (
    '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v1.h5',
    '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v2.h5',
    '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v3.h5',
    '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v4.h5',
    '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v5.h5',
)
CNN_model_1, CNN_model_2, CNN_model_3, CNN_model_4, CNN_model_5 = (
    load_model(model_file) for model_file in model_files
)

In [33]:
# The loaded models, in version order; passed as `model_list` to the ensemble predictor.
CNN_models = [CNN_model_1, CNN_model_2, CNN_model_3, CNN_model_4, CNN_model_5]

### Zoning Dictionary

In [3]:
# The class sub-directories of the test set double as the zoning labels.
# Sorting them is intended to reproduce the class order reported by
# train_generator.class_indices in the notebook "05_Modelling_CNN_Scratch",
# so that position i in this list lines up with output i of the models.
# NOTE(review): this relies on the training folders having the same names — confirm.
sorted_dirs = np.sort(val_dirs)
zoning_types = list(sorted_dirs)

### Prediction Function - Single CNN Model

In [34]:
def CNN_predict(row, model, target_size=(150, 150)):
    """Predict class scores for a single image file with one model.

    Parameters
    ----------
    row : str
        Path to the image file (the function is applied to a column of paths).
    model : object
        Any object with a ``predict(batch)`` method, e.g. a loaded Keras model.
    target_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize``; defaults to 150x150.

    Returns
    -------
    numpy.ndarray
        ``model.predict`` output for a batch of one image, rounded to 3 decimals.

    Raises
    ------
    IOError
        If the image cannot be read from ``row``.
    """
    img = cv2.imread(row, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals a missing/unreadable file by returning None instead of
    # raising; fail here with the offending path rather than inside cv2.resize.
    if img is None:
        raise IOError("Could not read image file: {}".format(row))
    img = cv2.resize(img, dsize=target_size)
    img_array = np.asarray(img)
    # Add a leading batch axis and a trailing single-channel axis.
    # NOTE(review): pixel values are passed on unscaled; confirm this matches
    # the preprocessing used when the models were trained.
    img_array = img_array.reshape(1, img_array.shape[0], img_array.shape[1], 1)
    return np.round(model.predict(img_array), 3)

### Prediction Function - Ensembled CNN Models

In [35]:
def CNN_ensemble_predict(row, model_list, jitter=(.95, 1.05), rng=None):
    """Average the predictions of several models for one image.

    Each model's prediction (from ``CNN_predict``) is multiplied by a random
    factor drawn uniformly from ``jitter`` before averaging; the original
    intent of this randomness is to help break ties. Because of that factor
    the averaged values can exceed 1 and are not a normalized probability
    vector.

    Parameters
    ----------
    row : str
        Path to the image file.
    model_list : sequence
        Models to ensemble; must not be empty.
    jitter : tuple of float, optional
        ``(low, high)`` bounds of the per-model random factor. Pass
        ``(1.0, 1.0)`` for a plain, deterministic average.
    rng : object, optional
        Random source exposing ``uniform(low, high)`` (for example
        ``np.random.RandomState(seed)``) for reproducible results. When None,
        the global ``np.random.uniform`` is used.

    Returns
    -------
    numpy.ndarray
        Averaged prediction, rounded to 3 decimals, with the same shape as a
        single model's prediction.

    Raises
    ------
    ValueError
        If ``model_list`` is empty.
    """
    if len(model_list) == 0:
        raise ValueError("model_list must contain at least one model")

    draw = np.random.uniform if rng is None else rng.uniform
    low, high = jitter

    predict_ensemble = None
    for m in model_list:
        weighted = CNN_predict(row, m) * draw(low, high)
        if predict_ensemble is None:
            # Size the accumulator from the first prediction instead of
            # hard-coding the number of classes.
            predict_ensemble = np.zeros(np.shape(weighted), dtype=np.float64)
        predict_ensemble = predict_ensemble + weighted
    predict_ensemble = predict_ensemble / len(model_list)         # divide by number of models
    return np.round(predict_ensemble, 3)

### Run the Prediction on the Test Set

In [36]:
# Score every image path with the ensemble in CNN_models; each cell of the new
# column holds the array returned by CNN_ensemble_predict.
image_df["CNN_ens"] = image_df["filepath"].apply(
    lambda image_path: CNN_ensemble_predict(image_path, model_list=CNN_models)
)

### Get Zoning Type Function

In [11]:
def get_zoning(row):
    """Return the zoning label matching the largest value in ``row``.

    ``row`` is a prediction array; the position of its maximum (taken over the
    flattened array) is looked up in the module-level ``zoning_types`` list.
    """
    best_position = int(np.argmax(row))
    return zoning_types[best_position]

In [37]:
# Translate each ensemble prediction array into its zoning label.
image_df["CNN_ens_zoning"] = image_df["CNN_ens"].map(get_zoning)

In [38]:
# Preview the first ten rows, now including the ensemble output and its label.
image_df.iloc[:10]

Unnamed: 0,zoning,filepath,CNN_ens,CNN_ens_zoning
143182,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.614, 0.398]]",res_low
163183,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.795, 0.0, 0.0, 0.0, 0.204, 0.0]]",com_med
126053,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.986, 0.0]]",res_low
158142,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.007, 0.0]]",res_low
63081,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.987, 0.0]]",res_low
155132,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.014, 0.0]]",res_low
127064,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.011, 0.0]]",res_low
185185,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.992, 0.0]]",res_low
144034,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.008, 0.0]]",res_low
129122,res_low,../02_Data/06_Test_Train_Val/test/res_low/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.028, 0.0]]",res_low


### EDA On the Predictions

Types of zoning getting predicted. Note that High Commercial (`com_high`) is almost never predicted (3 of 1,504 images).

In [39]:
# Name of the prediction column inspected by the value_counts cells that follow.
predict = "CNN_ens_zoning"

In [40]:
# Number of images in the evaluated set.
len(image_df.index)

1504

In [41]:
# How often each zoning label is predicted across all images.
image_df.loc[:, predict].value_counts()

res_low           876
ind_office_med    179
res_high          169
ind_high          139
com_med           117
res_med            21
com_high            3
Name: CNN_ens_zoning, dtype: int64

Predictions for res_low

In [42]:
# Predicted labels for the images whose true class is res_low.
image_df.loc[image_df["zoning"] == "res_low", predict].value_counts()

res_low           778
res_high           70
com_med            46
ind_high           22
res_med            16
ind_office_med     13
Name: CNN_ens_zoning, dtype: int64

Predictions for res_med

In [43]:
# Predicted labels for the images whose true class is res_med.
image_df.loc[image_df["zoning"] == "res_med", predict].value_counts()

res_low     68
res_high    19
res_med      2
com_med      1
Name: CNN_ens_zoning, dtype: int64

Predictions for res_high

In [44]:
# Predicted labels for the images whose true class is res_high.
image_df.loc[image_df["zoning"] == "res_high", predict].value_counts()

res_high          57
res_low           16
com_med           10
ind_office_med     5
ind_high           1
res_med            1
Name: CNN_ens_zoning, dtype: int64

Predictions for com_med

In [45]:
# Predicted labels for the images whose true class is com_med.
image_df.loc[image_df["zoning"] == "com_med", predict].value_counts()

ind_office_med    54
ind_high          42
com_med           32
res_low           12
res_high           9
Name: CNN_ens_zoning, dtype: int64

Predictions for com_high

In [46]:
# Predicted labels for the images whose true class is com_high.
image_df.loc[image_df["zoning"] == "com_high", predict].value_counts()

ind_office_med    31
ind_high          21
res_high           9
com_med            8
com_high           3
Name: CNN_ens_zoning, dtype: int64

Predictions for ind_office_med

In [47]:
# Predicted labels for the images whose true class is ind_office_med.
image_df.loc[image_df["zoning"] == 'ind_office_med', predict].value_counts()

ind_office_med    38
ind_high          28
com_med           14
res_low            2
res_med            1
res_high           1
Name: CNN_ens_zoning, dtype: int64

Predictions for ind_high

In [48]:
# Predicted labels for the images whose true class is ind_high.
image_df.loc[image_df["zoning"] == "ind_high", predict].value_counts()

ind_office_med    38
ind_high          25
com_med            6
res_high           4
res_med            1
Name: CNN_ens_zoning, dtype: int64

### Scoring Test Set Predictions

In [49]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [50]:
# True labels, and the prediction columns to be scored against them.
y_val = image_df["zoning"]
predict_list = ["CNN_ens_zoning"]
# predict_list = ["CNN_zoning", "CNN_ens_zoning", "VGG_zoning", "VGG_ens_zoning"]

predict_scores = defaultdict()

# For each prediction column store [accuracy, precision, recall, f1], the last
# three averaged over classes weighted by class support.
for predict_type in predict_list:
    y_pred = image_df[predict_type]
    weighted_scores = [
        metric(y_val, y_pred, average="weighted")
        for metric in (precision_score, recall_score, f1_score)
    ]
    predict_scores[predict_type] = [accuracy_score(y_val, y_pred)] + weighted_scores

In [51]:
# One row per prediction column, one column per summary metric.
score_columns = ["accuracy","precision","recall","f1"]
predict_scores_df = pd.DataFrame.from_dict(predict_scores, orient='index')
predict_scores_df.columns = score_columns
predict_scores_df

Unnamed: 0,accuracy,precision,recall,f1
CNN_ens_zoning,0.621676,0.679589,0.621676,0.620739


In [52]:
# Number of images per true zoning class.
image_df.loc[:, "zoning"].value_counts()

res_low           945
com_med           149
res_med            90
res_high           90
ind_office_med     84
ind_high           74
com_high           72
Name: zoning, dtype: int64

In [53]:
# Distinct true classes in sorted order, for comparison with zoning_types.
observed_classes = image_df["zoning"].unique()
list(np.sort(observed_classes))

['com_high',
 'com_med',
 'ind_high',
 'ind_office_med',
 'res_high',
 'res_low',
 'res_med']

In [54]:
y_val = image_df["zoning"]

predict_scores = defaultdict()

# Per-class metrics. `labels=zoning_types` pins both the order and the length
# of the returned arrays to the class list used as column headers in the next
# cell. Without it sklearn infers the labels from the data, so a class that is
# absent from both y_val and y_pred would shorten the arrays and break that
# column assignment.
per_class_metrics = (("02_", "_precision", precision_score),
                     ("03_", "_recall", recall_score),
                     ("04_", "_f1", f1_score))

for predict_type in predict_list:
    y_pred = image_df[predict_type]
    # Accuracy is a single overall number; repeat it so the row has one entry per class.
    predict_scores["01_" + predict_type + "_accuracy"] = [np.round(accuracy_score(y_val, y_pred),2)] * len(zoning_types)
    for prefix, suffix, metric in per_class_metrics:
        per_class = metric(y_val, y_pred, average=None, labels=zoning_types)
        predict_scores[prefix + predict_type + suffix] = list(np.round(per_class, 2))

In [55]:
# One row per metric, one column per zoning class; rows are shown in key order
# (the numeric prefixes put accuracy, precision, recall, f1 in that sequence).
per_class_scores = pd.DataFrame.from_dict(predict_scores, orient='index')
per_class_scores.columns = zoning_types
predict_scores_df = per_class_scores
predict_scores_df.sort_index()

Unnamed: 0,com_high,com_med,ind_high,ind_office_med,res_high,res_low,res_med
01_CNN_ens_zoning_accuracy,0.62,0.62,0.62,0.62,0.62,0.62,0.62
02_CNN_ens_zoning_precision,1.0,0.27,0.18,0.21,0.34,0.89,0.1
03_CNN_ens_zoning_recall,0.04,0.21,0.34,0.45,0.63,0.82,0.02
04_CNN_ens_zoning_f1,0.08,0.24,0.23,0.29,0.44,0.85,0.04


In [57]:
y_val = image_df["zoning"]

# Label text paired with the metric it announces; each is printed as a
# support-weighted average for every prediction column.
weighted_metrics = (
    (" Precision: ", precision_score),
    (" Recall: ", recall_score),
    (" F1", f1_score),
)

for predict_type in predict_list:
    y_pred = image_df[predict_type]
    for label, metric in weighted_metrics:
        print(predict_type, label, metric(y_val, y_pred, average="weighted"))

CNN_ens_zoning  Precision:  0.6795885449333742
CNN_ens_zoning  Recall:  0.6216755319148937
CNN_ens_zoning  F1 0.620738583852329
