In [2]:
import keras
from keras.models import load_model

import cv2
import numpy as np
import pandas as pd

from __future__ import division
import os, os.path
from collections import defaultdict

Using TensorFlow backend.


### Load Validation Set Images & Turn into DataFrame

In [3]:
# Root of the validation split; each sub-directory is named after a zoning class.
main_path = "../02_Data/06_Test_Train_Val/val"

# Maps the numeric image id parsed from the file name -> [zoning class, image path].
# A plain dict is used: a defaultdict() without a factory behaves exactly like one.
# NOTE(review): ids are assumed to be unique across zoning folders; a repeated id
# would silently overwrite the earlier entry -- confirm against the data set.
image_dict = {}

# next(os.walk(...)) yields only the top level: (path, sub-directories, files).
val_path, val_dirs, val_files = next(os.walk(main_path))
for zoning_type in val_dirs:
    zoning_path, zoning_dirs, zoning_files = next(os.walk(main_path + "/" + zoning_type))
    for file_name in zoning_files:
        # The id is the last six characters before the extension. Using splitext
        # keeps this correct for extensions that are not exactly three letters.
        stem = os.path.splitext(file_name)[0]
        key = int(stem[-6:])
        total_path = zoning_path + "/" + file_name
        image_dict[key] = [zoning_type, total_path]

In [4]:
# One row per image id (the dict keys become the index); the two list entries of
# each dict value become the "zoning" and "filepath" columns.
image_columns = ["zoning", "filepath"]
image_df = pd.DataFrame.from_dict(image_dict, orient="index")
image_df.columns = image_columns
image_df.head()

Unnamed: 0,zoning,filepath
171130,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...
113051,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...
49082,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...
224122,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...
172154,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...


### Load CNN Models

In [5]:
# Load the five saved CNN versions (v1..v5) in order and bind each to its own name.
CNN_model_1, CNN_model_2, CNN_model_3, CNN_model_4, CNN_model_5 = map(
    load_model,
    (
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers4_v1.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers4_v2.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers4_v3.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers4_v4.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers4_v5.h5',
    ),
)

In [6]:
# Members of the CNN ensemble, in version order.
CNN_models = [
    CNN_model_1,
    CNN_model_2,
    CNN_model_3,
    CNN_model_4,
    CNN_model_5,
]

### Load VGG Pre-Trained Model

In [7]:
# Load the five saved "Pretrained4" models (v1..v5) in order and bind each to its own name.
VGG_model_1, VGG_model_2, VGG_model_3, VGG_model_4, VGG_model_5 = map(
    load_model,
    (
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained4_v1.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained4_v2.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained4_v3.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained4_v4.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained4_v5.h5',
    ),
)

In [8]:
# Members of the VGG ensemble, in version order.
VGG_models = [
    VGG_model_1,
    VGG_model_2,
    VGG_model_3,
    VGG_model_4,
    VGG_model_5,
]

### Zoning Dictionary

In [9]:
# The validation sub-directory names are the zoning classes. They are sorted so
# that position i is the label for score index i of a prediction.
# Per the original author's note, this ordering is meant to mirror
# train_generator.class_indices from the notebook "05_Modelling_CNN_Scratch".
zoning_types = [label for label in np.sort(val_dirs)]

### Prediction Function - Single CNN Model

In [10]:
def CNN_predict(row, model):
    """Score one image with a single CNN model.

    Parameters
    ----------
    row : str
        Path of the image file (one value of the ``filepath`` column).
    model : object
        Model exposing ``predict``; it is called with a single array of
        shape (1, 150, 150, 1).

    Returns
    -------
    numpy.ndarray
        The output of ``model.predict`` rounded to 3 decimals.

    Raises
    ------
    IOError
        If OpenCV cannot read an image from ``row``.
    """
    img = cv2.imread(row, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None, which
        # would otherwise surface as an obscure failure inside cv2.resize.
        raise IOError("Could not read image file: {!r}".format(row))
    img = cv2.resize(img, dsize=(150, 150))            # resize image to 150 for processing
    # NOTE(review): pixel values are passed on unscaled -- confirm this matches
    # the preprocessing the model was trained with.
    # Add the batch axis (front) and the single grayscale channel axis (back).
    img_array = img.reshape(1, img.shape[0], img.shape[1], 1)
    return np.round(model.predict(img_array), 3)

### Prediction Function - Ensembled CNN Models

In [11]:
def CNN_ensemble_predict(row, model_list):
    """Average the predictions of several CNN models for one image.

    Each model's prediction (as returned by ``CNN_predict``) is multiplied by
    its own random factor drawn from ``np.random.uniform(.95, 1.05)`` before
    averaging; the factor is there to help break ties between classes.
    Consequences of that jitter: results are only reproducible if NumPy's
    global random state is seeded, and averaged scores can slightly exceed 1.

    Parameters
    ----------
    row : str
        Path of the image file.
    model_list : list
        Non-empty list of models accepted by ``CNN_predict``.

    Returns
    -------
    numpy.ndarray
        Mean of the jittered predictions, rounded to 3 decimals. It has the
        same shape as a single model's prediction.

    Raises
    ------
    ValueError
        If ``model_list`` is empty.
    """
    if len(model_list) == 0:
        # Previously this divided zeros by zero and silently returned NaNs.
        raise ValueError("model_list must contain at least one model")

    predict_ensemble = None
    for m in model_list:
        # Predict first, then draw the factor: one draw per model.
        jittered = CNN_predict(row, m) * np.random.uniform(.95, 1.05)
        if predict_ensemble is None:
            # float64 accumulator sized from the first prediction, so the
            # number of classes is no longer hard-coded to 7.
            predict_ensemble = np.zeros(np.shape(jittered))
        predict_ensemble = predict_ensemble + jittered
    predict_ensemble = predict_ensemble / len(model_list)         # divide by number of models
    return np.round(predict_ensemble, 3)

### Prediction Function - Single VGG Pretrained Model 

In [12]:
from keras.applications import VGG16

# VGG16 without its top (include_top=False), initialised with the 'imagenet'
# weights and built for 150x150 three-channel inputs. VGG_predict uses it to turn
# an image into the feature array that the saved VGG models take as input.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

def VGG_predict(row, model):
    """Score one image with a single model that sits on top of ``conv_base``.

    The image is read in colour, resized to 150x150, passed through the
    module-level ``conv_base``, flattened, and then scored by ``model``.

    Parameters
    ----------
    row : str
        Path of the image file (one value of the ``filepath`` column).
    model : object
        Model exposing ``predict``; it is called with the flattened
        ``conv_base`` features of shape (1, n_features).

    Returns
    -------
    numpy.ndarray
        The output of ``model.predict`` rounded to 3 decimals.

    Raises
    ------
    IOError
        If OpenCV cannot read an image from ``row``.
    """
    img = cv2.imread(row)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None, which
        # would otherwise surface as an obscure failure inside cv2.resize.
        raise IOError("Could not read image file: {!r}".format(row))
    img = cv2.resize(img, dsize=(150, 150))            # resize image to 150 for processing
    # 3 colour channels. OpenCV delivers them in BGR order, not RGB.
    # NOTE(review): the pixels are passed on in BGR order and unscaled --
    # confirm this matches how the training features were extracted.
    img_array = img.reshape(1, img.shape[0], img.shape[1], 3)
    feature_array = conv_base.predict(img_array)
    # Flatten everything except the batch axis (4 * 4 * 512 values for this conv_base).
    feature_array = feature_array.reshape(1, -1)
    return np.round(model.predict(feature_array), 3)

### Prediction Function - Ensembled VGG Pretrained Model

In [13]:
def VGG_ensemble_predict(row, model_list):
    """Average the predictions of several VGG-based models for one image.

    Each model's prediction (as returned by ``VGG_predict``) is multiplied by
    its own random factor drawn from ``np.random.uniform(.95, 1.05)`` before
    averaging; the factor is there to help break ties between classes.
    Consequences of that jitter: results are only reproducible if NumPy's
    global random state is seeded, and averaged scores can slightly exceed 1.

    Parameters
    ----------
    row : str
        Path of the image file.
    model_list : list
        Non-empty list of models accepted by ``VGG_predict``.

    Returns
    -------
    numpy.ndarray
        Mean of the jittered predictions, rounded to 3 decimals. It has the
        same shape as a single model's prediction.

    Raises
    ------
    ValueError
        If ``model_list`` is empty.
    """
    if len(model_list) == 0:
        # Previously this divided zeros by zero and silently returned NaNs.
        raise ValueError("model_list must contain at least one model")

    predict_ensemble = None
    for m in model_list:
        # Predict first, then draw the factor: one draw per model.
        jittered = VGG_predict(row, m) * np.random.uniform(.95, 1.05)
        if predict_ensemble is None:
            # float64 accumulator sized from the first prediction, so the
            # number of classes is no longer hard-coded to 7.
            predict_ensemble = np.zeros(np.shape(jittered))
        predict_ensemble = predict_ensemble + jittered
    predict_ensemble = predict_ensemble / len(model_list)         # divide by number of models
    return np.round(predict_ensemble, 3)

### Run the Prediction on the Validation Set

In [14]:
# Score every validation image. Each new column stores the prediction array
# returned for that image's file path.
# The single-model CNN column is left disabled:
# image_df["CNN"] = image_df["filepath"].apply(CNN_predict, model=CNN_model_1)
filepaths = image_df["filepath"]
for column, predictor, extra_kwargs in (
    ("CNN_ens", CNN_ensemble_predict, {"model_list": CNN_models}),
    ("VGG", VGG_predict, {"model": VGG_model_1}),
    ("VGG_ens", VGG_ensemble_predict, {"model_list": VGG_models}),
):
    image_df[column] = filepaths.apply(predictor, **extra_kwargs)

### Get Zoning Type Function

In [15]:
def get_zoning(row):
    """Return the zoning label whose score is highest in ``row``.

    ``row`` is a prediction array; the position of its maximum (taken over the
    flattened array) is used as an index into the module-level ``zoning_types``.
    """
    return zoning_types[np.asarray(row).argmax()]

In [16]:
# Turn each prediction column into a "<column>_zoning" column holding the
# predicted label.
# The single-model CNN column is left disabled:
# image_df["CNN_zoning"] = image_df["CNN"].apply(get_zoning)
for score_column in ("CNN_ens", "VGG", "VGG_ens"):
    image_df[score_column + "_zoning"] = image_df[score_column].apply(get_zoning)

In [17]:
# Preview the first 50 rows: true label, scores and predicted labels side by side.
image_df.iloc[:50]

Unnamed: 0,zoning,filepath,CNN_ens,VGG,VGG_ens,CNN_ens_zoning,VGG_zoning,VGG_ens_zoning
171130,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.007, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.024]]",res_low,res_med,res_med
113051,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.007, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.233, 0.764]]",res_low,res_med,res_med
49082,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.983, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.988, 0.003]]",res_low,res_low,res_low
224122,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.001, 0.0, 0.0, 0.0, 0.791, 0.19]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.989, 0.011]]","[[0.0, 0.0, 0.0, 0.0, 0.199, 0.203, 0.609]]",res_low,res_low,res_med
172154,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.021, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.987, 0.001]]",res_low,res_low,res_low
42129,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.803, 0.209]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.998]]",res_low,res_med,res_med
110046,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 1.007, 0.0, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.995]]",res_high,res_med,res_med
39057,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 1.011, 0.0, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 1.008, 0.0, 0.0]]",res_high,res_high,res_high
155188,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.81, 0.191]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.029]]",res_low,res_med,res_med
138118,res_low,../02_Data/06_Test_Train_Val/val/res_low/Las_V...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.985, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.001, 0.999]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.419, 0.6]]",res_low,res_med,res_med


### EDA On the Predictions

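Distribution of the zoning types being predicted for the column selected in `predict`. The original observation was that High Industrial (`ind_high`) is not being predicted; note that this does not hold for the CNN ensemble output shown below, where `ind_high` is predicted 151 times. It may instead describe the single VGG model, whose `ind_high` precision and recall are both 0.0 in the per-class scores later in this notebook.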

In [18]:
# Name of the predicted-label column that the EDA cells below summarise.
# Swap in one of the commented alternatives to inspect another model.
predict = "CNN_ens_zoning"
# predict = "VGG_ens_zoning"
# predict = "VGG_zoning"
# predict = "predict_zoning"   # NOTE(review): no cell shown here creates this column

In [19]:
# Number of validation images (rows).
image_df["zoning"].shape[0]

1504

In [20]:
# How often each zoning type is predicted over the whole validation set.
image_df.loc[:, predict].value_counts()

res_low           835
res_high          171
ind_office_med    154
com_med           153
ind_high          151
res_med            37
com_high            3
Name: CNN_ens_zoning, dtype: int64

Predictions for res_low

In [21]:
# Predicted labels for images whose true zoning is res_low.
image_df.loc[image_df["zoning"] == "res_low", predict].value_counts()

res_low           742
res_high           74
com_med            65
res_med            27
ind_high           23
ind_office_med     14
Name: CNN_ens_zoning, dtype: int64

Predictions for res_med

In [22]:
# Predicted labels for images whose true zoning is res_med.
image_df.loc[image_df["zoning"] == "res_med", predict].value_counts()

res_low           62
res_high          14
res_med            7
com_med            6
ind_office_med     1
Name: CNN_ens_zoning, dtype: int64

Predictions for res_high

In [23]:
# Predicted labels for images whose true zoning is res_high.
image_df.loc[image_df["zoning"] == "res_high", predict].value_counts()

res_high          55
res_low           18
com_med           11
ind_office_med     3
res_med            2
com_high           1
Name: CNN_ens_zoning, dtype: int64

Predictions for com_med

In [24]:
# Predicted labels for images whose true zoning is com_med.
image_df.loc[image_df["zoning"] == "com_med", predict].value_counts()

ind_office_med    49
ind_high          38
com_med           38
res_high          13
res_low           10
res_med            1
Name: CNN_ens_zoning, dtype: int64

Predictions for com_high

In [25]:
# Predicted labels for images whose true zoning is com_high.
image_df.loc[image_df["zoning"] == "com_high", predict].value_counts()

ind_office_med    23
ind_high          22
com_med           16
res_high          10
com_high           1
Name: CNN_ens_zoning, dtype: int64

Predictions for ind_office_med

In [26]:
# Predicted labels for images whose true zoning is ind_office_med.
image_df.loc[image_df["zoning"] == 'ind_office_med', predict].value_counts()

ind_office_med    34
ind_high          32
com_med           13
res_high           3
res_low            2
Name: CNN_ens_zoning, dtype: int64

Predictions for ind_high

In [27]:
# Predicted labels for images whose true zoning is ind_high.
image_df.loc[image_df["zoning"] == "ind_high", predict].value_counts()

ind_high          36
ind_office_med    30
com_med            4
res_high           2
res_low            1
com_high           1
Name: CNN_ens_zoning, dtype: int64

### Scoring Validation Predictions

In [28]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [29]:
# True labels and the predicted-label columns to score against them.
y_val = image_df["zoning"]
predict_list = ["CNN_ens_zoning","VGG_zoning","VGG_ens_zoning"]
# predict_list = ["CNN_zoning", "CNN_ens_zoning", "VGG_zoning", "VGG_ens_zoning"]

# column name -> [accuracy, precision, recall, f1].
# A plain dict is used: a defaultdict() without a factory behaves exactly like one.
predict_scores = {}

for predict_type in predict_list:
    y_pred = image_df[predict_type]
    # average="weighted" weights each class by its number of true samples.
    # With that weighting, recall is by definition the same value as accuracy,
    # so those two columns always match.
    predict_scores[predict_type] = [accuracy_score(y_val, y_pred),
                                    precision_score(y_val, y_pred, average="weighted"),
                                    recall_score(y_val, y_pred, average="weighted"),
                                    f1_score(y_val, y_pred, average="weighted")]

In [30]:
# One row per scored column; the four list entries become the metric columns.
metric_columns = ["accuracy", "precision", "recall", "f1"]
predict_scores_df = pd.DataFrame.from_dict(predict_scores, orient="index")
predict_scores_df.columns = metric_columns
predict_scores_df

Unnamed: 0,accuracy,precision,recall,f1
CNN_ens_zoning,0.607048,0.653535,0.607048,0.613566
VGG_zoning,0.355718,0.612147,0.355718,0.406549
VGG_ens_zoning,0.355053,0.626812,0.355053,0.407466


In [31]:
# Number of validation images per true zoning class.
image_df.loc[:, "zoning"].value_counts()

res_low           945
com_med           149
res_high           90
res_med            90
ind_office_med     84
ind_high           74
com_high           72
Name: zoning, dtype: int64

In [32]:
# The distinct true labels in ascending order.
sorted(image_df["zoning"].unique())

['com_high',
 'com_med',
 'ind_high',
 'ind_office_med',
 'res_high',
 'res_low',
 'res_med']

In [33]:
y_val = image_df["zoning"]

# "<NN>_<column>_<metric>" -> list with one value per entry of zoning_types.
# The numeric prefix groups rows by metric once the table is sorted by index.
# A plain dict is used: a defaultdict() without a factory behaves exactly like one.
predict_scores = {}

for predict_type in predict_list:
    y_pred = image_df[predict_type]
    # Accuracy is a single overall number; repeat it so its row has one value per class.
    predict_scores["01_" + predict_type + "_accuracy"] = [np.round(accuracy_score(y_val, y_pred),2)] * len(zoning_types)
    # labels=zoning_types pins the order (and number) of the per-class scores to
    # zoning_types, which is what the table built from this dict uses as column
    # names. Without it the order is only implicitly the sorted set of labels
    # that happen to occur in the data.
    predict_scores["02_" + predict_type + "_precision"] = list(np.round(
        precision_score(y_val, y_pred, labels=zoning_types, average=None),2))
    predict_scores["03_" + predict_type + "_recall"] = list(np.round(
        recall_score(y_val, y_pred, labels=zoning_types, average=None),2))
    predict_scores["04_" + predict_type + "_f1"] = list(np.round(
        f1_score(y_val, y_pred, labels=zoning_types, average=None),2))

In [34]:
# One row per "<NN>_<column>_<metric>" key, one column per zoning class.
predict_scores_df = pd.DataFrame.from_dict(predict_scores, orient="index")
predict_scores_df.columns = zoning_types
# Sorting the index groups the rows by their "01_".."04_" metric prefix.
predict_scores_df.sort_index(axis=0)

Unnamed: 0,com_high,com_med,ind_high,ind_office_med,res_high,res_low,res_med
01_CNN_ens_zoning_accuracy,0.61,0.61,0.61,0.61,0.61,0.61,0.61
01_VGG_ens_zoning_accuracy,0.36,0.36,0.36,0.36,0.36,0.36,0.36
01_VGG_zoning_accuracy,0.36,0.36,0.36,0.36,0.36,0.36,0.36
02_CNN_ens_zoning_precision,0.33,0.25,0.24,0.22,0.32,0.89,0.19
02_VGG_ens_zoning_precision,0.21,0.46,0.25,0.28,0.29,0.83,0.1
02_VGG_zoning_precision,0.19,0.43,0.0,0.25,0.27,0.83,0.1
03_CNN_ens_zoning_recall,0.01,0.26,0.49,0.4,0.61,0.79,0.08
03_VGG_ens_zoning_recall,0.51,0.09,0.05,0.26,0.23,0.39,0.76
03_VGG_zoning_recall,0.62,0.15,0.0,0.21,0.19,0.39,0.72
04_CNN_ens_zoning_f1,0.03,0.25,0.32,0.29,0.42,0.83,0.11


In [35]:
y_val = image_df["zoning"]

# Print the weighted precision, recall and F1 of every scored column, one per line.
for predict_type in predict_list:
    y_pred = image_df[predict_type]
    for metric_label, metric_fn in (
        (" Precision: ", precision_score),
        (" Recall: ", recall_score),
        (" F1", f1_score),
    ):
        print(predict_type, metric_label, metric_fn(y_val, y_pred, average="weighted"))

CNN_ens_zoning  Precision:  0.6535354209050658
CNN_ens_zoning  Recall:  0.6070478723404256
CNN_ens_zoning  F1 0.6135657683851529
VGG_zoning  Precision:  0.6121474700721077
VGG_zoning  Recall:  0.355718085106383
VGG_zoning  F1 0.4065493323391013
VGG_ens_zoning  Precision:  0.6268117179464122
VGG_ens_zoning  Recall:  0.3550531914893617
VGG_ens_zoning  F1 0.40746601094358653
