In [2]:
import cv2
import numpy as np
import pandas as pd

import keras
from keras.models import load_model

from __future__ import division
import os, os.path
from collections import defaultdict

Using TensorFlow backend.


### Load Validation Set Images & Turn into DataFrame

In [3]:
main_path = "../02_Data/06_Test_Train_Val/val"
image_dict = defaultdict()

# The first os.walk() entry describes the validation folder itself; its
# sub-directories are named after the zoning classes.
val_path, val_dirs, val_files = next(os.walk(main_path))
for zoning_type in val_dirs:
    zoning_path, zoning_dirs, zoning_files = next(os.walk(main_path + "/" + zoning_type))
    # key   = integer parsed from the 6 characters preceding the 4-character extension
    # value = [true zoning label, path of the image file]
    image_dict.update(
        (int(file[-10:-4]), [zoning_type, zoning_path + "/" + file])
        for file in zoning_files
    )

In [4]:
# One row per validation image, indexed by the integer key parsed from its filename.
image_df = pd.DataFrame.from_dict(
    image_dict,
    orient='index',
    columns=["zoning", "filepath"],
)
image_df.head()

Unnamed: 0,zoning,filepath
179172,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...
177188,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...
166133,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...
148033,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...
57107,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...


### Load CNN Models

In [5]:
# Load the five saved versions (v1-v5) of the from-scratch CNN.
(CNN_model_1,
 CNN_model_2,
 CNN_model_3,
 CNN_model_4,
 CNN_model_5) = [
    load_model(model_file)
    for model_file in (
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_v1.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_v2.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_v3.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_v4.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_v5.h5',
    )
]

In [6]:
# Members of the CNN ensemble, in version order.
CNN_models = [
    CNN_model_1,
    CNN_model_2,
    CNN_model_3,
    CNN_model_4,
    CNN_model_5,
]

### Load VGG Pre-Trained Model

In [7]:
# Load the five saved versions (v1-v5) of the model trained on pre-trained VGG features.
(VGG_model_1,
 VGG_model_2,
 VGG_model_3,
 VGG_model_4,
 VGG_model_5) = [
    load_model(model_file)
    for model_file in (
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained_v1.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained_v2.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained_v3.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained_v4.h5',
        '../06_Jupyter_Notebooks/01_Models/las_vegas_Pretrained_v5.h5',
    )
]

In [8]:
# Members of the VGG ensemble, in version order.
VGG_models = [
    VGG_model_1,
    VGG_model_2,
    VGG_model_3,
    VGG_model_4,
    VGG_model_5,
]

### Zoning Dictionary

In [9]:
# Class labels = names of the validation sub-directories, sorted alphabetically.
# Position i in this list is used as the label for output index i of the models.
# This is intended to reproduce train_generator.class_indices from the notebook
# "05_Modelling_CNN_Scratch" -- TODO confirm the two orderings agree.
zoning_types = [zoning_label for zoning_label in np.sort(val_dirs)]

### Prediction Function - Single CNN Model

In [10]:
def CNN_predict(row, model):
    """Score one image with a single CNN model.

    The file is read as a single-channel (grayscale) image and reshaped into a
    batch of one, ``(1, height, width, 1)``, before being handed to
    ``model.predict``.

    Parameters
    ----------
    row : str
        Path of the image file (one value of the ``filepath`` column).
    model : object
        Model exposing ``predict(batch)``, e.g. one of the loaded CNN models.

    Returns
    -------
    numpy.ndarray
        Output of ``model.predict`` rounded to 3 decimals.

    Raises
    ------
    IOError
        If OpenCV cannot read ``row``.
    """
    img = cv2.imread(row, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports a missing/corrupt file by returning None rather
        # than raising; fail here with the offending path instead of letting
        # the reshape below die with an unrelated IndexError.
        raise IOError("cv2.imread could not read image: {}".format(row))
    img_array = np.asarray(img)
    img_array = img_array.reshape(1, img_array.shape[0], img_array.shape[1], 1)
    return np.round(model.predict(img_array), 3)

### Prediction Function - Ensembled CNN Models

In [11]:
def CNN_ensemble_predict(row, model_list):
    """Average the class scores of several CNN models for one image.

    Each model's ``CNN_predict`` output is multiplied by its own random factor
    drawn from U(0.95, 1.05) before averaging; the jitter is there to help
    break ties. Two consequences:

    * results differ from run to run unless ``np.random`` is seeded first;
    * the returned values are scores, not normalized probabilities, and may
      slightly exceed 1.0.

    Parameters
    ----------
    row : str
        Path of the image file to classify.
    model_list : sequence
        Models accepted by ``CNN_predict``; must not be empty.

    Returns
    -------
    numpy.ndarray
        Mean jittered score per class, rounded to 3 decimals, with the same
        shape as a single ``CNN_predict`` result.

    Raises
    ------
    ValueError
        If ``model_list`` is empty (the mean would otherwise be NaN).
    """
    if len(model_list) == 0:
        raise ValueError("model_list must contain at least one model")

    predict_ensemble = None
    for m in model_list:                                          # randomness to help break ties
        scaled = CNN_predict(row, m) * np.random.uniform(.95, 1.05)
        if predict_ensemble is None:
            # Size the accumulator from the first prediction so the number of
            # classes is not hard-coded.
            predict_ensemble = np.zeros(np.shape(scaled))
        predict_ensemble = predict_ensemble + scaled
    predict_ensemble = predict_ensemble / len(model_list)         # divide by number of models
    return np.round(predict_ensemble, 3)

### Prediction Function - Single VGG Pretrained Model 

In [12]:
from keras.applications import VGG16

# VGG16 convolutional stack without its classifier head, built for
# 91 x 91 three-channel inputs; used below as a fixed feature extractor.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(91, 91, 3),
)

def VGG_predict(row, model):
    """Score one image with a single model trained on VGG16 features.

    The image is read with three channels, passed through the module-level
    ``conv_base`` to extract convolutional features, flattened to one row and
    then scored by ``model``.

    Parameters
    ----------
    row : str
        Path of the image file (one value of the ``filepath`` column).
    model : object
        Model exposing ``predict(features)`` that accepts the flattened
        ``conv_base`` output.

    Returns
    -------
    numpy.ndarray
        Output of ``model.predict`` rounded to 3 decimals.

    Raises
    ------
    IOError
        If OpenCV cannot read ``row``.
    """
    img = cv2.imread(row)
    if img is None:
        # cv2.imread reports a missing/corrupt file by returning None rather
        # than raising; fail here with the offending path.
        raise IOError("cv2.imread could not read image: {}".format(row))
    # NOTE(review): cv2.imread returns the channels in BGR order, not RGB.
    # If the training features were extracted from RGB images, the channels
    # should be swapped here -- confirm against the training notebook.
    img_array = np.asarray(img)
    img_array = img_array.reshape(1, img_array.shape[0], img_array.shape[1], 3)
    feature_array = conv_base.predict(img_array)
    # Flatten whatever feature map conv_base produces (2 * 2 * 512 values for
    # the 91 x 91 input it is built with) instead of hard-coding its size.
    feature_array = feature_array.reshape(1, -1)
    return np.round(model.predict(feature_array), 3)

### Prediction Function - Ensembled VGG Pretrained Model

In [13]:
def VGG_ensemble_predict(row, model_list):
    """Average the class scores of several VGG-feature models for one image.

    Each model's ``VGG_predict`` output is multiplied by its own random factor
    drawn from U(0.95, 1.05) before averaging; the jitter is there to help
    break ties. Two consequences:

    * results differ from run to run unless ``np.random`` is seeded first;
    * the returned values are scores, not normalized probabilities, and may
      slightly exceed 1.0.

    Parameters
    ----------
    row : str
        Path of the image file to classify.
    model_list : sequence
        Models accepted by ``VGG_predict``; must not be empty.

    Returns
    -------
    numpy.ndarray
        Mean jittered score per class, rounded to 3 decimals, with the same
        shape as a single ``VGG_predict`` result.

    Raises
    ------
    ValueError
        If ``model_list`` is empty (the mean would otherwise be NaN).
    """
    if len(model_list) == 0:
        raise ValueError("model_list must contain at least one model")

    predict_ensemble = None
    for m in model_list:                                              # randomness to help break ties
        scaled = VGG_predict(row, m) * np.random.uniform(.95, 1.05)
        if predict_ensemble is None:
            # Size the accumulator from the first prediction so the number of
            # classes is not hard-coded.
            predict_ensemble = np.zeros(np.shape(scaled))
        predict_ensemble = predict_ensemble + scaled
    predict_ensemble = predict_ensemble / len(model_list)         # divide by number of models
    return np.round(predict_ensemble, 3)

### Run the Prediction on the Validation Set

In [14]:
# Score every validation image with both ensembles; each cell of the new
# columns holds the array of per-class scores returned by the ensemble.
# (Single-model variants, currently disabled:
#    image_df["CNN"] = image_df["filepath"].apply(CNN_predict, model=CNN_model_1)
#    image_df["VGG"] = image_df["filepath"].apply(VGG_predict, model=VGG_model_1))
validation_paths = image_df["filepath"]
image_df["CNN_ens"] = validation_paths.apply(
    lambda image_path: CNN_ensemble_predict(image_path, CNN_models)
)
image_df["VGG_ens"] = validation_paths.apply(
    lambda image_path: VGG_ensemble_predict(image_path, VGG_models)
)

### Get Zoning Type Function

In [15]:
def get_zoning(row):
    """Return the zoning label whose score is highest in ``row``.

    ``row`` is an array of per-class scores; the position of its maximum
    (first one on ties, per ``np.argmax``) is looked up in the module-level
    ``zoning_types`` list.
    """
    return zoning_types[np.argmax(row)]

In [16]:
# Convert each ensemble's score array into its predicted zoning label.
# (Single-model columns "CNN" / "VGG" are disabled above, so their
#  "CNN_zoning" / "VGG_zoning" counterparts are not produced here.)
for score_column, label_column in (("CNN_ens", "CNN_ens_zoning"),
                                   ("VGG_ens", "VGG_ens_zoning")):
    image_df[label_column] = image_df[score_column].apply(get_zoning)

In [17]:
# Preview the first 25 rows with scores and predicted labels side by side.
image_df.iloc[:25]

Unnamed: 0,zoning,filepath,CNN_ens,VGG_ens,CNN_ens_zoning,VGG_ens_zoning
179172,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.792, 0.0, 0.195]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.997]]",res_high,res_med
177188,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.992, 0.0, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.029, 0.0]]",res_high,res_low
166133,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.402, 0.387, 0.209]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.234, 0.758]]",res_high,res_med
148033,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.781, 0.0, 0.199]]","[[0.0, 0.0, 0.0, 0.0, 0.857, 0.132, 0.0]]",res_high,res_high
57107,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.795, 0.0, 0.204]]","[[0.0, 0.0, 0.0, 0.0, 0.796, 0.192, 0.014]]",res_high,res_high
126073,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 1.016, 0.0, 0.0]]","[[0.0, 0.0, 0.118, 0.0, 0.0, 0.397, 0.478]]",res_high,res_med
123080,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.819, 0.0, 0.209]]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.592, 0.393]]",res_high,res_low
66090,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.994, 0.0, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.807, 0.0, 0.201]]",res_high,res_high
134073,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 0.579, 0.413, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 0.012, 0.698, 0.281]]",res_high,res_low
40082,res_high,../02_Data/06_Test_Train_Val/val/res_high/Las_...,"[[0.0, 0.0, 0.0, 0.0, 1.022, 0.0, 0.0]]","[[0.0, 0.0, 0.0, 0.0, 1.005, 0.0, 0.0]]",res_high,res_high


### EDA On the Predictions

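Distribution of the zoning types being predicted. It was noted that High Industrial (`ind_high`) is not being predicted; however, for the `VGG_ens_zoning` output shown below, `ind_high` is predicted 170 times, so that observation appears to refer to a different model's predictions.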

In [18]:
# Name of the predicted-label column explored in the cells below.
# Alternative available in image_df: "CNN_ens_zoning".
# "VGG_zoning" exists only if the single-model cells above are uncommented;
# "predict_zoning" is not created in the cells shown in this notebook.
predict = "VGG_ens_zoning"

In [19]:
# Number of validation images (rows of image_df).
image_df.shape[0]

1504

In [20]:
# How often each zoning type is predicted over the whole validation set.
predicted_labels = image_df[predict]
predicted_labels.value_counts()

res_low           1000
ind_high           170
res_med            159
res_high            60
ind_office_med      55
com_high            41
com_med             19
Name: VGG_ens_zoning, dtype: int64

Predictions for res_low

In [21]:
# Predicted labels for the images whose true zoning is res_low.
image_df.loc[image_df["zoning"] == "res_low", predict].value_counts()

res_low           804
res_med            99
res_high           13
ind_office_med     10
ind_high           10
com_med             5
com_high            4
Name: VGG_ens_zoning, dtype: int64

Predictions for res_med

In [22]:
# Predicted labels for the images whose true zoning is res_med.
image_df.loc[image_df["zoning"] == "res_med", predict].value_counts()

res_low     63
res_med     19
res_high     7
ind_high     1
Name: VGG_ens_zoning, dtype: int64

Predictions for res_high

In [23]:
# Predicted labels for the images whose true zoning is res_high.
image_df.loc[image_df["zoning"] == "res_high", predict].value_counts()

res_low           40
res_high          25
res_med           19
ind_office_med     2
com_high           2
com_med            1
ind_high           1
Name: VGG_ens_zoning, dtype: int64

Predictions for com_med

In [24]:
# Predicted labels for the images whose true zoning is com_med.
image_df.loc[image_df["zoning"] == "com_med", predict].value_counts()

ind_high          47
res_low           45
com_high          16
ind_office_med    14
res_med           11
com_med            8
res_high           8
Name: VGG_ens_zoning, dtype: int64

Predictions for com_high

In [25]:
# Predicted labels for the images whose true zoning is com_high.
image_df.loc[image_df["zoning"] == "com_high", predict].value_counts()

ind_high          24
res_low           15
com_high          14
ind_office_med     8
res_high           5
com_med            3
res_med            3
Name: VGG_ens_zoning, dtype: int64

Predictions for ind_office_med

In [26]:
# Predicted labels for the images whose true zoning is ind_office_med.
image_df.loc[image_df["zoning"] == 'ind_office_med', predict].value_counts()

ind_high          39
res_low           19
ind_office_med    14
res_med            5
com_high           4
com_med            2
res_high           1
Name: VGG_ens_zoning, dtype: int64

Predictions for ind_high

In [27]:
# Predicted labels for the images whose true zoning is ind_high.
image_df.loc[image_df["zoning"] == "ind_high", predict].value_counts()

ind_high          48
res_low           14
ind_office_med     7
res_med            3
res_high           1
com_high           1
Name: VGG_ens_zoning, dtype: int64

### Scoring Validation Predictions

In [28]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [30]:
y_val = image_df["zoning"]
# Prediction columns to score. With the single-model cells enabled this could be
# ["CNN_zoning", "CNN_ens_zoning", "VGG_zoning", "VGG_ens_zoning"].
predict_list = ["CNN_ens_zoning", "VGG_ens_zoning"]

predict_scores = defaultdict()

for predict_type in predict_list:
    y_pred = image_df[predict_type]
    # accuracy first, then the support-weighted averages of the per-class metrics
    weighted_metrics = [
        metric(y_val, y_pred, average="weighted")
        for metric in (precision_score, recall_score, f1_score)
    ]
    predict_scores[predict_type] = [accuracy_score(y_val, y_pred)] + weighted_metrics

In [31]:
# One row per prediction column, one column per overall metric.
metric_names = ["accuracy", "precision", "recall", "f1"]
predict_scores_df = pd.DataFrame.from_dict(
    predict_scores,
    orient='index',
    columns=metric_names,
)
predict_scores_df

Unnamed: 0,accuracy,precision,recall,f1
CNN_ens_zoning,0.518617,0.65583,0.518617,0.556282
VGG_ens_zoning,0.619681,0.623426,0.619681,0.600443


In [34]:
# Per-class precision / recall / F1 for every prediction column in predict_list.
y_val = image_df["zoning"]

predict_scores = defaultdict()

for predict_type in predict_list:
    y_pred = image_df[predict_type]
    # Accuracy is a single number; it is repeated once per class so the row
    # lines up with the per-class metric rows.
    predict_scores["01_" + predict_type + "_accuracy"] = [np.round(accuracy_score(y_val, y_pred),2)] * len(zoning_types)
    # labels=zoning_types pins both the order and the length of the per-class
    # arrays. Without it, the metrics are ordered by whichever labels occur in
    # y_val / y_pred, which only coincidentally matches the zoning_types column
    # headers and yields a shorter array if a class is absent from both.
    predict_scores["02_" + predict_type + "_precision"] = list(np.round(precision_score(y_val, y_pred, labels=zoning_types, average=None),2))
    predict_scores["03_" + predict_type + "_recall"] = list(np.round(recall_score(y_val, y_pred, labels=zoning_types, average=None),2))
    predict_scores["04_" + predict_type + "_f1"] = list(np.round(f1_score(y_val, y_pred, labels=zoning_types, average=None),2))

In [35]:
# One row per (metric, prediction column), one column per zoning type;
# the 01_-04_ key prefixes make sort_index() group the rows by metric.
predict_scores_df = pd.DataFrame.from_dict(
    predict_scores,
    orient='index',
    columns=zoning_types,
)
predict_scores_df.sort_index()

Unnamed: 0,com_high,com_med,ind_high,ind_office_med,res_high,res_low,res_med
01_CNN_ens_zoning_accuracy,0.52,0.52,0.52,0.52,0.52,0.52,0.52
01_VGG_ens_zoning_accuracy,0.62,0.62,0.62,0.62,0.62,0.62,0.62
02_CNN_ens_zoning_precision,0.37,0.28,0.15,0.28,0.26,0.9,0.13
02_VGG_ens_zoning_precision,0.34,0.42,0.28,0.25,0.42,0.8,0.12
03_CNN_ens_zoning_recall,0.18,0.14,0.09,0.35,0.78,0.64,0.43
03_VGG_ens_zoning_recall,0.19,0.05,0.65,0.17,0.28,0.85,0.21
04_CNN_ens_zoning_f1,0.24,0.19,0.11,0.31,0.39,0.74,0.2
04_VGG_ens_zoning_f1,0.25,0.1,0.39,0.2,0.33,0.83,0.15
