In [1]:
import keras
from keras.models import load_model

import cv2
import numpy as np
import pandas as pd

from __future__ import division
import os, os.path
from collections import defaultdict

Using TensorFlow backend.


### Load Test Set Images & Turn into DataFrame

In [2]:
main_path = "../../02_Data/06_Test_Train_Val/test"
image_dict = defaultdict()

# The immediate sub-directories of the test folder are used as the zoning labels.
val_path, val_dirs, val_files = next(os.walk(main_path))
for zoning_type in val_dirs:
    zoning_path, zoning_dirs, zoning_files = next(os.walk("/".join([main_path, zoning_type])))
    # Key each image by the integer formed from the six characters that precede
    # the 4-character file extension; the value is [label, path to the file].
    image_dict.update(
        (int(file_name[-10:-4]), [zoning_type, "/".join([zoning_path, file_name])])
        for file_name in zoning_files
    )

In [3]:
# One row per test image, indexed by the numeric image id used as the dict key.
image_columns = ["zoning", "filepath"]
image_df = pd.DataFrame.from_dict(image_dict, orient='index')
image_df.columns = image_columns
image_df.head()

Unnamed: 0,zoning,filepath
134072,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...
162181,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...
134113,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...
187130,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...
128076,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...


### Load Final CNN Models for Ensembling

In [4]:
# Saved weights of the five ensemble members, listed in version order (v1..v5).
cnn_model_paths = [
    '../../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v1.h5',
    '../../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v2.h5',
    '../../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v3.h5',
    '../../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v4.h5',
    '../../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v5.h5',
]

# Load each file in order and bind the results to the same five names as before.
(CNN_model_1,
 CNN_model_2,
 CNN_model_3,
 CNN_model_4,
 CNN_model_5) = [load_model(saved_path) for saved_path in cnn_model_paths]

In [5]:
# The ensemble: every loaded model contributes to the averaged prediction.
CNN_models = [
    CNN_model_1,
    CNN_model_2,
    CNN_model_3,
    CNN_model_4,
    CNN_model_5,
]

### Zoning Dictionary

In [6]:
# Zoning class names in sorted order. get_zoning() indexes this list with the
# argmax of a prediction, so position i must be the label of model output i.
# The names are the sub-directory names of the test folder (val_dirs).
# Per the original note, this ordering mirrors train_generator.class_indices from
# the notebook "05_Modelling_CNN_Scratch" -- NOTE(review): confirm the sorted
# directory names still match that training-time mapping.
zoning_types = list(np.sort(val_dirs))

### Prediction Function - Single CNN Model

In [7]:
def CNN_predict(row, model, target_size=(150, 150)):
    """Predict class scores for a single image file with one CNN.

    Parameters
    ----------
    row : str
        Path of the image file. (The function is applied element-wise to the
        "filepath" column, so each "row" is one path.)
    model : object
        Trained model exposing ``predict(batch)``.
    target_size : tuple of int, optional
        Size passed to ``cv2.resize`` as ``dsize``. Defaults to ``(150, 150)``,
        the size the notebook has always used.

    Returns
    -------
    numpy.ndarray
        The output of ``model.predict`` for a batch holding this one image,
        rounded to 3 decimals.

    Raises
    ------
    IOError
        If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
    """
    img = cv2.imread(row, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # Fail with the offending path instead of an opaque error from cv2.resize.
        raise IOError("Could not read image file: {}".format(row))

    img = cv2.resize(img, dsize=tuple(target_size))

    # NOTE(review): pixel values are passed to the model unscaled -- confirm this
    # matches the preprocessing used when the models were trained.
    # Add a leading batch axis and a trailing single-channel axis.
    img_array = img.reshape(1, img.shape[0], img.shape[1], 1)
    return np.round(model.predict(img_array), 3)

### Prediction Function - Ensembled CNN Models

In [8]:
def CNN_ensemble_predict(row, model_list, jitter=0.05):
    """Average the predictions of several CNNs for one image file.

    Each model's prediction is multiplied by a random weight drawn from
    ``np.random.uniform(1 - jitter, 1 + jitter)`` before averaging; the
    randomness is there to help break ties between classes. Because of these
    weights an averaged score can end up slightly above 1.

    Parameters
    ----------
    row : str
        Path of the image file, forwarded to ``CNN_predict``.
    model_list : sequence
        Models to ensemble; must contain at least one model.
    jitter : float, optional
        Half-width of the random weight interval around 1. The default 0.05
        reproduces the original (0.95, 1.05) range; pass 0 for a plain,
        deterministic average.

    Returns
    -------
    numpy.ndarray
        Averaged prediction, rounded to 3 decimals, with the same shape as a
        single model's prediction.

    Raises
    ------
    ValueError
        If ``model_list`` is empty (the average would otherwise be NaN).
    """
    if len(model_list) == 0:
        raise ValueError("model_list must contain at least one model")

    predict_ensemble = None
    for m in model_list:
        prediction = CNN_predict(row, m)
        weight = np.random.uniform(1.0 - jitter, 1.0 + jitter)   # randomness to help break ties
        weighted = prediction * weight
        if predict_ensemble is None:
            # Size the accumulator from the first prediction rather than
            # assuming a fixed number of classes.
            predict_ensemble = np.zeros(np.shape(weighted))
        predict_ensemble = predict_ensemble + weighted

    predict_ensemble = predict_ensemble / len(model_list)         # divide by number of models
    return np.round(predict_ensemble, 3)

### Run the Prediction on the Test Set

In [9]:
# Seed NumPy's global RNG first: CNN_ensemble_predict draws random weights from
# np.random to break ties, so without a fixed seed every run of this cell gives
# slightly different scores (and possibly different predicted classes).
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Ensemble prediction for every test image; one array of class scores per row.
image_df["CNN_ens"] = image_df["filepath"].apply(CNN_ensemble_predict, model_list=CNN_models)

### Get Zoning Type Function

In [10]:
def get_zoning(row):
    """Return the zoning label with the highest score in ``row``.

    ``row`` is an array of class scores; the position of its maximum (taken over
    the flattened array, first position on ties) is looked up in the
    module-level ``zoning_types`` list.
    """
    return zoning_types[np.argmax(row)]

In [11]:
# Turn each row's ensemble score array into the name of its top-scoring class.
ensemble_scores = image_df["CNN_ens"]
image_df["CNN_ens_zoning"] = ensemble_scores.apply(get_zoning)

In [12]:
# Preview the first ten images with their ensemble scores and predicted class.
image_df.iloc[:10]

Unnamed: 0,zoning,filepath,CNN_ens,CNN_ens_zoning
134072,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.0, 0.0, 0.0, 0.996, 0.0, 0.0]]",res_high
162181,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.391, 0.0, 0.0, 0.389, 0.202, 0.0]]",com_med
134113,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.211, 0.0, 0.0, 0.615, 0.194, 0.0]]",res_high
187130,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.988, 0.0]]",res_low
128076,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.205, 0.0, 0.0, 0.206, 0.61, 0.0]]",res_low
179173,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.0, 0.0, 0.0, 1.005, 0.0, 0.0]]",res_high
131121,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.0, 0.0, 0.0, 0.984, 0.0, 0.0]]",res_high
172151,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.004, 0.0]]",res_low
178118,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.999, 0.0, 0.0, 0.0, 0.0, 0.0]]",com_med
64119,res_high,../../02_Data/06_Test_Train_Val/test/res_high/...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.003, 0.0]]",res_low


### EDA On the Predictions

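Counts of each predicted zoning type. Notably, high commercial (com_high) is almost never predicted: only 3 of the 1,504 test images in the counts below, whereas high industrial (ind_high) is predicted 139 times.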

In [13]:
# Name of the prediction column inspected by the value_counts() cells that follow.
predict = "CNN_ens_zoning"

In [14]:
# Number of test images (rows in the frame).
len(image_df.index)

1504

In [16]:
# How often each zoning class is predicted across the whole test set.
image_df.loc[:, predict].value_counts()

res_low           874
ind_office_med    178
res_high          171
ind_high          139
com_med           118
res_med            21
com_high            3
Name: CNN_ens_zoning, dtype: int64

Predictions for res_low

In [17]:
# Predicted classes for the images whose true label is res_low.
is_res_low = image_df["zoning"] == "res_low"
image_df.loc[is_res_low, predict].value_counts()

res_low           778
res_high           72
com_med            43
ind_high           22
res_med            16
ind_office_med     14
Name: CNN_ens_zoning, dtype: int64

Predictions for res_med

In [18]:
# Predicted classes for the images whose true label is res_med.
is_res_med = image_df["zoning"] == "res_med"
image_df.loc[is_res_med, predict].value_counts()

res_low     68
res_high    19
res_med      2
com_med      1
Name: CNN_ens_zoning, dtype: int64

Predictions for res_high

In [19]:
# Predicted classes for the images whose true label is res_high.
is_res_high = image_df["zoning"] == "res_high"
image_df.loc[is_res_high, predict].value_counts()

res_high          56
res_low           15
com_med           12
ind_office_med     5
ind_high           1
res_med            1
Name: CNN_ens_zoning, dtype: int64

Predictions for com_med

In [20]:
# Predicted classes for the images whose true label is com_med.
is_com_med = image_df["zoning"] == "com_med"
image_df.loc[is_com_med, predict].value_counts()

ind_office_med    53
ind_high          43
com_med           32
res_low           11
res_high          10
Name: CNN_ens_zoning, dtype: int64

Predictions for com_high

In [21]:
# Predicted classes for the images whose true label is com_high.
is_com_high = image_df["zoning"] == "com_high"
image_df.loc[is_com_high, predict].value_counts()

ind_office_med    31
ind_high          21
res_high           9
com_med            8
com_high           3
Name: CNN_ens_zoning, dtype: int64

Predictions for ind_office_med

In [22]:
# Predicted classes for the images whose true label is ind_office_med.
is_ind_office_med = image_df["zoning"] == 'ind_office_med'
image_df.loc[is_ind_office_med, predict].value_counts()

ind_office_med    38
ind_high          27
com_med           16
res_low            2
res_med            1
Name: CNN_ens_zoning, dtype: int64

Predictions for ind_high

In [23]:
# Predicted classes for the images whose true label is ind_high.
is_ind_high = image_df["zoning"] == "ind_high"
image_df.loc[is_ind_high, predict].value_counts()

ind_office_med    37
ind_high          25
com_med            6
res_high           5
res_med            1
Name: CNN_ens_zoning, dtype: int64

### Scoring Test Predictions

In [24]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [25]:
# True labels of the test images, and the prediction columns to score against them.
y_val = image_df["zoning"]
predict_list = ["CNN_ens_zoning"]

predict_scores = defaultdict()

# Metrics that are averaged over classes, weighted by each class's support.
weighted_metrics = (precision_score, recall_score, f1_score)

for predict_type in predict_list:
    y_pred = image_df[predict_type]
    # Row layout: [accuracy, weighted precision, weighted recall, weighted f1].
    row_scores = [accuracy_score(y_val, y_pred)]
    row_scores.extend(metric(y_val, y_pred, average="weighted") for metric in weighted_metrics)
    predict_scores[predict_type] = row_scores

In [26]:
# One row per prediction column, one column per overall metric.
metric_names = ["accuracy", "precision", "recall", "f1"]
predict_scores_df = pd.DataFrame.from_dict(predict_scores, orient='index')
predict_scores_df.columns = metric_names
predict_scores_df

Unnamed: 0,accuracy,precision,recall,f1
CNN_ens_zoning,0.621011,0.680116,0.621011,0.620641


In [27]:
# Number of test images per true zoning class (the support of each class).
image_df.loc[:, "zoning"].value_counts()

res_low           945
com_med           149
res_high           90
res_med            90
ind_office_med     84
ind_high           74
com_high           72
Name: zoning, dtype: int64

In [28]:
# Distinct true labels in sorted order.
sorted(image_df["zoning"].unique())

['com_high',
 'com_med',
 'ind_high',
 'ind_office_med',
 'res_high',
 'res_low',
 'res_med']

In [29]:
# True labels of the test images.
y_val = image_df["zoning"]

# Plain dict: the default-factory-less defaultdict used before behaved like one.
predict_scores = {}

for predict_type in predict_list:
    y_pred = image_df[predict_type]
    # Accuracy is a single number; repeat it so the row has one entry per class.
    predict_scores["01_" + predict_type + "_accuracy"] = [np.round(accuracy_score(y_val, y_pred),2)] * len(zoning_types)
    # Pass labels=zoning_types so the per-class scores come back in exactly the
    # order (and length) of zoning_types, which is what the next cell uses as the
    # column names. Without it the order is whatever labels sklearn infers from
    # the data, and a class absent from both y_val and y_pred would be dropped.
    predict_scores["02_" + predict_type + "_precision"] = list(np.round(precision_score(y_val, y_pred, labels=zoning_types, average=None),2))
    predict_scores["03_" + predict_type + "_recall"] = list(np.round(recall_score(y_val, y_pred, labels=zoning_types, average=None),2))
    predict_scores["04_" + predict_type + "_f1"] = list(np.round(f1_score(y_val, y_pred, labels=zoning_types, average=None),2))

In [30]:
# One row per metric (keys are prefixed 01_..04_ so sorting the index gives
# accuracy, precision, recall, f1), one column per zoning class.
per_class_frame = pd.DataFrame.from_dict(predict_scores, orient='index')
per_class_frame.columns = zoning_types
predict_scores_df = per_class_frame
predict_scores_df.sort_index()

Unnamed: 0,com_high,com_med,ind_high,ind_office_med,res_high,res_low,res_med
01_CNN_ens_zoning_accuracy,0.62,0.62,0.62,0.62,0.62,0.62,0.62
02_CNN_ens_zoning_precision,1.0,0.27,0.18,0.21,0.33,0.89,0.1
03_CNN_ens_zoning_recall,0.04,0.21,0.34,0.45,0.62,0.82,0.02
04_CNN_ens_zoning_f1,0.08,0.24,0.23,0.29,0.43,0.86,0.04
