In [31]:
import matplotlib.image as mpimg
import pandas as pd
import numpy as np
import shutil
import string
import keras
import os
import cv2

from keras.models import load_model
from collections import defaultdict
from shutil import copyfile

In [2]:
original_image = mpimg.imread("../02_Data/04_Zoning_Maps/Las_Vegas_MSA.png")
print(original_image.shape)

(4562, 4259, 3)


### Global Variables

In [3]:
cols = 224
rows = 240

min_zoning_threshold = .50     # this sets the minimum percent that a zoning type should cover before an image
                               # is assigned to that type. It is used in cell 'Zoning - Processing Function'
min_coverage_threshold = 0.075 # this variable sets the minimum building coverage in each image. It is used in
                               # cell 'Site Coverage - EDA'

### Zoning - Internal Variables

In [4]:
# RGB values mapped to program type
red = 0
green = 1
blue = 2

# keys for dictionary
commercial_high = (red, round(10*255/255))
commercial_med  = (red, round(10*200/255))
industrial_high = (green, round(10*75/255))
industrial_office_med = (green, round(10*150/255))
res_low  = (blue, round(10*255/255))
res_med  = (blue, round(10*200/255))
res_high = (blue, round(10*100/255))

# zoning_list
zoning_list = [commercial_high,commercial_med,industrial_high,
               industrial_office_med,res_low,res_med,res_high]
zoning_names = ["com_high","com_med","ind_high","ind_office_med","res_low","res_med","res_high"]
zoning_names_dict = dict(zip(zoning_list,zoning_names))

# Column/Row Coordinates
col_offset_float = original_image.shape[1]/cols
row_offset_float = original_image.shape[0]/rows

# Subdivided Image Size
col_pixels = int(col_offset_float)
row_pixels = int(row_offset_float)

### Zoning - Processing Function

In [5]:
def get_zoning(image_array):
    rgb_dict = defaultdict(int)
    
    for i in range(image_array.shape[0]):       # for the row dimension
        for j in range(image_array.shape[1]):   # for the column dimension
            pixel_rgb = image_array[i][j]
            if sum(pixel_rgb) > 1.1:            # this should exclude (1) noise and (2) white pixels (255,255,255)
                pass                            # correct pixels should never exceed 1 when summed (255,0,0)
            else:
                dominant_channel = np.argmax(image_array[i][j])                             # is the main channel value R,G or B?
                dominant_channel_value = int(round(10*image_array[i][j][dominant_channel])) # and what is the value? round to clean out noise
                rgb_dict[(dominant_channel,dominant_channel_value)] += 1                    # dictionary of channel + value, increment
                
    zoning_dict = defaultdict(int)

    for zoning in zoning_list:                  # extract the relevant values corresponding to zoning pixels
        new_key = zoning_names_dict[zoning]
        zoning_dict[new_key] = rgb_dict[zoning] # remap to new dictionary
    
    zoning_type, count = list(zoning_dict.keys()), list(zoning_dict.values())   # unzip to find largest value
    max_position = np.argmax(count)
    dominant_zoning = zoning_type[max_position]                                 # grab index of largest count
                                                                                # and find zoning type
    
    total_pixels = image_array.shape[0]*image_array.shape[1]
    if count[max_position] < (total_pixels * min_zoning_threshold):             # check if the dominant zoning
        dominant_zoning = "none"                                                # type meets a minimum threshold
    
    return dominant_zoning

### Zoning - Processing Loop

In [6]:
new_image = np.zeros(shape=(row_pixels,col_pixels,3))
image_dict = defaultdict(list)

for r in range(rows):                                   # go through the row coordinates
    start_pixel_row = int(r*row_offset_float)          
    for c in range(cols):                               # go through the column coordinates
        start_pixel_col = int(c*col_offset_float)       # define the start and end of the column pixels
        end_pixel_col = start_pixel_col + col_pixels   
        for rp in range(row_pixels):                    # and go row by row within the coordinate grid
            new_image[rp] = original_image[start_pixel_row+rp][start_pixel_col:end_pixel_col]
        
        row_name = ("00"+str(r))[-3:]
        col_name = ("00"+str(c))[-3:]
        image_dict[row_name+col_name] = [get_zoning(new_image)]

### Zoning - EDA
How many of each Zoning Type are there? 

In [7]:
zone_dict = defaultdict(int)

for square in list(image_dict.values()):
    zone_dict[square[0]] += 1

zone_dict

defaultdict(int,
            {'none': 41632,
             'ind_high': 1124,
             'res_low': 7211,
             'ind_office_med': 1118,
             'com_med': 1156,
             'res_med': 553,
             'res_high': 495,
             'com_high': 471})

### Create DataFrame

In [8]:
grid_df = pd.DataFrame.from_dict(image_dict, orient='index') 
grid_df = grid_df.rename({0:"zoning"},axis=1)
grid_df["code"] = grid_df.index.get_values()
grid_df = grid_df[["code","zoning"]]
grid_df.head()

Unnamed: 0,code,zoning
0,0,none
1,1,none
2,2,none
3,3,none
4,4,none


### Site Coverage - Function

In [9]:
def get_coverage(row):
    total = 8053.7   # I calcluated this manually since the white is not 100% 255,255,255 white. If 
                     # the image processing steps changes to a more white image, this value will have to change
    fragment = mpimg.imread(f"../02_Data/03_Images/02_PSD_Processed/05_0.25x0.25-0.125x0.125_Square/01_Renamed/Las_Vegas_{row}.png")
    pixel_count = sum(sum(fragment))[0]   # just add all the values together
    return round(1 - pixel_count/total,4) # round it to 4 decimal places

In [10]:
# This code finds the value for the variable total (8053.7)
# fragment = mpimg.imread("../02_Data/03_Images/02_PSD_Processed/05_0.25x0.25-0.125x0.125_Square/01_Renamed/Las_Vegas_000000.png")
# pixel_count = sum(sum(fragment))[0]
# pixel_count

### Site Coverage - Apply Function

In [10]:
grid_df["coverage"] = grid_df["code"].apply(get_coverage)

In [11]:
grid_df["coverage"] = grid_df["coverage"].astype(float)    # convert to a float (was a string)

### Site Coverage - EDA

In [12]:
grid_df.head()

Unnamed: 0,code,zoning,coverage
0,0,none,0.0
1,1,none,0.0
2,2,none,0.0
3,3,none,0.0
4,4,none,0.0


Number of plots with at least the minimum zoning and at least the minimum building coverage

In [13]:
zoned_lots_w_bldgs_df = grid_df[(grid_df["zoning"] != 'none') & (grid_df["coverage"] > min_coverage_threshold)] 
zoned_lots_w_bldgs_df.shape

(7533, 3)

In [14]:
zoned_lots_w_bldgs_df.head()

Unnamed: 0,code,zoning,coverage
4052,4052,res_low,0.1071
4053,4053,res_low,0.089
5053,5053,res_low,0.21
6041,6041,res_low,0.1099
6042,6042,res_low,0.0946


How many of each zoning type do I have images of with built buildings?

In [15]:
zoned_lots_w_bldgs_df["zoning"].value_counts()

res_low           4729
com_med            746
res_high           451
res_med            450
ind_office_med     423
ind_high           373
com_high           361
Name: zoning, dtype: int64

### Function to Get URL Filepath

In [20]:
def get_url(row):
    url_path = f"../02_Data/03_Images/02_PSD_Processed/05_0.25x0.25-0.125x0.125_Square/01_Renamed/Las_Vegas_{row}.png"
    return url_path

### Add URL Filepath to DataFrame

In [21]:
data_df = zoned_lots_w_bldgs_df
data_df["url"] = data_df["code"].apply(get_url)
data_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,code,zoning,coverage,url
4052,4052,res_low,0.1071,../02_Data/03_Images/02_PSD_Processed/05_0.25x...
4053,4053,res_low,0.089,../02_Data/03_Images/02_PSD_Processed/05_0.25x...
5053,5053,res_low,0.21,../02_Data/03_Images/02_PSD_Processed/05_0.25x...
6041,6041,res_low,0.1099,../02_Data/03_Images/02_PSD_Processed/05_0.25x...
6042,6042,res_low,0.0946,../02_Data/03_Images/02_PSD_Processed/05_0.25x...


### Load Predictive Models

In [24]:
CNN_model_1 = load_model('../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v1.h5')
CNN_model_2 = load_model('../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v2.h5')
CNN_model_3 = load_model('../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v3.h5')
CNN_model_4 = load_model('../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v4.h5')
CNN_model_5 = load_model('../06_Jupyter_Notebooks/01_Models/las_vegas_CNN_Trained_12_Layers_Final_v5.h5')

In [25]:
CNN_models = [CNN_model_1, CNN_model_2, CNN_model_3, CNN_model_4, CNN_model_5]

### Prediction Function - Single CNN Model

In [26]:
def CNN_predict(row, model):
    img = cv2.imread(row, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, dsize=(150, 150))                                     # resize image to 150 for processing 
    img_array = np.array(img)
    img_array = img_array.reshape(1,img_array.shape[0],img_array.shape[1],1)
    return np.round(model.predict(img_array),3)

### Prediction Function - Ensembled CNN Models

In [27]:
def CNN_ensemble_predict(row, model_list):
    predict_ensemble = np.zeros((1,7))
    for m in model_list:                                          # randomness to help break ties
        predict_ensemble = predict_ensemble + CNN_predict(row, m) * np.random.uniform(.95,1.05)  
    predict_ensemble = predict_ensemble / len(model_list)         # divide by number of models
    return np.round(predict_ensemble,3)

### Run Prediction on Images with Zoning and Building Coverage

In [32]:
predict_df = data_df[(data_df["zoning"] != 'none') & (data_df["coverage"] > min_coverage_threshold)] 
predict_df["prediction"] = predict_df["url"].apply(CNN_ensemble_predict,model_list=CNN_models)

In [33]:
predict_df.head()

Unnamed: 0,code,zoning,coverage,url,prediction
4052,4052,res_low,0.1071,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.605, 0.397]]"
4053,4053,res_low,0.089,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.761, 0.0, 0.0, 0.0, 0.233, 0.0]]"
5053,5053,res_low,0.21,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.001, 0.0]]"
6041,6041,res_low,0.1099,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.005, 0.0]]"
6042,6042,res_low,0.0946,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.205, 0.207, 0.19, 0.0, 0.387, 0.0]]"


### Get Zoning Definition

In [35]:
# the directories are the zoning type classifications
# which I copied from the notebook "05_Modelling_CNN_Scratch"
# from the code: train_generator.class_indices
zoning_types = list(np.sort(['com_high','com_med','ind_high','ind_office_med','res_high','res_low','res_med']))   

In [37]:
def get_zoning(row):
    index = np.argmax(row)
    return zoning_types[index]

### Get Zoning on Predictions

In [38]:
predict_df["predicted_zoning"]=predict_df["prediction"].apply(get_zoning)
predict_df.head()

Unnamed: 0,code,zoning,coverage,url,prediction,predicted_zoning
4052,4052,res_low,0.1071,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.605, 0.397]]",res_low
4053,4053,res_low,0.089,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.761, 0.0, 0.0, 0.0, 0.233, 0.0]]",com_med
5053,5053,res_low,0.21,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.001, 0.0]]",res_low
6041,6041,res_low,0.1099,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.005, 0.0]]",res_low
6042,6042,res_low,0.0946,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.205, 0.207, 0.19, 0.0, 0.387, 0.0]]",res_low


### Create Directories to Hold Predicted Images (Only Run Once)

In [None]:
# total_zoning_types = predict_df["zoning"].unique()
# for zoning_type in total_zoning_types:
#     zoning_predictions = list(predict_df["predicted_zoning"][predict_df["zoning"]==zoning_type].unique())
#     os.mkdir(f"../02_Data/07_Predictions/{zoning_type}")
#     for zone_predict in zoning_predictions:
#         os.mkdir(f"../02_Data/07_Predictions/{zoning_type}/{zone_predict}")

### Function - Separate Files on Prediction

In [61]:
def separate_files(row):
    image_code = row[0]
    zone_true = row[1]
    zone_predict = row[2]
    origin = f"../02_Data/03_Images/02_PSD_Processed/05_0.25x0.25-0.125x0.125_Square/01_Renamed/Las_Vegas_{image_code}.png"
    destination = f"../02_Data/07_Predictions/{zone_true}/{zone_predict}/Las_Vegas_{image_code}.png"
    copyfile(origin, destination)

### Separate Files by Prediction
Create a Tuple column holding the three pieces of data needed to move files

In [67]:
predict_df["move_file_tuple"] = list(zip(predict_df["code"],predict_df["zoning"],predict_df["predicted_zoning"]))
predict_df.head()

Unnamed: 0,code,zoning,coverage,url,prediction,predicted_zoning,move_file_tuple
4052,4052,res_low,0.1071,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.605, 0.397]]",res_low,"(004052, res_low, res_low)"
4053,4053,res_low,0.089,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.761, 0.0, 0.0, 0.0, 0.233, 0.0]]",com_med,"(004053, res_low, com_med)"
5053,5053,res_low,0.21,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.001, 0.0]]",res_low,"(005053, res_low, res_low)"
6041,6041,res_low,0.1099,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.005, 0.0]]",res_low,"(006041, res_low, res_low)"
6042,6042,res_low,0.0946,../02_Data/03_Images/02_PSD_Processed/05_0.25x...,"[[0.0, 0.205, 0.207, 0.19, 0.0, 0.387, 0.0]]",res_low,"(006042, res_low, res_low)"


In [68]:
predict_df["move_file_tuple"].apply(separate_files)

004052    None
004053    None
005053    None
006041    None
006042    None
010060    None
010061    None
011056    None
011057    None
011058    None
011060    None
011061    None
012056    None
012062    None
013056    None
013059    None
013060    None
013062    None
014058    None
014062    None
015056    None
015057    None
015058    None
015060    None
015082    None
015084    None
015085    None
016056    None
016059    None
016080    None
          ... 
226127    None
226128    None
226129    None
226130    None
226132    None
226133    None
226134    None
226135    None
226136    None
227122    None
227123    None
227124    None
227125    None
227126    None
227127    None
227128    None
227129    None
227132    None
228123    None
228124    None
228131    None
228132    None
228133    None
228134    None
229128    None
229129    None
229130    None
229131    None
229132    None
230127    None
Name: move_file_tuple, Length: 7533, dtype: object

### Save DataFrame as a CSV for Reference

In [72]:
predict_df.to_csv("../02_Data/07_Predictions/z_prediction.csv")
data_df.to_csv("../02_Data/07_Predictions/z_zoning_bldg_coverage.csv")