In [24]:
from collections import defaultdict
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
import shutil
import string
import os

In [25]:
original_image = mpimg.imread("../02_Data/04_Zoning_Maps/Las_Vegas_MSA.png")
print(original_image.shape)

(4562, 4259, 3)


### Global Variables

In [26]:
cols = 224
rows = 240

min_zoning_threshold = .25     # this sets the minimum percent that a zoning type should cover before an image
                               # is assigned to that type. It is used in cell 'Zoning - Processing Function'
min_coverage_threshold = 0.05  # this variable sets the minimum building coverage in each image. It is used in
                               # cell 'Site Coverage - EDA'

### Zoning - Internal Variables

In [27]:
# RGB values mapped to program type
red = 0
green = 1
blue = 2

# keys for dictionary
commercial_high = (red, round(10*255/255))
commercial_med  = (red, round(10*200/255))
industrial_high = (green, round(10*75/255))
industrial_office_med = (green, round(10*150/255))
res_low  = (blue, round(10*255/255))
res_med  = (blue, round(10*200/255))
res_high = (blue, round(10*100/255))

# zoning_list
zoning_list = [commercial_high,commercial_med,industrial_high,
               industrial_office_med,res_low,res_med,res_high]
zoning_names = ["com_high","com_med","ind_high","ind_office_med","res_low","res_med","res_high"]
zoning_names_dict = dict(zip(zoning_list,zoning_names))

# Column/Row Coordinates
col_offset_float = original_image.shape[1]/cols
row_offset_float = original_image.shape[0]/rows

# Subdivided Image Size
col_pixels = int(col_offset_float)
row_pixels = int(row_offset_float)

### Zoning - Processing Function

In [28]:
def get_zoning(image_array):
    rgb_dict = defaultdict(int)
    
    for i in range(image_array.shape[0]):       # for the row dimension
        for j in range(image_array.shape[1]):   # for the column dimension
            pixel_rgb = image_array[i][j]
            if sum(pixel_rgb) > 1.1:            # this should exclude (1) noise and (2) white pixels (255,255,255)
                pass                            # correct pixels should never exceed 1 when summed (255,0,0)
            else:
                dominant_channel = np.argmax(image_array[i][j])                             # is the main channel value R,G or B?
                dominant_channel_value = int(round(10*image_array[i][j][dominant_channel])) # and what is the value? round to clean out noise
                rgb_dict[(dominant_channel,dominant_channel_value)] += 1                    # dictionary of channel + value, increment
                
    zoning_dict = defaultdict(int)

    for zoning in zoning_list:                  # extract the relevant values corresponding to zoning pixels
        new_key = zoning_names_dict[zoning]
        zoning_dict[new_key] = rgb_dict[zoning] # remap to new dictionary
    
    zoning_type, count = list(zoning_dict.keys()), list(zoning_dict.values())   # unzip to find largest value
    max_position = np.argmax(count)
    dominant_zoning = zoning_type[max_position]                                 # grab index of largest count
                                                                                # and find zoning type
    
    total_pixels = image_array.shape[0]*image_array.shape[1]
    if count[max_position] < (total_pixels * min_zoning_threshold):             # check if the dominant zoning
        dominant_zoning = "none"                                                # type meets a minimum threshold
    
    return dominant_zoning

### Zoning - Processing Loop

In [29]:
new_image = np.zeros(shape=(row_pixels,col_pixels,3))
image_dict = defaultdict(list)

for r in range(rows):                                   # go through the row coordinates
    start_pixel_row = int(r*row_offset_float)          
    for c in range(cols):                               # go through the column coordinates
        start_pixel_col = int(c*col_offset_float)       # define the start and end of the column pixels
        end_pixel_col = start_pixel_col + col_pixels   
        for rp in range(row_pixels):                    # and go row by row within the coordinate grid
            new_image[rp] = original_image[start_pixel_row+rp][start_pixel_col:end_pixel_col]
        
        row_name = ("00"+str(r))[-3:]
        col_name = ("00"+str(c))[-3:]
        image_dict[row_name+col_name] = [get_zoning(new_image)]

### Zoning - EDA
How many of each Zoning Type are there? 

In [30]:
zone_dict = defaultdict(int)

for square in list(image_dict.values()):
    zone_dict[square[0]] += 1

zone_dict

defaultdict(int,
            {'none': 37244,
             'ind_high': 1310,
             'res_low': 9754,
             'ind_office_med': 1351,
             'com_med': 1861,
             'res_med': 970,
             'res_high': 760,
             'com_high': 510})

### Create DataFrame

In [31]:
grid_df = pd.DataFrame.from_dict(image_dict, orient='index') 
grid_df = grid_df.rename({0:"zoning"},axis=1)
grid_df["code"] = grid_df.index.get_values()
grid_df = grid_df[["code","zoning"]]
grid_df.head()

Unnamed: 0,code,zoning
0,0,none
1,1,none
2,2,none
3,3,none
4,4,none


### Site Coverage - Function

In [32]:
def get_coverage(row):
    total = 8053.7   # I calcluated this manually since the white is not 100% 255,255,255 white. If 
                     # the image processing steps changes to a more white image, this value will have to change
    fragment = mpimg.imread(f"../02_Data/03_Images/02_PSD_Processed/05_0.25x0.25-0.125x0.125_Square/01_Renamed/Las_Vegas_{row}.png")
    pixel_count = sum(sum(fragment))[0]   # just add all the values together
    return round(1 - pixel_count/total,4) # round it to 4 decimal places

In [33]:
# This code finds the value for the variable total (8053.7)
# fragment = mpimg.imread("../02_Data/03_Images/02_PSD_Processed/05_0.25x0.25-0.125x0.125_Square/01_Renamed/Las_Vegas_000000.png")
# pixel_count = sum(sum(fragment))[0]
# pixel_count

### Site Coverage - Apply Function

In [34]:
grid_df["coverage"] = grid_df["code"].apply(get_coverage)

In [44]:
grid_df["coverage"] = grid_df["coverage"].astype(float)    # convert to a float (was a string)

### Site Coverage - EDA

In [45]:
grid_df.head()

Unnamed: 0,code,zoning,coverage
0,0,none,0.0
1,1,none,0.0
2,2,none,0.0
3,3,none,0.0
4,4,none,0.0


Number of plots with at least the minimum zoning and at least the minimum building coverage

In [46]:
zoned_lots_w_bldgs_df = grid_df[(grid_df["zoning"] != 'none') & (grid_df["coverage"] > min_coverage_threshold)] 
zoned_lots_w_bldgs_df.shape

(11279, 3)

In [47]:
zoned_lots_w_bldgs_df.head()

Unnamed: 0,code,zoning,coverage
3048,3048,res_low,0.3066
3049,3049,res_low,0.2797
3080,3080,res_low,0.0564
4045,4045,res_low,0.0895
4046,4046,res_low,0.0801


How many of each zoning type do I have images of with built buildings?

In [48]:
zoned_lots_w_bldgs_df["zoning"].value_counts()

res_low           6913
com_med           1364
res_med            817
res_high           696
ind_office_med     590
ind_high           482
com_high           417
Name: zoning, dtype: int64

### Organize Files - Function

In [49]:
def organize(row):
    coordinate = row
    zoning_cat = grid_df["zoning"][coordinate]
    source_loc = f"../02_Data/05_Data_for_Modelling/01_Renamed/Las_Vegas_{coordinate}.png"
    destination_loc = f"../02_Data/05_Data_for_Modelling/{zoning_cat}/Las_Vegas_{coordinate}.png"
    empty_loc = f"../02_Data/05_Data_for_Modelling/empty/Las_Vegas_{coordinate}.png"
    if grid_df["coverage"][row] >= min_coverage_threshold:     # organize images based on their zoning category
        os.rename(source_loc, destination_loc)                 # except if there is below a min building coverage
    else: os.rename(source_loc, empty_loc)                     # then put these min images in an empty folder

### Organize Files - Apply Function

In [50]:
grid_df["code"].apply(organize)

000000    None
000001    None
000002    None
000003    None
000004    None
000005    None
000006    None
000007    None
000008    None
000009    None
000010    None
000011    None
000012    None
000013    None
000014    None
000015    None
000016    None
000017    None
000018    None
000019    None
000020    None
000021    None
000022    None
000023    None
000024    None
000025    None
000026    None
000027    None
000028    None
000029    None
          ... 
239194    None
239195    None
239196    None
239197    None
239198    None
239199    None
239200    None
239201    None
239202    None
239203    None
239204    None
239205    None
239206    None
239207    None
239208    None
239209    None
239210    None
239211    None
239212    None
239213    None
239214    None
239215    None
239216    None
239217    None
239218    None
239219    None
239220    None
239221    None
239222    None
239223    None
Name: code, Length: 53760, dtype: object