In [1]:
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re

In [2]:
#read in csv data
austin = pd.read_csv('data/austin.csv', index_col = None)
austin = austin.drop('Unnamed: 0', axis = 1)
austin

Unnamed: 0,image_name,zoning_id,zoning_zty
0,data/austin_0.jpg,229791,PUD
1,data/austin_1.jpg,229791,PUD
2,data/austin_2.jpg,400523442,I-RR
3,data/austin_3.jpg,229791,PUD
4,data/austin_4.jpg,229791,PUD
...,...,...,...
3662,data/austin_3662.jpg,136346,PUD
3663,data/austin_3663.jpg,136346,PUD
3664,data/austin_3664.jpg,136346,PUD
3665,data/austin_3665.jpg,136346,PUD


In [3]:
#look at how many values there are
austin['zoning_zty'].value_counts()

PUD                  505
SF-2                 292
I-RR                 275
SF-3-NP              242
P                    222
                    ... 
P-NCCD-NP              1
SF-3-NCCD-NP-HD        1
MF-4-NCCD-NP           1
SF-3-H-HD-NCCD-NP      1
I-MF-2                 1
Name: zoning_zty, Length: 167, dtype: int64

There are far too many unique classes, so we will combine them into 4 basic classes: residential, commercial, industrial, and special. Zones retrieved from:
https://www.austintexas.gov/sites/default/files/files/Planning/CodeNEXT/Draft_3_LDC_Guide.pdf

In [4]:
residential = ['LA',
            'RR',
            'SF-1',
            'SF-2',
            'SF-3',
            'SF-4A',
            'SF-4-A,'
            'SF-4B',
            'SF-5',
            'SF-6',
            'MF-1',
            'MF-2',
            'MF-3',
            'MF-4',
            'MF-5',
            'MF-6',
            'MF-CO',
            'MH']

commercial = ['NO',
            'LO',
            'GO',
            'CR',
            'LR',
            'GR',
            'L',
            'CBD',
            'DMU',
            'W/LO',
            'CS',
            'CS-1',
            'CH']

industrial = ['IP', 'LI', 'MI', 'R&D']

special = ['DR', 'AV', 'AG', 'P', 'PUD', 'TN', 'ERC', 'UNZ']

In [5]:
#create new column called "zone"
austin.loc[:, 'zone'] = np.nan

In [6]:
#loop through list of zone codes, map specific zone codes to R, C, I, or S
for i in residential:
    austin.loc[:, 'zone'] = np.where(austin.loc[:, 'zoning_zty'].str.contains(i) == True, 
                                     'R', austin.loc[:,'zone'])
for i in industrial:
    austin.loc[:, 'zone'] = np.where(((austin.loc[:, 'zoning_zty'].str.contains(i) == True) & (austin.loc[:, 'zone'] != 'R')), 
                                     'I', austin.loc[:,'zone'])
for i in commercial:
    austin.loc[:, 'zone'] = np.where((austin.loc[:, 'zoning_zty'].str.contains(i) == True) & \
                                     (austin.loc[:, 'zone'] != 'R') & (austin.loc[:, 'zone'] != 'I'), 
                                     'C', austin.loc[:,'zone'])
for i in special:
    austin.loc[:, 'zone'] = np.where((austin.loc[:, 'zoning_zty'].str.contains(i) == True) & \
                                     (austin.loc[:, 'zone'] != 'I') & (austin.loc[:, 'zone'] != 'R') &\
                                     (austin.loc[:, 'zone'] != 'C'), 
                                     'S', austin.loc[:,'zone'])

In [7]:
#check values
austin['zone'].value_counts()

R      1746
S      1235
C       446
I       239
nan       1
Name: zone, dtype: int64

In [8]:
#which one is still nan?
austin[austin['zone'] == 'nan']

Unnamed: 0,image_name,zoning_id,zoning_zty,zone
303,data/austin_303.jpg,120090,SF-4-A,


In [9]:
#manually set this one
austin.loc[303, 'zone'] = 'R'

In [10]:
austin['zone'].value_counts()

R    1747
S    1235
C     446
I     239
Name: zone, dtype: int64