In [146]:
import pandas as pd
import numpy as np
import json
from pprint import pprint
from sqlalchemy import create_engine
from sklearn.neighbors import NearestNeighbors

# Set pandas to display numbers with commas
pd.options.display.float_format = '{:,}'.format

# Prep Data and Create DB

### Import and Clean Data

In [147]:
# import and clean data

pluto_df = pd.read_csv("pluto.csv")
pluto_df.sort_values(by="zipcode", inplace=True)
pluto_df = pluto_df.reset_index(drop=True).copy()
pluto_df.dropna(inplace=True)
pluto_df.head()

Unnamed: 0,zipcode,borough,borocode,landuse,bldgarea,lotarea
0,10001.0,MN,1.0,5.0,611625.0,23601.0
1,10001.0,MN,1.0,4.0,13489.0,2469.0
2,10001.0,MN,1.0,5.0,34000.0,17773.0
3,10001.0,MN,1.0,11.0,0.0,0.0
4,10001.0,MN,1.0,5.0,2008.0,1980.0


In [148]:
#One hot encoding for landuse

dummy_df = pd.get_dummies(pluto_df.landuse)
pluto_df.drop("landuse", axis=1, inplace=True)
data_df = pluto_df.join(dummy_df)
data_df.head()

Unnamed: 0,zipcode,borough,borocode,bldgarea,lotarea,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0
0,10001.0,MN,1.0,611625.0,23601.0,0,0,0,0,1,0,0,0,0,0,0
1,10001.0,MN,1.0,13489.0,2469.0,0,0,0,1,0,0,0,0,0,0,0
2,10001.0,MN,1.0,34000.0,17773.0,0,0,0,0,1,0,0,0,0,0,0
3,10001.0,MN,1.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,1
4,10001.0,MN,1.0,2008.0,1980.0,0,0,0,0,1,0,0,0,0,0,0


In [149]:
# Create column with dummy values that can be counted during groupby
# These will be used later to cull mini zipcodes in Manhattan with only a couple of buildings
data_df["bldg_count"] = data_df["zipcode"]

In [150]:
# Associate bldgarea with landuse values

cols = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
for col in cols:
    data_df[col] = data_df[col] * data_df.bldgarea
data_df.drop("bldgarea", axis=1, inplace=True)
data_df.head()

Unnamed: 0,zipcode,borough,borocode,lotarea,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,bldg_count
0,10001.0,MN,1.0,23601.0,0.0,0.0,0.0,0.0,611625.0,0.0,0.0,0.0,0.0,0.0,0.0,10001.0
1,10001.0,MN,1.0,2469.0,0.0,0.0,0.0,13489.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10001.0
2,10001.0,MN,1.0,17773.0,0.0,0.0,0.0,0.0,34000.0,0.0,0.0,0.0,0.0,0.0,0.0,10001.0
3,10001.0,MN,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10001.0
4,10001.0,MN,1.0,1980.0,0.0,0.0,0.0,0.0,2008.0,0.0,0.0,0.0,0.0,0.0,0.0,10001.0


In [151]:
# Create summary data

data_df.drop("borough", axis=1, inplace=True)
landuse_df = data_df.groupby("zipcode").agg({'borocode':'median',
                                             'lotarea': 'sum',
                                             1.0:'sum',
                                             2.0:'sum',
                                             3.0:'sum',
                                             4.0:'sum', 
                                             5.0:'sum',
                                             6.0:'sum',
                                             7.0:'sum',
                                             8.0:'sum',
                                             9.0:'sum',
                                             10.0:'sum',
                                             11.0:'sum',
                                             'bldg_count': 'count'
                                            })
landuse_df.reset_index(inplace=True)
landuse_df.head()

Unnamed: 0,zipcode,borocode,lotarea,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,bldg_count
0,10001.0,1.0,12931894.0,33606.0,592289.0,8350224.0,13875477.0,53005101.0,4445633.0,1182594.0,10895555.0,1152624.0,616150.0,0.0,980
1,10002.0,1.0,18127897.0,51117.0,1938379.0,16618352.0,16356822.0,3200967.0,1182700.0,590077.0,4618645.0,87067.0,252263.0,0.0,1672
2,10003.0,1.0,9648834.0,515395.0,3786590.0,12746613.0,15687708.0,10760299.0,714504.0,3459.0,9501989.0,126922.0,159151.0,0.0,1864
3,10004.0,1.0,12027534.0,0.0,0.0,725023.0,4194913.0,21117265.0,85472.0,462543.0,3722844.0,945425.0,87775.0,0.0,112
4,10005.0,1.0,1934277.0,0.0,0.0,2480663.0,5683041.0,16515733.0,0.0,682450.0,43568.0,0.0,0.0,0.0,64


In [152]:
# Relabel borocode so easy to remember (had to drop above b/c groupby only accepts numeric columns)
boro_dict = {"Manhattan": 1.0, 
             "Bronx": 2.0, 
             "Brooklyn": 3.0, 
             "Queens": 4.0, 
             "Staten Island": 5.0}

def relabel(num):
    boros = list(boro_dict.keys())
    values = list(boro_dict.values())    
    return boros[values.index(num)]

landuse_df['borocode'] = landuse_df['borocode'].apply(relabel)

In [153]:
# Reorder colummns
cols_dict = {1.0: "1 & 2 Family Residential",
             2.0: "Multi-Family Walk-up",
             3.0: "Multi-Family Elevator",
             4.0: "Mixed Residential & Commercial",
             5.0: "Commercial & Office",
             6.0: "Industrial & Manufacturing",
             7.0: "Transportation & Utility",
             8.0: "Public Facilities & Institutions",
             9.0: "Open Space & Outdoor Recreation",
             10.0: "Parking",
             11.0: "Vacant Land"}

landuse_df.rename(columns = cols_dict, inplace=True)

In [159]:
# Remove miniscule zipcodes from Manhattan only (lots of 1-10 building mini-zipcodes)
landuse_df = landuse_df[(landuse_df.bldg_count >= 10) | (landuse_df.borocode != "Manhattan")].copy()
landuse_df.drop('bldg_count', axis=1, inplace=True)

In [160]:
landuse_df.head()

Unnamed: 0,zipcode,borocode,lotarea,1 & 2 Family Residential,Multi-Family Walk-up,Multi-Family Elevator,Mixed Residential & Commercial,Commercial & Office,Industrial & Manufacturing,Transportation & Utility,Public Facilities & Institutions,Open Space & Outdoor Recreation,Parking,Vacant Land
0,10001.0,Manhattan,12931894.0,33606.0,592289.0,8350224.0,13875477.0,53005101.0,4445633.0,1182594.0,10895555.0,1152624.0,616150.0,0.0
1,10002.0,Manhattan,18127897.0,51117.0,1938379.0,16618352.0,16356822.0,3200967.0,1182700.0,590077.0,4618645.0,87067.0,252263.0,0.0
2,10003.0,Manhattan,9648834.0,515395.0,3786590.0,12746613.0,15687708.0,10760299.0,714504.0,3459.0,9501989.0,126922.0,159151.0,0.0
3,10004.0,Manhattan,12027534.0,0.0,0.0,725023.0,4194913.0,21117265.0,85472.0,462543.0,3722844.0,945425.0,87775.0,0.0
4,10005.0,Manhattan,1934277.0,0.0,0.0,2480663.0,5683041.0,16515733.0,0.0,682450.0,43568.0,0.0,0.0,0.0


In [161]:
# Reorder columns
cols = ['zipcode',
        'borocode',
        'lotarea',
        '1 & 2 Family Residential',
        'Multi-Family Walk-up',
        'Multi-Family Elevator',
        'Mixed Residential & Commercial',
        'Commercial & Office',
        'Industrial & Manufacturing',
        'Transportation & Utility',
        'Public Facilities & Institutions',
        'Open Space & Outdoor Recreation',
        'Parking',
        'Vacant Land']

landuse_df = landuse_df[cols]

In [165]:
landuse_df[landuse_df.zipcode == 10471]

Unnamed: 0,zipcode,borocode,lotarea,1 & 2 Family Residential,Multi-Family Walk-up,Multi-Family Elevator,Mixed Residential & Commercial,Commercial & Office,Industrial & Manufacturing,Transportation & Utility,Public Facilities & Institutions,Open Space & Outdoor Recreation,Parking,Vacant Land
101,10471.0,Bronx,51465261.0,4559227.0,989991.0,7230984.0,376117.0,308906.0,0.0,3032.0,3419893.0,151330.0,1645.0,0.0


### Normalize data and remove zipcodes not shared by PLUTO and zipcode GeoJSON

In [99]:
geo_path = "../nearest_neighborhood/static/geojson/zipcodes.geojson"

with open(geo_path) as f:
    d = json.load(f)
    pprint(d)

{'features': [{'geometry': {'coordinates': [[[-73.86942457284175,
                                              40.74915687096787],
                                             [-73.89507143240856,
                                              40.74646547081214],
                                             [-73.89618737867819,
                                              40.74850942518086],
                                             [-73.89583954185139,
                                              40.748546875706005],
                                             [-73.89525242774396,
                                              40.74830660945023],
                                             [-73.89654041085561,
                                              40.750541998143575],
                                             [-73.89579868613828,
                                              40.750619721332605],
                                             [-73.89652230661433,
       

                                             [-73.7337734219063,
                                              40.74286441525263],
                                             [-73.73124540301161,
                                              40.744881626424565],
                                             [-73.7300508313049,
                                              40.74620598219407],
                                             [-73.72912929626756,
                                              40.7476056255959],
                                             [-73.72827633138401,
                                              40.74991386618404],
                                             [-73.72763949821193,
                                              40.75092126949652],
                                             [-73.72690383377657,
                                              40.75167704941076],
                                             [-73.7242148710979,
             

                                             [-73.83079050446555,
                                              40.71419918262998],
                                             [-73.826059565848,
                                              40.71539860277048]]],
                            'type': 'Polygon'},
               'id': 17,
               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/11415',
                              'BLDGpostalCode': 0,
                              'CTY_FIPS': '081',
                              'OBJECTID': 18,
                              'PO_NAME': 'Kew Gardens',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 16802279.5098,
                              'Shape_Leng': 21074.6515709,
                              'borough': 'Queens',
                              'postalCode': '11415'},
               'type': 'Feature'},
              {'ge

                              'borough': 'Brooklyn',
                              'postalCode': '11216'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-73.84430243497147,
                                              40.689443010406286],
                                             [-73.83754416389462,
                                              40.69136416111784],
                                             [-73.83551869361439,
                                              40.68724711425137],
                                             [-73.83524646584192,
                                              40.68732463108827],
                                             [-73.83426544717621,
                                              40.68532735819576],
                                             [-73.84680064291166,
                                              40.6818350215038],
                                             [-73.8539238905089,
 

               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-73.95839960006599,
                                              40.63632695865279],
                                             [-73.95374260369759,
                                              40.638566135216195],
                                             [-73.9536000492334,
                                              40.63842236218583],
                                             [-73.95129131136085,
                                              40.63841947128971],
                                             [-73.93873124101145,
                                              40.639200188016986],
                                             [-73.9384801990423,
                                              40.63686200243636],
                                             [-73.93560658290333,
                                              40.63703792051617],
                                         

                                             [-73.7964084551202,
                                              40.88483536376369],
                                             [-73.79440353230922,
                                              40.88319683668171],
                                             [-73.79301155164535,
                                              40.883301064745325],
                                             [-73.79328105763243,
                                              40.882703862680344],
                                             [-73.79351673194832,
                                              40.882705547033524],
                                             [-73.79373918946594,
                                              40.88242101749688],
                                             [-73.79392940563373,
                                              40.88175782745167],
                                             [-73.79457788006744,
        

                                             [-73.79805308400206,
                                              40.851870831220964],
                                             [-73.79799796435364,
                                              40.851591755435685],
                                             [-73.79817652739422,
                                              40.851482457156045],
                                             [-73.79837060433358,
                                              40.85076156395756],
                                             [-73.79963310975806,
                                              40.84982487172783],
                                             [-73.79993291800288,
                                              40.84879900705984],
                                             [-73.80043829101623,
                                              40.84810632716163],
                                             [-73.80069583425721,
       

               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/10034',
                              'BLDGpostalCode': 0,
                              'CTY_FIPS': '061',
                              'OBJECTID': 50,
                              'PO_NAME': 'Inwood',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 24503892.12,
                              'Shape_Leng': 28233.7535201,
                              'borough': 'Manhattan',
                              'postalCode': '10034'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-73.93213126627553,
                                              40.869450652750444],
                                             [-73.93127671033326,
                                              40.86884361367126],
                                             [-73.93453048454519,
                            

                                              40.824388462137975],
                                             [-73.81175936048655,
                                              40.82434174764735],
                                             [-73.81167193514068,
                                              40.82468724381751],
                                             [-73.81120529572054,
                                              40.824595528711185],
                                             [-73.81121511194902,
                                              40.82479746967215],
                                             [-73.81103489157742,
                                              40.824708316983546],
                                             [-73.81086641161545,
                                              40.82515781502031],
                                             [-73.81041148489567,
                                              40.824990670431426],
      

               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/10464',
                              'BLDGpostalCode': 0,
                              'CTY_FIPS': '005',
                              'OBJECTID': 57,
                              'PO_NAME': 'Pelham Bay Park',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 11587953.8276,
                              'Shape_Leng': 26773.6344401,
                              'borough': 'Bronx',
                              'postalCode': '10464'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-73.84218235210301,
                                              40.83537416906358],
                                             [-73.8454272197241,
                                              40.83493650587198],
                                             [-73.8458153111719,
                        

                                              40.826294535013176],
                                             [-73.91211594116584,
                                              40.827885972661136],
                                             [-73.91194860236519,
                                              40.827835474895366],
                                             [-73.91127397045084,
                                              40.829101492957385],
                                             [-73.91030770499657,
                                              40.82877250411782],
                                             [-73.91169430267045,
                                              40.82775873382422],
                                             [-73.91179361330045,
                                              40.82642818162548],
                                             [-73.9113064353643,
                                              40.82484260023421],
       

                                             [-73.96484783057795,
                                              40.80670634773313],
                                             [-73.9620082796616,
                                              40.80550928432515],
                                             [-73.96105362531289,
                                              40.80682096756859],
                                             [-73.9593567476704,
                                              40.80612090623688]]],
                            'type': 'Polygon'},
               'id': 78,
               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/10025',
                              'BLDGpostalCode': 0,
                              'CTY_FIPS': '061',
                              'OBJECTID': 79,
                              'PO_NAME': 'Manhattan Valley',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
       

               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/10128',
                              'BLDGpostalCode': 0,
                              'CTY_FIPS': '061',
                              'OBJECTID': 88,
                              'PO_NAME': 'Carnegie Hill',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 11011681.5568,
                              'Shape_Leng': 15005.0963232,
                              'borough': 'Manhattan',
                              'postalCode': '10128'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-73.88509634340522,
                                              40.778464378189874],
                                             [-73.88525027297052,
                                              40.77861292455439],
                                             [-73.88505675867148,
                   

                                              40.75699193428497],
                                             [-73.7224171269723,
                                              40.755103796510625],
                                             [-73.72306624969382,
                                              40.75427512661749],
                                             [-73.7242148710979,
                                              40.75333168590093],
                                             [-73.72641523108936,
                                              40.75208712140159],
                                             [-73.72763949821193,
                                              40.75092126949652],
                                             [-73.72841024781373,
                                              40.749619738557314],
                                             [-73.72912929626756,
                                              40.7476056255959],
           

               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/11364',
                              'BLDGpostalCode': 0,
                              'CTY_FIPS': '081',
                              'OBJECTID': 109,
                              'PO_NAME': 'Oakland Gardens',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 84259665.0533,
                              'Shape_Leng': 37488.6771909,
                              'borough': 'Queens',
                              'postalCode': '11364'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-74.00170164222186,
                                              40.76138075542977],
                                             [-73.99395968106315,
                                              40.758252769648045],
                                             [-73.99273700488898,
                   

                                              40.72743584259512],
                                             [-73.9714961156689,
                                              40.72741848314708],
                                             [-73.97176935741716,
                                              40.72581105101681],
                                             [-73.973476987278,
                                              40.718879038813014],
                                             [-73.974869782192,
                                              40.715177229896184],
                                             [-73.97669586935731,
                                              40.71155932662336],
                                             [-73.9779941940137,
                                              40.71056447085009],
                                             [-73.98128866027025,
                                              40.71022341586406],
              

                                              40.708196844723716],
                                             [-73.99922642801454,
                                              40.70787485413706]]],
                            'type': 'Polygon'},
               'id': 132,
               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/10038',
                              'BLDGpostalCode': 0,
                              'CTY_FIPS': '061',
                              'OBJECTID': 133,
                              'PO_NAME': 'Financial District',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 7022760.6429,
                              'Shape_Leng': 12605.2624283,
                              'borough': 'Manhattan',
                              'postalCode': '10038'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-73.93480281087365,
    

                                              40.68962191204708],
                                             [-73.97268745886024,
                                              40.68741521219588],
                                             [-73.97364817218354,
                                              40.68730263517516],
                                             [-73.97337441103068,
                                              40.6858843937325],
                                             [-73.97227444657481,
                                              40.685442737240315],
                                             [-73.97174180986187,
                                              40.68264604091808],
                                             [-73.97056593435647,
                                              40.68239711329087],
                                             [-73.97244491752706,
                                              40.67716202563518],
          

                                             [-73.83121793120462,
                                              40.649315257885654],
                                             [-73.8314502686804,
                                              40.64821693894233],
                                             [-73.83363759484132,
                                              40.648121062168755],
                                             [-73.83521937724525,
                                              40.648324954869565],
                                             [-73.83547876925988,
                                              40.64862143677511],
                                             [-73.83557801368863,
                                              40.649124567316775],
                                             [-73.83620914161023,
                                              40.64967784812698],
                                             [-73.8360887733297,
        

                                              40.64447364130024],
                                             [-73.8865450386075,
                                              40.64430646457873],
                                             [-73.88607208561329,
                                              40.643695450605996],
                                             [-73.88495499059621,
                                              40.64326381028618],
                                             [-73.88395362135941,
                                              40.64265363995645],
                                             [-73.88063556693574,
                                              40.63962985448538],
                                             [-73.88052765754934,
                                              40.639136317212454],
                                             [-73.8795373482981,
                                              40.63823185586402],
          

                                             [-74.14384005274948,
                                              40.63889470130579],
                                             [-74.1438396553383,
                                              40.63917538753179],
                                             [-74.14357309292765,
                                              40.63922679634125],
                                             [-74.14336103096655,
                                              40.63970986379233],
                                             [-74.14257770031142,
                                              40.63953415662728],
                                             [-74.14238795196613,
                                              40.63964122129723],
                                             [-74.14229767589491,
                                              40.64030581843898],
                                             [-74.14155623872226,
           

                                             [-73.81564157857022,
                                              40.6126956307473],
                                             [-73.81542979838605,
                                              40.611653994320896],
                                             [-73.81505608863,
                                              40.61094041964456],
                                             [-73.81464369100392,
                                              40.61086304247208],
                                             [-73.81433216393533,
                                              40.61127204544757],
                                             [-73.8142142441882,
                                              40.61114284954625],
                                             [-73.8145180191947,
                                              40.610737054646954],
                                             [-73.81507140306246,
              

                                              40.58107215592544],
                                             [-74.20553530728458,
                                              40.58111075306791],
                                             [-74.20526040831358,
                                              40.581449119307294],
                                             [-74.2046115284568,
                                              40.583534493370166],
                                             [-74.20456344526687,
                                              40.584042135322996],
                                             [-74.20475039445768,
                                              40.58428838797892],
                                             [-74.2044670241014,
                                              40.584592588720156],
                                             [-74.20455913556694,
                                              40.58519065321036],
        

                                              40.60724420500403]]],
                            'type': 'Polygon'},
               'id': 177,
               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/11214',
                              'BLDGpostalCode': 0,
                              'CTY_FIPS': '047',
                              'OBJECTID': 178,
                              'PO_NAME': 'Bath Beach',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 61096539.3863,
                              'Shape_Leng': 52861.7542475,
                              'borough': 'Brooklyn',
                              'postalCode': '11214'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-73.74690978489082,
                                              40.611725181962406],
                                             [-73.74654302449439,
            

                                              40.583508297086325],
                                             [-73.94304894851457,
                                              40.58349353714264],
                                             [-73.94256212579002,
                                              40.583126753660174],
                                             [-73.94319558734155,
                                              40.58348951955543],
                                             [-73.94354535905136,
                                              40.58347080796538],
                                             [-73.94309334565297,
                                              40.583134558085675],
                                             [-73.9436915268201,
                                              40.583467833220936],
                                             [-73.94406766839589,
                                              40.58347202427837],
       

                                              40.58096251282502],
                                             [-74.00581162069466,
                                              40.58187727230928],
                                             [-74.00502963141358,
                                              40.58171915200376],
                                             [-74.00397328589776,
                                              40.58183416957294],
                                             [-73.99886476427345,
                                              40.58150273903509],
                                             [-73.99824954654119,
                                              40.58130847480347],
                                             [-73.99787110081085,
                                              40.58064972617592],
                                             [-73.9974018191112,
                                              40.580503040984475],
          

                                              40.50190662532063],
                                             [-74.2250035441684,
                                              40.501368424309895],
                                             [-74.22542677797527,
                                              40.50163266994578],
                                             [-74.22681775411455,
                                              40.50204010812477],
                                             [-74.22814235232524,
                                              40.50202397885105],
                                             [-74.23118202003275,
                                              40.51089576320519],
                                             [-74.23260169485205,
                                              40.51583919497924],
                                             [-74.23282673610318,
                                              40.51637659578472],
          

                                              40.706124534645234],
                                             [-74.00821833819428,
                                              40.70654131593431]]],
                            'type': 'Polygon'},
               'id': 220,
               'properties': {'@id': 'http://nyc.pediacities.com/Resource/PostalCode/10270',
                              'BLDGpostalCode': 1,
                              'CTY_FIPS': '061',
                              'OBJECTID': 221,
                              'PO_NAME': 'Financial District',
                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 31255.8004282,
                              'Shape_Leng': 753.005571394,
                              'borough': 'Manhattan',
                              'postalCode': '10270'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-73.97826208547069,
   

                              'STATE': 'NY',
                              'ST_FIPS': '36',
                              'Shape_Area': 67350.3889582,
                              'Shape_Leng': 1067.98354189,
                              'borough': 'Manhattan',
                              'postalCode': '10285'},
               'type': 'Feature'},
              {'geometry': {'coordinates': [[[-74.00936106903438,
                                              40.70641810896439],
                                             [-74.00910683224994,
                                              40.70669129947036],
                                             [-74.00884319868976,
                                              40.70652625630068],
                                             [-74.00908463334824,
                                              40.70626062267263],
                                             [-74.00936106903438,
                                              40.7064

In [100]:
# Create new df from zipcode GeoJSON

geo_path = "../nearest_neighborhood/static/geojson/zipcodes.geojson"

zips = []
areas = []
neighborhood = []
ids = []

with open(geo_path) as f:
    d = json.load(f)
    #pprint(d)
    for obj in d['features']:
        zips.append(obj['properties']['postalCode'])
        areas.append(obj['properties']['Shape_Area'])
        neighborhood.append(obj['properties']['PO_NAME'])
        ids.append(obj['id'])
        
data = {"zipcode": zips, "id": ids, "area": areas, "neighborhood": neighborhood}
geo_df = pd.DataFrame.from_dict(data)

geo_df.sort_values("zipcode", inplace=True)
geo_df.reset_index(drop=True, inplace=True)

In [103]:
print(geo_df.to_string())

    zipcode   id            area                         neighborhood
0     00083   82 38,300,990.4037                         Central Park
1     10001  113 17,794,940.7729                      NOMAD / Chelsea
2     10002  123 26,280,128.5931                      Lower East Side
3     10003  121 15,538,376.2738                  NOHO / Union Square
4     10004  146   670,708.02007                        Bowling Green
5     10004  145 7,679,615.51128                        Bowling Green
6     10004  141 1,202,707.70742                        Bowling Green
7     10004  138 4,001,782.18505                        Bowling Green
8     10005  136 2,082,901.14074                          Wall Street
9     10006  134 1,716,640.90186                      Lower Manhattan
10    10007  129 5,328,635.48537                            City Hall
11    10009  125 15,903,519.5061                         East Village
12    10010  118 9,768,395.37543                 Flat Iron / Gramercy
13    10011  114 18,

In [None]:
# There are some zipcodes with multiple areas, so need to groupby and sum
# Why not just divide by the bldg_count? 

# Can't divide by number of floors b/c a big 3 story warehouse ends up with much more area than a 24 floor tower
# Can normalize by lot area per building, but that doesn't account for giant zipcodes
# Normalize total GFA by total lot area? 
# Or average with building count

In [72]:
landuse_df

Unnamed: 0,zipcode,borocode,neighborhood,bldg_count,1 & 2 Family Residential,Multi-Family Walk-up,Multi-Family Elevator,Mixed Residential & Commercial,Commercial & Office,Industrial & Manufacturing,Transportation & Utility,Public Facilities & Institutions,Open Space & Outdoor Recreation,Parking,Vacant Land
0,10001.0,Manhattan,NOMAD / Chelsea,981,33606.0,592289.0,8350224.0,13875477.0,53005101.0,4445633.0,1182594.0,10895555.0,1152624.0,616150.0,0.0
1,10002.0,Manhattan,Lower East Side,1672,51117.0,1938379.0,16618352.0,16356822.0,3200967.0,1182700.0,590077.0,4618645.0,87067.0,252263.0,0.0
2,10003.0,Manhattan,NOHO / Union Square,1864,515395.0,3786590.0,12746613.0,15687708.0,10760299.0,714504.0,3459.0,9501989.0,126922.0,159151.0,0.0
3,10004.0,Manhattan,Bowling Green,112,0.0,0.0,725023.0,4194913.0,21117265.0,85472.0,462543.0,3722844.0,945425.0,87775.0,0.0
4,10005.0,Manhattan,Wall Street,64,0.0,0.0,2480663.0,5683041.0,16515733.0,0.0,682450.0,43568.0,0.0,0.0,0.0
5,10006.0,Manhattan,Lower Manhattan,64,0.0,0.0,2406484.0,1409933.0,7224839.0,0.0,0.0,237019.0,0.0,305026.0,0.0
6,10007.0,Manhattan,City Hall,231,11515.0,135365.0,25180716.0,7113948.0,26421102.0,186711.0,6565.0,1964664.0,0.0,0.0,0.0
7,10009.0,Manhattan,East Village,1314,49533.0,4551976.0,7867831.0,16437543.0,1094334.0,132152.0,175000.0,1778954.0,52703.0,3358.0,0.0
8,10010.0,Manhattan,Flat Iron / Gramercy,607,35514.0,671530.0,7188028.0,14478911.0,16348068.0,316637.0,65745.0,5288022.0,20206.0,84013.0,0.0
9,10011.0,Manhattan,Chelsea,2062,1029569.0,4471226.0,14800035.0,18123314.0,15296761.0,1648286.0,464332.0,4294146.0,12323.0,78016.0,0.0


In [14]:
csv_path = "../nearest_neighborhood/static/js/available_zipcodes.csv"
landuse_df.zipcode.to_csv(csv_path, indext=False)

  


In [18]:
# List of zipcodes without corresponding geoJSON need to be removed from database
drop_zips = [11249, 22222]

In [19]:
def check_zip(zipcode):
    if zipcode not in drop_zips:
        return True
    else:
        return False

In [23]:
landuse_df = landuse_df[landuse_df.zipcode.apply(check_zip)].copy()

### Create Database file for Flask app

In [25]:
database_path = "../zipcodeDB.sqlite"

In [26]:
# Create Engine
# will create sqlite file if it doesn't already exist
engine = create_engine(f"sqlite:///{database_path}")
conn = engine.connect()

In [27]:
# Push data to database
landuse_df.to_sql(name="zip_table", con=conn, if_exists ="replace", index=False)

# KNN Algorith for Backend

## Run Initial Query And Find Nearest Neighbor

In [91]:
# Specify target zipcode

#zipcode = 11101 #Long Island City, QNS
#zipcode = 11104 #Sunnyside, QNS
#zipcode = 11355 #Flushing, QNS
#zipcode = 11201 #Brooklyn Heights, BK
#zipcode = 10304 #Todt Hill, SI
zipcode = 10016 #Murray Hill

In [92]:
database_path = "../zipcodeDB.sqlite"

# Create Engine
# will create sqlite file if it doesn't already exist
engine = create_engine(f"sqlite:///{database_path}")
conn = engine.connect()

# Query records to test that it works
data_df = pd.read_sql("SELECT * FROM zip_table", conn)

In [93]:
data_df.head()

Unnamed: 0,zipcode,borocode,1 & 2 Family Residential,Multi-Family Walk-up,Multi-Family Elevator,Mixed Residential & Commercial,Commercial & Office,Industrial & Manufacturing,Transportation & Utility,Public Facilities & Institutions,Open Space & Outdoor Recreation,Parking,Vacant Land,neighborhood
0,10001.0,Manhattan,33606.0,592289.0,8350224.0,13875477.0,53005101.0,4445633.0,1182594.0,10895555.0,1152624.0,616150.0,0.0,NOMAD / Chelsea
1,10002.0,Manhattan,51117.0,1938379.0,16618352.0,16356822.0,3200967.0,1182700.0,590077.0,4618645.0,87067.0,252263.0,0.0,Lower East Side
2,10003.0,Manhattan,515395.0,3786590.0,12746613.0,15687708.0,10760299.0,714504.0,3459.0,9501989.0,126922.0,159151.0,0.0,NOHO / Union Square
3,10004.0,Manhattan,0.0,0.0,725023.0,4194913.0,21117265.0,85472.0,462543.0,3722844.0,945425.0,87775.0,0.0,Bowling Green
4,10005.0,Manhattan,0.0,0.0,2480663.0,5683041.0,16515733.0,0.0,682450.0,43568.0,0.0,0.0,0.0,Wall Street


In [94]:
# Split DFs so that selected outer-borough zipcode can find nearest neighbor zip in Manhattan
manhattan_df = data_df[data_df.borocode == "Manhattan"]
outer_borough_df = data_df[data_df.borocode != "Manhattan"]

In [95]:
# Create dictObj
# This formatting is reuired for the D3 chart, and is (believe it or not) the cleanest solution I could come up with...
dictObj = {"manhattan": {"borough": "Manhattan",
                         "zipcode": None,
                         "neighborhood": None,
                         "values": {"0": {"color": "#FFDD80", 
                                          "GFA": None,
                                          "label": "1 & 2 Family Residential"},
                                    "1": {"color": "#ff9100",
                                          "GFA": None,
                                          "label": "Multi-Family Walk-up"},
                                    "2": {"color": "#bf360c",
                                          "GFA": None,
                                          "label": "Multi-Family Elevator"},
                                    "3": {"color": "#ff5252",
                                          "GFA": None,
                                          "label": "Mixed Residential & Commercial"},
                                    "4": {"color": "#c51162",
                                          "GFA": None,
                                          "label": "Commercial & Office"},
                                    "5": {"color": "#7b1fa2",
                                          "GFA": None,
                                          "label": "Industrial & Manufacturing"},
                                    "6": {"color": "#ba68c8",
                                          "GFA": None,
                                          "label": "Transportation & Utility"},
                                    "7": {"color": "#0d47a1",
                                          "GFA": None,
                                          "label": "Public Facilities & Institutions"},
                                    "8": {"color": "#00bfa5",
                                          "GFA": None,
                                          "label": "Open Space & Outdoor Recreation"},
                                    "9": {"color": "#607d8b",
                                          "GFA": None, 
                                          "label": "Parking"},
                                    "10": {"color": "#263238",
                                           "GFA": None,
                                           "label": "Vacant Land"}
                                   }
                        },
           "outer": {"borough": None,
                             "zipcode": None,
                             "neighborhood": None,
                             "values": {"0": {"color": "#FFDD80", 
                                              "GFA": None,
                                              "label": "1 & 2 Family Residential"},
                                        "1": {"color": "#ff9100",
                                              "GFA": None,
                                              "label": "Multi-Family Walk-up"},
                                        "2": {"color": "#bf360c",
                                              "GFA": None,
                                              "label": "Multi-Family Elevator"},
                                        "3": {"color": "#ff5252",
                                              "GFA": None,
                                              "label": "Mixed Residential & Commercial"},
                                        "4": {"color": "#c51162",
                                              "GFA": None,
                                              "label": "Commercial & Office"},
                                        "5": {"color": "#7b1fa2",
                                              "GFA": None,
                                              "label": "Industrial & Manufacturing"},
                                        "6": {"color": "#ba68c8",
                                              "GFA": None,
                                              "label": "Transportation & Utility"},
                                        "7": {"color": "#0d47a1",
                                              "GFA": None,
                                              "label": "Public Facilities & Institutions"},
                                        "8": {"color": "#00bfa5",
                                              "GFA": None,
                                              "label": "Open Space & Outdoor Recreation"},
                                        "9": {"color": "#607d8b",
                                              "GFA": None, 
                                              "label": "Parking"},
                                        "10": {"color": "#263238",
                                               "GFA": None,
                                               "label": "Vacant Land"}
                                   }
                        }
          }


In [96]:
target_borough = data_df.borocode[data_df.zipcode == zipcode].tolist()[0]
target_neighborhood = data_df.neighborhood[data_df.zipcode == zipcode].tolist()[0]

In [97]:
target_borough

'Manhattan'

In [98]:
target_neighborhood

'Murray Hill / Kips Bay'

### Initial Query

In [99]:
if target_borough == 'Manhattan':
    # Pull out target zipcode from manhattan_df
    target_df = manhattan_df[manhattan_df.zipcode == zipcode]
    
    # Define zipcode, neighborhood
    dictObj["manhattan"]["zipcode"] = zipcode
    dictObj["manhattan"]["neighborhood"] = target_neighborhood
    
    for item in dictObj["manhattan"]["values"]:
        value_type = dictObj["manhattan"]["values"][item]["label"]
        dictObj["manhattan"]["values"][item]["GFA"] = target_df[value_type].tolist()[0]
    
    
else:
    # Pull out target zipcode from outer_borough_df
    target_df = outer_borough_df[outer_borough_df.zipcode == zipcode]
    
    # Define zipcode, neighborhood, and borough
    dictObj["outer"]["zipcode"] = int(zipcode)
    dictObj["outer"]["neighborhood"] = target_neighborhood
    dictObj["outer"]["borough"] = target_borough
    
    for item in dictObj["outer"]["values"]:
        value_type = dictObj["outer"]["values"][item]["label"]
        dictObj["outer"]["values"][item]["GFA"] = target_df[value_type].tolist()[0]

In [100]:
target_df

Unnamed: 0,zipcode,borocode,1 & 2 Family Residential,Multi-Family Walk-up,Multi-Family Elevator,Mixed Residential & Commercial,Commercial & Office,Industrial & Manufacturing,Transportation & Utility,Public Facilities & Institutions,Open Space & Outdoor Recreation,Parking,Vacant Land,neighborhood
13,10016.0,Manhattan,326668.0,1390578.0,13831161.0,22955466.0,29604440.0,670902.0,6150.0,7599194.0,12406.0,261082.0,0.0,Murray Hill / Kips Bay


In [101]:
# Either Manhattan or Outer Borough GFA should be empty
pprint(dictObj)

{'manhattan': {'borough': 'Manhattan',
               'neighborhood': 'Murray Hill / Kips Bay',
               'values': {'0': {'GFA': 326668.0,
                                'color': '#FFDD80',
                                'label': '1 & 2 Family Residential'},
                          '1': {'GFA': 1390578.0,
                                'color': '#ff9100',
                                'label': 'Multi-Family Walk-up'},
                          '10': {'GFA': 0.0,
                                 'color': '#263238',
                                 'label': 'Vacant Land'},
                          '2': {'GFA': 13831161.0,
                                'color': '#bf360c',
                                'label': 'Multi-Family Elevator'},
                          '3': {'GFA': 22955466.0,
                                'color': '#ff5252',
                                'label': 'Mixed Residential & Commercial'},
                          '4': {'GFA': 29604440.0,
         

### KNN

In [107]:
if target_borough == 'Manhattan': #i.e. the nearest neighbor will NOT be in Manhattan

    # Combine with Outerborough df to train model
    train_data = pd.concat([target_df, outer_borough_df])

    # Drop column with strings (b/c KNN needs numeric data) and reset index
    train_data.drop("borocode", axis=1, inplace=True)
    train_data.drop("neighborhood", axis=1, inplace=True)
    train_data.reset_index(drop=True, inplace=True)
    
    # Train Nearest Neighbors model
    nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(train_data)
    distances, indices = nbrs.kneighbors(train_data)

    # Find nearest neighbor
    nearest_neighbor_index = indices[0][1]
    result = train_data.iloc[nearest_neighbor_index, :]
    
    # Define zipcode, neighborhood, and borough
    result_zipcode = int(result["zipcode"])
    result_neighborhood = data_df[data_df.zipcode == result["zipcode"]].neighborhood.tolist()[0]
    result_borough = data_df[data_df.zipcode == result["zipcode"]].borocode.tolist()[0]
    dictObj["outer"]["zipcode"] = result_zipcode
    dictObj["outer"]["neighborhood"] = result_neighborhood
    dictObj["outer"]["borough"] = result_borough
    
    for item in dictObj["outer"]["values"]:
        value_type = dictObj["outer"]["values"][item]["label"]
        dictObj["outer"]["values"][item]["GFA"] = result[value_type]

        
else: # i.e. the nearest neighbor IS in Manhattan

    # Combine with Manhattan df to train model
    train_data = pd.concat([target_df, manhattan_df])
    
    # Drop column with strings and reset index
    train_data.drop("borocode", axis=1, inplace=True)
    train_data.drop("neighborhood", axis=1, inplace=True)
    train_data.reset_index(drop=True, inplace=True)
    
    # Train Nearest Neighbors model
    nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(train_data)
    distances, indices = nbrs.kneighbors(train_data)

    # Find nearest neighbor
    nearest_neighbor_index = indices[0][1]
    result = train_data.iloc[nearest_neighbor_index, :]
    
    # Define zipcode, neighborhood and borough
    result_zipcode = int(result["zipcode"])
    result_neighborhood = data_df[data_df.zipcode == result["zipcode"]].neighborhood.tolist()[0]
    result_borough = data_df[data_df.zipcode == result["zipcode"]].borocode.tolist()[0]
    dictObj["manhattan"]["zipcode"] = result_zipcode
    dictObj["manhattan"]["neighborhood"] = result_neighborhood
    dictObj["manhattan"]["borough"] = result_borough
    
    for item in dictObj["manhattan"]["values"]:
        value_type = dictObj["manhattan"]["values"][item]["label"]
        dictObj["manhattan"]["values"][item]["GFA"] = result[value_type]

In [108]:
pprint(dictObj)

{'manhattan': {'borough': 'Manhattan',
               'neighborhood': 'Murray Hill / Kips Bay',
               'values': {'0': {'GFA': 326668.0,
                                'color': '#FFDD80',
                                'label': '1 & 2 Family Residential'},
                          '1': {'GFA': 1390578.0,
                                'color': '#ff9100',
                                'label': 'Multi-Family Walk-up'},
                          '10': {'GFA': 0.0,
                                 'color': '#263238',
                                 'label': 'Vacant Land'},
                          '2': {'GFA': 13831161.0,
                                'color': '#bf360c',
                                'label': 'Multi-Family Elevator'},
                          '3': {'GFA': 22955466.0,
                                'color': '#ff5252',
                                'label': 'Mixed Residential & Commercial'},
                          '4': {'GFA': 29604440.0,
         