In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from geopy.geocoders import Nominatim
import datetime


In [2]:
# Import the building-footprint shapefile database (large file, so this cell may be slow).
# The path is given without a file extension; presumably pandashp resolves the
# shapefile's component files (.shp/.dbf/...) itself -- TODO confirm against pandashp.
import pandashp as pdshp
df = pdshp.read_shp('data/BuildingFootprints/geo_export_3812582d-7726-4629-a875-09f9935aed3c')

  df = df.convert_objects(convert_numeric=True)


In [3]:
# Keep only the rows whose feat_code is 2100 (i.e. building footprints).
# The column list is printed first as a quick look at what the shapefile provides.
print(df.columns)
newpd = df.loc[df['feat_code'] == 2100]
nrecs = len(newpd)
# The full frame is large and no longer needed once the buildings are extracted.
del df

Index([u'num_floors', u'feat_code', u'groundelev', u'cnstrct_yr', u'name',
       u'doitt_id', u'date_lstmo', u'time_lstmo', u'bbl', u'bin',
       u'lststatype', u'shape_area', u'heightroof', u'built_code',
       u'shape_len', u'geometry'],
      dtype='object')


In [4]:
# Calculate the center (centroid) of every building footprint.
# posArray holds one row per record of newpd, in newpd's row order:
# column 0 is the centroid's x coordinate and column 1 its y coordinate.
# Iterating over the geometry column directly avoids a label-based .loc lookup
# per row, which is slow on a frame this size and fails if index labels repeat.
# The array is sized from newpd itself so this cell does not rely on a row count
# computed in another cell.
posArray = np.zeros((len(newpd), 2))
for row_pos, footprint in enumerate(newpd['geometry']):
    # centroid.coords holds a single (x, y) tuple for the centroid point
    posArray[row_pos, :] = footprint.centroid.coords[0]

In [5]:
# Join the centroids to a set of identifying columns from newpd, then export to a csv.
# .copy() makes importantpd an independent frame, so the column assignments below
# never write into (or warn about writing into) a slice of newpd.
importantpd = newpd.loc[:, ['doitt_id', 'bbl', 'bin']].copy()
# Add coordinates to the frame; posArray rows are in the same order as newpd's rows,
# so a positional assignment lines each centroid up with its building.
importantpd['long'] = posArray[:, 0]
importantpd['lat'] = posArray[:, 1]

# Convert bbl to a string column; missing values become the placeholder '00000000000'.
# The column is rebuilt in one step instead of writing strings piecewise into a
# float column, and int64 is requested explicitly because bbl values
# (e.g. 3063600022) do not fit in a 32-bit integer.
naninds = importantpd['bbl'].isnull()
importantpd['bbl'] = (
    importantpd['bbl']
    .fillna(0)          # temporary filler so the integer cast succeeds; overwritten below
    .astype('int64')
    .astype(str)
    .where(~naninds, '00000000000')
)
# Save this table as a csv for sql use.
importantpd.to_csv('data/BuildingFootprints_augmented.csv')

In [6]:
# Load the 2015 Brooklyn csv file.
bk15 = pd.read_csv('data/2015_brooklyn.csv', sep=',')

# Build the bbl key as a string: borough code, then the block zero-padded to
# 5 digits, then the lot zero-padded to 4 digits.
borough = [str(code) for code in bk15['BOROUGH'].values.astype(int)]
block = [str(code).zfill(5) for code in bk15['BLOCK'].values.astype(int)]
lot = [str(code).zfill(4) for code in bk15['LOT'].values.astype(int)]
bbl = [''.join(parts) for parts in zip(borough, block, lot)]

# The list is in row order, so assigning it directly aligns it with bk15's rows.
bk15['bbl'] = bbl

In [7]:
# Join the two tables on the shared bbl key. This is an inner join: only rows
# whose bbl appears in both bk15 and importantpd are kept.
joinedpd = bk15.merge(importantpd, on='bbl', how='inner')

In [8]:
# Display the merged table (bk15 columns followed by the footprint id/coordinate columns).
joinedpd

Unnamed: 0,BOROUGH,NEIGHBORHOOD,BUILDING CLASS CATEGORY,TAX CLASS AT PRESENT,BLOCK,LOT,EASE-MENT,BUILDING CLASS AT PRESENT,ADDRESS,APARTMENT NUMBER,...,YEAR BUILT,TAX CLASS AT TIME OF SALE,BUILDING CLASS AT TIME OF SALE,SALE PRICE,SALE DATE,bbl,doitt_id,bin,long,lat
0,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6360.0,22.0,,A5,8647 15TH AVENUE,,...,1930.0,1.0,A5,758000.0,2015-03-31,3063600022,618823.0,3166152.0,-74.010290,40.610341
1,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6361.0,17.0,,A5,55 BAY 10TH STREET,,...,1930.0,1.0,A5,778000.0,2015-06-15,3063610017,312932.0,3166223.0,-74.009735,40.609758
2,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6371.0,60.0,,A9,8620 19TH AVENUE,,...,1930.0,1.0,A9,0.0,2015-09-16,3063710060,771618.0,3166707.0,-74.001134,40.605598
3,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6372.0,48.0,,S1,1906 86TH STREET,,...,1931.0,1.0,S1,1365000.0,2015-05-29,3063720048,618058.0,3166758.0,-74.000385,40.605607
4,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6373.0,73.0,,A1,50 BAY 23RD STREET,,...,1930.0,1.0,A1,750000.0,2015-12-17,3063730073,411106.0,3166829.0,-74.000249,40.604135
5,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6374.0,49.0,,S1,1964 86TH STREET,,...,1925.0,1.0,S1,1470000.0,2015-05-06,3063740049,640221.0,3166863.0,-73.999002,40.604787
6,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6374.0,51.0,,S1,1970 86TH STREET,,...,1931.0,1.0,S1,1790000.0,2015-04-30,3063740051,290778.0,3166865.0,-73.998932,40.604685
7,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6398.0,39.0,,A1,1638 BENSON AVENUE,,...,1925.0,1.0,A1,820000.0,2015-07-24,3063980039,594134.0,3167572.0,-74.008041,40.607503
8,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6399.0,8.0,,S1,1653 BATH AVENUE,,...,1930.0,1.0,S1,0.0,2015-06-25,3063990008,600719.0,3167611.0,-74.008799,40.605738
9,3.0,BATH BEACH,01 ONE FAMILY DWELLINGS,1.0,6399.0,108.0,,S1,1651 BATH AVENUE,,...,1930.0,1.0,S1,0.0,2015-06-25,3063990108,364123.0,3167636.0,-74.008848,40.605769


In [9]:
# Write the merged table to csv. The DataFrame index is written as well
# (to_csv is called with its default settings).
joinedpd.to_csv('data/bk15_augmented.csv')