# Using GeoClient to geocode a dataframe

single field address input, 

outputs bin and bbl

read the documentation to modify this for different types of inputs
https://api.cityofnewyork.us/geoclient/v1/doc


In [1]:
import pandas as pd
pd.set_option('display.max_rows', 200)

import sys
from urllib.parse import urlencode
from requests import get


In [2]:
def geoclientBatch(df,address='address'):
    '''
    Uses DOITT's GeoClient (the web interface to DCP's GeoSupport)
    https://api.cityofnewyork.us/geoclient/v1/doc
    Single Field Search input type
    
    Inputs: df = dataframe to be geocoded, address = the name of the column with the address as a string
    Returns the dataframe df with two additional columns: geocodedBBL and geocodedBIN
    
    '''
    path = 'https://api.cityofnewyork.us/geoclient/v1/search.json?app_id=fb9ad04a&app_key=051f93e4125df4bae4f7c57517e62344&'

    #warnings.filterwarnings('ignore') #do not display warnings
    
    def hitGeoC(df):
        try:
            query = {'input':df[address]}
            response = get(path+urlencode(query))
            results = response.json()['results'][0]['response']            
            BBL = results['bbl']
            BIN = results['buildingIdentificationNumber']
        except:
            e = sys.exc_info()[0]
            BBL = ( "Error: %s" % e )
            BIN = BBL
        return BBL,BIN
    
    df[['geocodedBBL','geocodedBIN']] = df.apply(hitGeoC,axis=1).apply(pd.Series)
    return df


# Example

Using PLUTO from open data to get a list of addresses to geocode

In [3]:
url = 'https://data.cityofnewyork.us/resource/64uk-42ks.json'
filters = "?$limit=100"
pluto = pd.read_json(url+filters)
pluto.shape

(100, 82)

In [4]:
# format the address field to include the borough

borocodeMap = {'MN':'Manhattan','BX':'Bronx','BK':'Brooklyn','QN':'Queens','SI':'Staten Island'}

pluto['full address'] = pluto.address+' '+pluto.borough.map(borocodeMap)

In [5]:
# run the pluto dataframe through geoclient and save the resulting dataframe as df
df = geoclientBatch(pluto,address='full address')

df.shape

(100, 85)

In [9]:
df[['full address','geocodedBBL','geocodedBIN']].head(20)

Unnamed: 0,full address,geocodedBBL,geocodedBIN
0,CARLISLE PLACE Bronx,Error: <class 'IndexError'>,Error: <class 'IndexError'>
1,65 EAST 3 STREET Manhattan,1004450054,1087956
2,213 EAST 11 STREET Manhattan,1004670055,1006847
3,636 LEONARD STREET Brooklyn,3026217501,3391860
4,106 CONVENT AVENUE Manhattan,1019700058,1059630
5,764 JACKSON AVENUE Bronx,2026460007,2004574
6,3090 VILLA AVENUE Bronx,2033100022,2017306
7,1074 OLMSTEAD AVENUE Bronx,2038080006,2115127
8,CALCUTTA STREET Staten Island,Error: <class 'IndexError'>,Error: <class 'IndexError'>
9,LAMONT AVENUE Staten Island,Error: <class 'IndexError'>,Error: <class 'IndexError'>
