# GeoClient examples

Documentation:
https://api.cityofnewyork.us/geoclient/v1/doc


In [1]:
import pandas as pd
pd.set_option('display.max_rows', 200)

import sys
from urllib.parse import urlencode
from requests import get


## 1. Single Field Search

The single field search can parse many different types of input, including an address, a BBL (borough-block-lot), a BIN (building identification number), or an intersection.

In [2]:
# Endpoint of the single field search; the query string is appended below.
sfsPath = 'https://api.nyc.gov/geo/geoclient/v1/search.json?'
# NOTE(review): the subscription key is committed in plain text -- consider
# loading it from the environment and rotating this one.
key = '46568ebcfa154bbcb23cd1e2c1284cef'

# Free-text search input: here a street address followed by the borough.
sfsQuery = dict(input='253 Broadway Manhattan')

# The subscription key goes first, then the URL-encoded search input.
URL = '{}subscription-key={}&{}'.format(sfsPath, key, urlencode(sfsQuery))
URL

'https://api.nyc.gov/geo/geoclient/v1/search.json?subscription-key=46568ebcfa154bbcb23cd1e2c1284cef&input=253+Broadway+Manhattan'

In [3]:
# put that url into a get request and print the output
# Send the request, decode the JSON body, and keep the 'response' payload
# of the first entry in 'results'.
response = get(URL)
payload = response.json()
results = payload['results'][0]['response']
results

{'alleyCrossStreetsFlag': 'X',
 'assemblyDistrict': '66',
 'bbl': '1001347501',
 'bblBoroughCode': '1',
 'bblTaxBlock': '00134',
 'bblTaxLot': '7501',
 'blockfaceId': '0212261726',
 'boardOfElectionsPreferredLgc': '1',
 'boePreferredStreetName': 'BROADWAY',
 'boePreferredstreetCode': '11361001',
 'boroughCode1In': '1',
 'buildingIdentificationNumber': '1082757',
 'censusBlock2000': '1010',
 'censusBlock2010': '1004',
 'censusTract1990': '  21  ',
 'censusTract2000': '  21  ',
 'censusTract2010': '  21  ',
 'cityCouncilDistrict': '01',
 'civilCourtDistrict': '01',
 'coincidentSegmentCount': '1',
 'communityDistrict': '101',
 'communityDistrictBoroughCode': '1',
 'communityDistrictNumber': '01',
 'communitySchoolDistrict': '02',
 'condominiumBillingBbl': '1001347501',
 'condominiumFlag': 'C',
 'congressionalDistrict': '10',
 'continuousParityIndicator1e': 'L',
 'cooperativeIdNumber': '0000',
 'crossStreetNamesFlagIn': 'E',
 'dcpCommercialStudyArea': '11007',
 'dcpPreferredLgc': '01',
 'd

In [4]:
# Same single field search, this time with a BBL as the input.
sfsQuery = dict(input='1001347501')

# Reuses sfsPath and key from the earlier cell.
URL = '{}subscription-key={}&{}'.format(sfsPath, key, urlencode(sfsQuery))
URL

'https://api.nyc.gov/geo/geoclient/v1/search.json?subscription-key=46568ebcfa154bbcb23cd1e2c1284cef&input=1001347501'

In [5]:
# Fetch the BBL search and keep the 'response' payload of the first result.
response = get(URL)
payload = response.json()
results = payload['results'][0]['response']
results

{'bbl': '1001347501',
 'bblBoroughCode': '1',
 'bblBoroughCodeIn': '1',
 'bblTaxBlock': '00134',
 'bblTaxBlockIn': '00134',
 'bblTaxLot': '7501',
 'bblTaxLotIn': '7501',
 'buildingIdentificationNumber': '1082756',
 'condominiumBillingBbl': '1001347501',
 'condominiumFlag': 'C',
 'cooperativeIdNumber': '0000',
 'dcpCommercialStudyArea': '11007',
 'dofCondominiumIdentificationNumber': '0657',
 'firstBoroughName': 'MANHATTAN',
 'geosupportFunctionCode': 'BL',
 'geosupportReturnCode': '00',
 'gi5DigitStreetCode1': '13610',
 'gi5DigitStreetCode2': '13610',
 'gi5DigitStreetCode3': '13610',
 'gi5DigitStreetCode4': '27050',
 'giBoroughCode1': '1',
 'giBoroughCode2': '1',
 'giBoroughCode3': '1',
 'giBoroughCode4': '1',
 'giBuildingIdentificationNumber1': '1082756',
 'giBuildingIdentificationNumber2': '1082756',
 'giBuildingIdentificationNumber3': '1082757',
 'giBuildingIdentificationNumber4': '1082757',
 'giDcpPreferredLgc1': '01',
 'giDcpPreferredLgc2': '01',
 'giDcpPreferredLgc3': '01',
 'giD

## 2. Address Search

address search uses inputs:

* house number
* street
* zip or borough

In [6]:
# Endpoint of the address search; the query string is appended below.
addrPath = 'https://api.nyc.gov/geo/geoclient/v1/address.json?'
# NOTE(review): the subscription key is committed in plain text -- consider
# loading it from the environment and rotating this one.
key = '46568ebcfa154bbcb23cd1e2c1284cef'

# House number and street are separate fields here; the borough locates the
# address (a 'zip' entry such as '10007' is the alternative noted above).
addrQuery = dict(
    houseNumber='253',
    street='Broadway',
    borough='manhattan',
)

# The subscription key goes first, then the URL-encoded address fields.
URL = '{}subscription-key={}&{}'.format(addrPath, key, urlencode(addrQuery))
URL

'https://api.nyc.gov/geo/geoclient/v1/address.json?subscription-key=46568ebcfa154bbcb23cd1e2c1284cef&houseNumber=253&street=Broadway&borough=manhattan'

In [7]:
# The address endpoint puts its payload under the top-level 'address' key
# (no 'results' list to unwrap, unlike the single field search).
response = get(URL)
payload = response.json()
results = payload['address']
results

{'alleyCrossStreetsFlag': 'X',
 'assemblyDistrict': '66',
 'bbl': '1001347501',
 'bblBoroughCode': '1',
 'bblTaxBlock': '00134',
 'bblTaxLot': '7501',
 'blockfaceId': '0212261726',
 'boardOfElectionsPreferredLgc': '1',
 'boePreferredStreetName': 'BROADWAY',
 'boePreferredstreetCode': '11361001',
 'boroughCode1In': '1',
 'buildingIdentificationNumber': '1082757',
 'censusBlock2000': '1010',
 'censusBlock2010': '1004',
 'censusTract1990': '  21  ',
 'censusTract2000': '  21  ',
 'censusTract2010': '  21  ',
 'cityCouncilDistrict': '01',
 'civilCourtDistrict': '01',
 'coincidentSegmentCount': '1',
 'communityDistrict': '101',
 'communityDistrictBoroughCode': '1',
 'communityDistrictNumber': '01',
 'communitySchoolDistrict': '02',
 'condominiumBillingBbl': '1001347501',
 'condominiumFlag': 'C',
 'congressionalDistrict': '10',
 'continuousParityIndicator1e': 'L',
 'cooperativeIdNumber': '0000',
 'crossStreetNamesFlagIn': 'E',
 'dcpCommercialStudyArea': '11007',
 'dcpPreferredLgc': '01',
 'd

You can also combine the house number and street into a single `street` field.

In [8]:
# No separate 'houseNumber' entry: the house number is folded into 'street'.
# (A 'zip' entry such as '10007' could be used in place of the borough.)
addrQuery = dict(
    street='253 Broadway',
    borough='manhattan',
)

# Reuses addrPath and key from the earlier cell.
URL = '{}subscription-key={}&{}'.format(addrPath, key, urlencode(addrQuery))
URL

'https://api.nyc.gov/geo/geoclient/v1/address.json?subscription-key=46568ebcfa154bbcb23cd1e2c1284cef&street=253+Broadway&borough=manhattan'

In [9]:
# Fetch the combined-field query; the payload sits under the 'address' key.
response = get(URL)
payload = response.json()
results = payload['address']
results

{'alleyCrossStreetsFlag': 'X',
 'assemblyDistrict': '66',
 'bbl': '1001347501',
 'bblBoroughCode': '1',
 'bblTaxBlock': '00134',
 'bblTaxLot': '7501',
 'blockfaceId': '0212261726',
 'boardOfElectionsPreferredLgc': '1',
 'boePreferredStreetName': 'BROADWAY',
 'boePreferredstreetCode': '11361001',
 'boroughCode1In': '1',
 'buildingIdentificationNumber': '1082757',
 'censusBlock2000': '1010',
 'censusBlock2010': '1004',
 'censusTract1990': '  21  ',
 'censusTract2000': '  21  ',
 'censusTract2010': '  21  ',
 'cityCouncilDistrict': '01',
 'civilCourtDistrict': '01',
 'coincidentSegmentCount': '1',
 'communityDistrict': '101',
 'communityDistrictBoroughCode': '1',
 'communityDistrictNumber': '01',
 'communitySchoolDistrict': '02',
 'condominiumBillingBbl': '1001347501',
 'condominiumFlag': 'C',
 'congressionalDistrict': '10',
 'continuousParityIndicator1e': 'L',
 'cooperativeIdNumber': '0000',
 'crossStreetNamesFlagIn': 'E',
 'dcpCommercialStudyArea': '11007',
 'dcpPreferredLgc': '01',
 'd

# Batch Geoclient

Use Geoclient to geocode every row of a dataframe.

The batch function uses the single field search input type and returns the BBL and BIN for each row.

In [26]:
def geoclientBatch(df,address='input',subscription_key=None,timeout=30):
    '''
    Geocode every row of a dataframe with GeoClient's Single Field Search.

    Uses DOITT's GeoClient (the web interface to DCP's GeoSupport)
    https://api.cityofnewyork.us/geoclient/v1/doc

    Inputs:
    df = dataframe to be geocoded; it is modified in place and also returned
    address = the name of the column with the address or input as a string
        input can be address and zip, address and borough, bbl, bin
    subscription_key = GeoClient subscription key; when None, the
        GEOCLIENT_SUBSCRIPTION_KEY environment variable is used if it is set,
        otherwise the key embedded in this function
    timeout = seconds to wait on each request before it is treated as failed

    Returns the dataframe df with two additional columns: geocodedBBL and geocodedBIN.
    One request is made per row. When a row cannot be geocoded (request
    failure, timeout, no results, missing column or key) both columns hold
    the string "Error: <exception class>" for that row instead of raising.
    KeyboardInterrupt and SystemExit are not swallowed, so a running batch
    can still be interrupted.

    '''
    # Local import so this cell does not depend on an edit to the imports cell.
    import os

    if subscription_key is None:
        # NOTE(review): the fallback key is committed in plain text; prefer
        # the environment variable or the parameter, and rotate this key.
        subscription_key = os.environ.get('GEOCLIENT_SUBSCRIPTION_KEY',
                                          '46568ebcfa154bbcb23cd1e2c1284cef')

    path = ('https://api.nyc.gov/geo/geoclient/v1/search.json?'
            + urlencode({'subscription-key': subscription_key}) + '&')

    def hitGeoC(row):
        '''Geocode one row; returns (BBL, BIN) or the same error string twice.'''
        try:
            query = {'input':row[address]}
            response = get(path+urlencode(query), timeout=timeout)
            # Only the first search result is used.
            results = response.json()['results'][0]['response']
            BBL = results['bbl']
            BIN = results['buildingIdentificationNumber']
        except Exception as e:
            # Exception (not a bare except) keeps per-row failures contained
            # while letting Ctrl-C / kernel interrupt stop the batch.
            BBL = ( "Error: %s" % type(e) )
            BIN = BBL
        return BBL,BIN

    # Collect plain tuples and assign each column once; this also works for
    # an empty dataframe, where the two columns are simply added empty.
    geocoded = [hitGeoC(row) for _, row in df.iterrows()]
    df['geocodedBBL'] = [bbl for bbl, _ in geocoded]
    df['geocodedBIN'] = [bin_ for _, bin_ in geocoded]
    return df


### Example of batch geoclient use

Using PLUTO from open data to get a list of addresses to geocode

In [11]:
# Open-data JSON endpoint plus a query string capping the download at 100 rows.
url = 'https://data.cityofnewyork.us/resource/64uk-42ks.json'
filters = "?$limit=100"
pluto = pd.read_json(''.join([url, filters]))
pluto.shape

(100, 82)

In [32]:
# Build the search input: the address followed by the borough name.
# The two-letter borough codes in pluto are mapped to full names first.
borocodeMap = dict(
    MN='Manhattan',
    BX='Bronx',
    BK='Brooklyn',
    QN='Queens',
    SI='Staten Island',
)
borough_names = pluto['borough'].map(borocodeMap)
pluto['full address'] = pluto['address'] + ' ' + borough_names

In [33]:
# Geocode each pluto row from its 'full address' column; the returned
# dataframe is kept as df.
df = geoclientBatch(df=pluto, address='full address')
df.shape

(100, 85)

In [34]:
# Show the search input next to the geocoded identifiers for the first rows.
df.loc[:, ['full address','geocodedBBL','geocodedBIN']].head(20)

Unnamed: 0,full address,geocodedBBL,geocodedBIN
0,CARLISLE PLACE Bronx,Error: <class 'IndexError'>,Error: <class 'IndexError'>
1,65 EAST 3 STREET Manhattan,1004450054,1087956
2,213 EAST 11 STREET Manhattan,1004670055,1006847
3,636 LEONARD STREET Brooklyn,3026217501,3391860
4,106 CONVENT AVENUE Manhattan,1019700058,1059630
5,764 JACKSON AVENUE Bronx,2026460007,2004574
6,3090 VILLA AVENUE Bronx,2033100022,2017306
7,1074 OLMSTEAD AVENUE Bronx,2038080006,2115127
8,CALCUTTA STREET Staten Island,Error: <class 'IndexError'>,Error: <class 'IndexError'>
9,LAMONT AVENUE Staten Island,Error: <class 'IndexError'>,Error: <class 'IndexError'>


In [35]:
# roughly 10% error rate for pluto: failed rows carry an 'Error...' string
# in geocodedBBL, so count the rows that contain it
failed = df['geocodedBBL'].str.contains('Error')
df.loc[failed].shape

(9, 85)

In [36]:
# redo the above but using zipcode instead of borough
# The last two characters of the stringified zipcode are dropped
# (presumably a trailing '.0' from a float column -- TODO confirm the dtype).
zip_text = pluto['zipcode'].astype(str).str.slice(stop=-2)
pluto['full address'] = pluto['address'] + ' ' + zip_text

In [37]:
# Geocode again, now with the zipcode-based 'full address'; the returned
# dataframe is kept as df.
df = geoclientBatch(df=pluto, address='full address')

df.shape

(100, 85)

In [38]:
# Show the zipcode-based input next to the geocoded identifiers.
df.loc[:, ['full address','geocodedBBL','geocodedBIN']].head(20)

Unnamed: 0,full address,geocodedBBL,geocodedBIN
0,CARLISLE PLACE 10467,Error: <class 'IndexError'>,Error: <class 'IndexError'>
1,65 EAST 3 STREET 10003,1004450054,1087956
2,213 EAST 11 STREET 10003,1004670055,1006847
3,636 LEONARD STREET 11222,3026217501,3391860
4,106 CONVENT AVENUE 10027,1019700058,1059630
5,764 JACKSON AVENUE 10456,2026460007,2004574
6,3090 VILLA AVENUE 10468,2033100022,2017306
7,1074 OLMSTEAD AVENUE 10472,2038080006,2115127
8,CALCUTTA STREET 10309,Error: <class 'IndexError'>,Error: <class 'IndexError'>
9,LAMONT AVENUE 10312,Error: <class 'IndexError'>,Error: <class 'IndexError'>


In [39]:
# same error rate using zip instead of borough
failed = df['geocodedBBL'].str.contains('Error')
df.loc[failed].shape

(9, 85)