## Part 2 - Geocoding API

In [1]:
# Import libraries
import pandas as pd
import numpy as np

# Geocoding dependencies for the API calls made further down
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import webbrowser

# Geocoder client, created once all imports are in place;
# the user agent string identifies this notebook to the service
geolocator = Nominatim(user_agent='alison.sadel@gmail.com')

# Show long cell values (such as full geocoded addresses) without truncation.
# The fully-qualified option name is used: pandas resolves abbreviated names by
# pattern matching, which can stop working if a similarly named option is added.
pd.set_option('display.max_colwidth', 800)


In [2]:
# Relative path to the cleaned listings file
file = "Resources/clean.csv"

# Load the CSV into a dataframe
df = pd.read_csv(file)

# Discard any column whose header starts with "Unnamed"
# (the leftover index column written by an earlier export)
unnamed_columns = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~unnamed_columns]

df

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode,Location
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,Rental,11209,"9602 4th Avenue, New York, New York, 11209"
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,Sale,11231,"94 Degraw Street, Brooklyn, New York, 11231"
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,Sale,11221,"933 Lafayette Street, Brooklyn, New York, 11221"
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,Rental,11222,"918 Manhattan Ave, Brooklyn, New York, 11222"
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,Sale,11213,"906 Prospect Place, Brooklyn, New York, 11213"
...,...,...,...,...,...,...,...,...,...,...,...
231,308 East 38th Street,4C,Manhattan,Murray Hill,New York,New York,2018-01-01,890000.0,Sale,10016,"308 East 38th Street, New York, New York, 10016"
232,308 East 38th Street,8D,Manhattan,Murray Hill,New York,New York,2018-01-01,899000.0,Sale,10016,"308 East 38th Street, New York, New York, 10016"
233,308 East 38th Street,16F,Manhattan,Murray Hill,New York,New York,2018-01-01,1556238.0,Sale,10016,"308 East 38th Street, New York, New York, 10016"
234,308 East 38th Street,11E,Manhattan,Murray Hill,New York,New York,2018-01-01,1374723.0,Sale,10016,"308 East 38th Street, New York, New York, 10016"


In [3]:
# 1 - Wrap the geocoder call in a rate limiter (min_delay_seconds=1 between requests)
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
)

In [4]:
# 2 - Geocode every listing address
# Several rows share the same Location string (multiple units in one building),
# so each distinct address is sent through the rate-limited geocoder only once
# and the result is reused for every row with that address. This avoids
# redundant requests and the one-second delay that accompanies each of them.
geocoded_by_location = {
    location: geocode(location) for location in df['Location'].unique()
}

# Addresses the geocoder could not resolve are stored as None
df['Address'] = df['Location'].map(geocoded_by_location.get)

In [6]:
# Allow up to 500 rows to be rendered when a dataframe is displayed
pd.options.display.max_rows = 500

In [7]:
# Inspect the dataframe, now including the geocoded 'Address' column
df

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode,Location,Address
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,Rental,11209,"9602 4th Avenue, New York, New York, 11209","(9602, 4th Avenue, Fort Hamilton, Brooklyn, Kings County, New York, 11209, United States, (40.6151866, -74.032020545154))"
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,Sale,11231,"94 Degraw Street, Brooklyn, New York, 11231","(94, Degraw Street, Columbia Street Waterfront District, Brooklyn, Kings County, New York, 11231, United States, (40.685799349999996, -74.00335189192313))"
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,Sale,11221,"933 Lafayette Street, Brooklyn, New York, 11221",
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,Rental,11222,"918 Manhattan Ave, Brooklyn, New York, 11222","(Green Farms Supermarket, 918, Manhattan Avenue, Brooklyn, Kings County, New York, 11222, United States, (40.7307511, -73.9540256))"
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,Sale,11213,"906 Prospect Place, Brooklyn, New York, 11213","(906, Prospect Place, Eastern Parkway, Brooklyn, Kings County, New York, 11213, United States, (40.67376755, -73.94657791384977))"
5,881 Park Avenue,,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2021-01-01,1450000.0,Sale,11206,"881 Park Avenue, Brooklyn, New York, 11206","(881, Park Avenue, Brooklyn, Kings County, New York, 11206, United States, (40.698866949999996, -73.93986610491517))"
6,846 Hancock Street,,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2021-01-01,1670000.0,Sale,11233,"846 Hancock Street, Brooklyn, New York, 11233","(846, Hancock Street, BEDFORD STUYVESANT/ EXPANDED STUYVEVSANT HEIGHTS HISTORIC DISTRICT, Brooklyn, Kings County, New York, 11233, United States, (40.68560405, -73.92165635454509))"
7,845 Greene Avenue,Unit 3,Brooklyn,Bedford-Stuyvesant,New York,New York,2020-01-01,1895.0,Rental,11221,"845 Greene Avenue, New York, New York, 11221","(845, Greene Avenue, Bedford-Stuyvesant, Brooklyn, Kings County, New York, 11221, United States, (40.69052825, -73.93423862971204))"
8,757 Bergen Street,1,Brooklyn,Crown Heights,Brooklyn,New York,2020-01-01,1543688.0,Sale,11238,"757 Bergen Street, Brooklyn, New York, 11238","(757, Bergen Street, Prospect Heights, Brooklyn, Kings County, New York, 11238, United States, (40.6786036, -73.96327695))"
9,721 Monroe Street,,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2021-01-01,1999999.0,Sale,11221,"721 Monroe Street, Brooklyn, New York, 11221","(721, Monroe Street, Bedford-Stuyvesant, Brooklyn, Kings County, New York, 11221, United States, (40.68824595, -73.92800579875049))"


In [8]:
# 3 - Extract the coordinate point from each geocoded address
def _point_or_none(loc):
    """Return the geocoded location's point as a plain tuple, or None when the lookup failed."""
    if not loc:
        return None
    return tuple(loc.point)


df['point'] = df['Address'].apply(_point_or_none)
df['point']

0                (40.6151866, -74.032020545154, 0.0)
1      (40.685799349999996, -74.00335189192313, 0.0)
2                                               None
3                     (40.7307511, -73.9540256, 0.0)
4             (40.67376755, -73.94657791384977, 0.0)
5      (40.698866949999996, -73.93986610491517, 0.0)
6             (40.68560405, -73.92165635454509, 0.0)
7             (40.69052825, -73.93423862971204, 0.0)
8                    (40.6786036, -73.96327695, 0.0)
9             (40.68824595, -73.92800579875049, 0.0)
10      (40.71529507692308, -74.00703146153846, 0.0)
11                    (40.7112356, -73.9481786, 0.0)
12                    (40.7112356, -73.9481786, 0.0)
13                    (40.7112356, -73.9481786, 0.0)
14             (40.6941834, -73.99585531608435, 0.0)
15                   (40.7266738, -73.94668815, 0.0)
16                   (40.7266738, -73.94668815, 0.0)
17             (40.7247992, -73.94660784727965, 0.0)
18            (40.65299665, -73.91209596183775

In [9]:
# Re-display the extracted coordinate tuples
df['point']

0                (40.6151866, -74.032020545154, 0.0)
1      (40.685799349999996, -74.00335189192313, 0.0)
2                                               None
3                     (40.7307511, -73.9540256, 0.0)
4             (40.67376755, -73.94657791384977, 0.0)
5      (40.698866949999996, -73.93986610491517, 0.0)
6             (40.68560405, -73.92165635454509, 0.0)
7             (40.69052825, -73.93423862971204, 0.0)
8                    (40.6786036, -73.96327695, 0.0)
9             (40.68824595, -73.92800579875049, 0.0)
10      (40.71529507692308, -74.00703146153846, 0.0)
11                    (40.7112356, -73.9481786, 0.0)
12                    (40.7112356, -73.9481786, 0.0)
13                    (40.7112356, -73.9481786, 0.0)
14             (40.6941834, -73.99585531608435, 0.0)
15                   (40.7266738, -73.94668815, 0.0)
16                   (40.7266738, -73.94668815, 0.0)
17             (40.7247992, -73.94660784727965, 0.0)
18            (40.65299665, -73.91209596183775

In [10]:
# 4a - Expand each point tuple into separate positional columns (0, 1, ...)
coordinates_series = df['point'].apply(pd.Series)

# 4b - Copy position 0 into 'Latitude' and position 1 into 'Longitude'
for position, column_name in enumerate(['Latitude', 'Longitude']):
    df[column_name] = coordinates_series[position]

In [12]:
#4c - Find Missing Values
# Boolean mask that is True for every row where either coordinate is NaN.
# Both columns are checked so a gap in Longitude is caught as well as one in
# Latitude (the previous commented-out Longitude check referenced an undefined
# dataframe and has been removed).
bool_series = df[['Latitude', 'Longitude']].isnull().any(axis=1)

In [13]:
# Show only the rows flagged by the missing-value mask
df.loc[bool_series]

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode,Location,Address,point,Latitude,Longitude
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,Sale,11221,"933 Lafayette Street, Brooklyn, New York, 11221",,,,
31,567 Ocean Blvd,A102,Brooklyn,Prospect Park South,Brooklyn,New York,2021-01-01,750000.0,Sale,11226,"567 Ocean Blvd, Brooklyn, New York, 11226",,,,
100,1725 York Avenue,4D,Manhattan,Upper East Side,Brooklyn,New York,2021-01-01,995000.0,Sale,10128,"1725 York Avenue, Brooklyn, New York, 10128",,,,


In [16]:
# 4d - Fill in coordinates by hand for the rows that are still missing them.
# Keys are dataframe row labels; values are (latitude, longitude) pairs.
manual_coordinates = {
    2: (40.692015, -73.934678),
    31: (40.619061, -73.955139),
    100: (40.77844, -73.945538),
}

for row_label, (latitude, longitude) in manual_coordinates.items():
    df.loc[row_label, 'Latitude'] = latitude
    df.loc[row_label, 'Longitude'] = longitude

df

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode,Location,Address,point,Latitude,Longitude
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,Rental,11209,"9602 4th Avenue, New York, New York, 11209","(9602, 4th Avenue, Fort Hamilton, Brooklyn, Kings County, New York, 11209, United States, (40.6151866, -74.032020545154))","(40.6151866, -74.032020545154, 0.0)",40.615187,-74.032021
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,Sale,11231,"94 Degraw Street, Brooklyn, New York, 11231","(94, Degraw Street, Columbia Street Waterfront District, Brooklyn, Kings County, New York, 11231, United States, (40.685799349999996, -74.00335189192313))","(40.685799349999996, -74.00335189192313, 0.0)",40.685799,-74.003352
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,Sale,11221,"933 Lafayette Street, Brooklyn, New York, 11221",,,40.692015,-73.934678
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,Rental,11222,"918 Manhattan Ave, Brooklyn, New York, 11222","(Green Farms Supermarket, 918, Manhattan Avenue, Brooklyn, Kings County, New York, 11222, United States, (40.7307511, -73.9540256))","(40.7307511, -73.9540256, 0.0)",40.730751,-73.954026
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,Sale,11213,"906 Prospect Place, Brooklyn, New York, 11213","(906, Prospect Place, Eastern Parkway, Brooklyn, Kings County, New York, 11213, United States, (40.67376755, -73.94657791384977))","(40.67376755, -73.94657791384977, 0.0)",40.673768,-73.946578
5,881 Park Avenue,,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2021-01-01,1450000.0,Sale,11206,"881 Park Avenue, Brooklyn, New York, 11206","(881, Park Avenue, Brooklyn, Kings County, New York, 11206, United States, (40.698866949999996, -73.93986610491517))","(40.698866949999996, -73.93986610491517, 0.0)",40.698867,-73.939866
6,846 Hancock Street,,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2021-01-01,1670000.0,Sale,11233,"846 Hancock Street, Brooklyn, New York, 11233","(846, Hancock Street, BEDFORD STUYVESANT/ EXPANDED STUYVEVSANT HEIGHTS HISTORIC DISTRICT, Brooklyn, Kings County, New York, 11233, United States, (40.68560405, -73.92165635454509))","(40.68560405, -73.92165635454509, 0.0)",40.685604,-73.921656
7,845 Greene Avenue,Unit 3,Brooklyn,Bedford-Stuyvesant,New York,New York,2020-01-01,1895.0,Rental,11221,"845 Greene Avenue, New York, New York, 11221","(845, Greene Avenue, Bedford-Stuyvesant, Brooklyn, Kings County, New York, 11221, United States, (40.69052825, -73.93423862971204))","(40.69052825, -73.93423862971204, 0.0)",40.690528,-73.934239
8,757 Bergen Street,1,Brooklyn,Crown Heights,Brooklyn,New York,2020-01-01,1543688.0,Sale,11238,"757 Bergen Street, Brooklyn, New York, 11238","(757, Bergen Street, Prospect Heights, Brooklyn, Kings County, New York, 11238, United States, (40.6786036, -73.96327695))","(40.6786036, -73.96327695, 0.0)",40.678604,-73.963277
9,721 Monroe Street,,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2021-01-01,1999999.0,Sale,11221,"721 Monroe Street, Brooklyn, New York, 11221","(721, Monroe Street, Bedford-Stuyvesant, Brooklyn, Kings County, New York, 11221, United States, (40.68824595, -73.92800579875049))","(40.68824595, -73.92800579875049, 0.0)",40.688246,-73.928006


In [17]:
# Persist the geocoded dataframe for later steps.
# NOTE(review): with pandas' default index=True the row index is written as an
# unnamed first column - confirm downstream readers expect that.
output_path = "Resources/geopyoutput.csv"
df.to_csv(output_path)