In [1]:
import numpy as np
import pandas as pd

from geopy.geocoders import Nominatim

## Lat/Long Information for Street Easy Neighborhoods

While working with Tableau, visualization errors occurred while trying to visualize data that was obtained from blending two different sources. To solve this problem, we proceed by adding lat/long information to the existing `street_easy_nyc.csv` file, and will use this new file as the data source in Tableau.

In [2]:
df = pd.read_csv('../../data/dataxp/airbnb_project/cleaned_data/street_easy_nyc.csv')

In [3]:
# Making sure that this dataframe is equivalent in size to the SELECT * FROM street_easy_nyc SQL query.

df.info

<bound method DataFrame.info of               area_name    borough bedrooms   price  year  month
0              Downtown  Manhattan        0  2350.0  2010      1
1               Midtown  Manhattan        0  2000.0  2010      1
2       Upper East Side  Manhattan        0  1750.0  2010      1
3       Upper Manhattan  Manhattan        0  1175.0  2010      1
4       Upper West Side  Manhattan        0  1800.0  2010      1
...                 ...        ...      ...     ...   ...    ...
110875  Windsor Terrace   Brooklyn   3 plus     NaN  2021      8
110876        Woodhaven     Queens   3 plus     NaN  2021      8
110877         Woodlawn      Bronx   3 plus     NaN  2021      8
110878         Woodside     Queens   3 plus  2595.0  2021      8
110879        Woodstock      Bronx   3 plus     NaN  2021      8

[110880 rows x 6 columns]>

## Adding Lat/Long Information for Each Neighborhood

In [4]:
# We need a lat/long coordinate for all of the neighborhoods in the DataFrame

locations = pd.DataFrame({'neighborhood': df['area_name'].unique()})

In [5]:
# Adding ', New York City' will make sure we geolocate the correct neighborhood.

locations['neighborhood'] = locations['neighborhood'].apply(lambda x: str(x) + ', New York City')

In [6]:
# Viewing the change

locations.head()

Unnamed: 0,neighborhood
0,"Downtown, New York City"
1,"Midtown, New York City"
2,"Upper East Side, New York City"
3,"Upper Manhattan, New York City"
4,"Upper West Side, New York City"


In [7]:
geolocator = Nominatim(timeout = 10, user_agent = 'my_geolocator')

In [8]:
# This is how we will get the lat/long coordinates

geolocator.geocode('Upper East Side, New York City')[1]

(40.7737016, -73.9641196)

In [9]:
lat_long = []

for neighborhood in locations['neighborhood']:
    
    try:
        coords = geolocator.geocode(neighborhood)[1]
        lat_long.append(coords)
        
    except (TypeError, NameError):
        lat_long.append(None)

In [10]:
# Making sure that lat_long and locations are the same length

print (len(lat_long))
print (len(locations))

195
195
