## Part 1a - Load & Clean

In [1]:
# --- Imports (all grouped before any configuration) -------------------------
import webbrowser

import numpy as np
import pandas as pd

# Geocoding helpers used by the Part 1b API call
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

# --- Configuration -----------------------------------------------------------
# Nominatim client used for every geocoding request in this notebook
geolocator = Nominatim(user_agent='alison.sadel@gmail.com')

# Show up to 800 characters per column value so long strings are fully visible
pd.set_option('max_colwidth', 800)


In [2]:
# Path to the input CSV
file = "Resources/adam.csv"

# Load the CSV into a DataFrame
df = pd.read_csv(file)

# df.shape is (number of rows, number of columns)
rows, columns = df.shape

# Summarise the size of the dataset
print(f'The dataset is comprised of {rows} rows and {columns} columns.')

The dataset is comprised of 241 rows and 10 columns.


In [3]:
# Check the datatype pandas assigned to each column when reading the CSV
df.dtypes

Address          object
Unit             object
Borough          object
Neighborhood     object
City             object
State            object
Year              int64
Price           float64
Status           object
Postalcode        int64
dtype: object

In [4]:
# Parse the integer Year column as a four-digit year and store it as datetime
year_as_datetime = pd.to_datetime(df['Year'], format='%Y')
df['Year'] = year_as_datetime

# Preview the first rows to confirm the conversion
df.head(5)

Unnamed: 0,Address,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,RENTED,11209
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,SOLD,11231
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,SOLD,11221
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,RENTED,11222
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,SOLD,11213


In [5]:
# Boolean mask: True for rows whose State is exactly 'New York'
filtered = df['State'].eq('New York')

# Keep only the New York rows and rename Address -> Street
full = df.loc[filtered].rename(columns={"Address": "Street"})
full

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,RENTED,11209
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,SOLD,11231
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,SOLD,11221
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,RENTED,11222
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,SOLD,11213
...,...,...,...,...,...,...,...,...,...,...
236,308 East 38th Street,4C,Manhattan,Murray Hill,New York,New York,2018-01-01,890000.0,SOLD,10016
237,308 East 38th Street,8D,Manhattan,Murray Hill,New York,New York,2018-01-01,899000.0,SOLD,10016
238,308 East 38th Street,16F,Manhattan,Murray Hill,New York,New York,2018-01-01,1556238.0,SOLD,10016
239,308 East 38th Street,11E,Manhattan,Murray Hill,New York,New York,2018-01-01,1374723.0,SOLD,10016


In [6]:
# Build a single "Street, City, State, Postalcode" string per row to use as the
# geocoding query. Each part is cast to str first (Postalcode is numeric) and
# the pieces are concatenated as pandas Series, so values stay aligned with
# the frame's index and no intermediate NumPy character arrays are needed.
full['Location'] = (
    full['Street'].astype(str) + ', '
    + full['City'].astype(str) + ', '
    + full['State'].astype(str) + ', '
    + full['Postalcode'].astype(str)
)
full

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode,Location
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,RENTED,11209,"9602 4th Avenue, New York, New York, 11209"
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,SOLD,11231,"94 Degraw Street, Brooklyn, New York, 11231"
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,SOLD,11221,"933 Lafayette Street, Brooklyn, New York, 11221"
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,RENTED,11222,"918 Manhattan Ave, Brooklyn, New York, 11222"
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,SOLD,11213,"906 Prospect Place, Brooklyn, New York, 11213"
...,...,...,...,...,...,...,...,...,...,...,...
236,308 East 38th Street,4C,Manhattan,Murray Hill,New York,New York,2018-01-01,890000.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016"
237,308 East 38th Street,8D,Manhattan,Murray Hill,New York,New York,2018-01-01,899000.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016"
238,308 East 38th Street,16F,Manhattan,Murray Hill,New York,New York,2018-01-01,1556238.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016"
239,308 East 38th Street,11E,Manhattan,Murray Hill,New York,New York,2018-01-01,1374723.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016"


## Part 1b - Geocode Latitude & Longitude

In [16]:
# Step 1 - Wrap the Nominatim geocode call in a rate limiter configured with a
# minimum delay of 1 second between calls
geocode = RateLimiter(func=geolocator.geocode, min_delay_seconds=1)

In [17]:
# Step 2 - Geocode each address.
# Many rows share the same Location string (several units in one building),
# so each distinct location is sent to the geocoder only once and the result
# is then mapped back onto every row. This avoids repeating identical,
# rate-limited requests.
geocoded_by_location = {
    location: geocode(location) for location in full['Location'].unique()
}

# Look up via a function (dict.get) so unresolved addresses stay None, exactly
# as they would when applying geocode row by row.
full['Address'] = full['Location'].map(geocoded_by_location.get)

In [18]:
# Display the frame, which now includes the geocoder result in 'Address'
full

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode,Location,Address
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,RENTED,11209,"9602 4th Avenue, New York, New York, 11209","(9602, 4th Avenue, Fort Hamilton, Brooklyn, Kings County, New York, 11209, United States, (40.6151866, -74.032020545154))"
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,SOLD,11231,"94 Degraw Street, Brooklyn, New York, 11231","(94, Degraw Street, Columbia Street Waterfront District, Brooklyn, Kings County, New York, 11231, United States, (40.685799349999996, -74.00335189192313))"
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,SOLD,11221,"933 Lafayette Street, Brooklyn, New York, 11221",
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,RENTED,11222,"918 Manhattan Ave, Brooklyn, New York, 11222","(Green Farms Supermarket, 918, Manhattan Avenue, Brooklyn, Kings County, New York, 11222, United States, (40.7307511, -73.9540256))"
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,SOLD,11213,"906 Prospect Place, Brooklyn, New York, 11213","(906, Prospect Place, Eastern Parkway, Brooklyn, Kings County, New York, 11213, United States, (40.67376755, -73.94657791384977))"
...,...,...,...,...,...,...,...,...,...,...,...,...
236,308 East 38th Street,4C,Manhattan,Murray Hill,New York,New York,2018-01-01,890000.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016","(308, East 38th Street, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States, (40.7469167, -73.9733582))"
237,308 East 38th Street,8D,Manhattan,Murray Hill,New York,New York,2018-01-01,899000.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016","(308, East 38th Street, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States, (40.7469167, -73.9733582))"
238,308 East 38th Street,16F,Manhattan,Murray Hill,New York,New York,2018-01-01,1556238.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016","(308, East 38th Street, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States, (40.7469167, -73.9733582))"
239,308 East 38th Street,11E,Manhattan,Murray Hill,New York,New York,2018-01-01,1374723.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016","(308, East 38th Street, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States, (40.7469167, -73.9733582))"


In [19]:
# Step 3 - Pull the coordinate point out of each geocoder result
def _point_or_none(loc):
    """Return ``tuple(loc.point)`` for a truthy geocode result, else ``None``."""
    if not loc:
        return None
    return tuple(loc.point)


full['point'] = full['Address'].apply(_point_or_none)
full['point']

0                (40.6151866, -74.032020545154, 0.0)
1      (40.685799349999996, -74.00335189192313, 0.0)
2                                               None
3                     (40.7307511, -73.9540256, 0.0)
4             (40.67376755, -73.94657791384977, 0.0)
                           ...                      
236                   (40.7469167, -73.9733582, 0.0)
237                   (40.7469167, -73.9733582, 0.0)
238                   (40.7469167, -73.9733582, 0.0)
239                   (40.7469167, -73.9733582, 0.0)
240    (40.620909612244894, -74.02822655102041, 0.0)
Name: point, Length: 236, dtype: object

In [20]:
# Re-display the extracted point column
full['point']

0                (40.6151866, -74.032020545154, 0.0)
1      (40.685799349999996, -74.00335189192313, 0.0)
2                                               None
3                     (40.7307511, -73.9540256, 0.0)
4             (40.67376755, -73.94657791384977, 0.0)
                           ...                      
236                   (40.7469167, -73.9733582, 0.0)
237                   (40.7469167, -73.9733582, 0.0)
238                   (40.7469167, -73.9733582, 0.0)
239                   (40.7469167, -73.9733582, 0.0)
240    (40.620909612244894, -74.02822655102041, 0.0)
Name: point, Length: 236, dtype: object

In [21]:
# 4a - Expand each point tuple into numeric columns 0, 1, 2.
# Rows without a point (geocoding returned nothing) become all-NaN rows.
# Building the frame directly from the tuples is much cheaper than
# .apply(pd.Series) and still yields columns 0/1 when every point is missing.
no_point = (np.nan, np.nan, np.nan)
coordinates_series = pd.DataFrame(
    [pt if isinstance(pt, tuple) else no_point for pt in full['point']],
    index=full.index,
    columns=[0, 1, 2],
)

# 4b - Add Latitude and Longitude as columns in the dataframe
full['Latitude'] = coordinates_series[0]
full['Longitude'] = coordinates_series[1]

# 4c - Find missing values
# Boolean Series that is True where no latitude was obtained
bool_series = full['Latitude'].isna()

# Show the rows that still need coordinates. A bare expression in the middle
# of a cell is not rendered, so display() is called explicitly.
display(full[bool_series])

# 4d - Fill in missing Lat/Long values that were looked up manually.
# Keys are row labels of `full`; values are (latitude, longitude).
manual_coordinates = {
    2: (40.692015, -73.934678),
    31: (40.619061, -73.955139),
    104: (40.77844, -73.945538),
}

for row_label, (latitude, longitude) in manual_coordinates.items():
    # Only patch rows that exist and are actually missing coordinates:
    # assigning through .loc with an unknown label would silently append a
    # new, otherwise-empty row, and a successful geocode should not be
    # overwritten by a stale manual value.
    if row_label in full.index and bool_series.loc[row_label]:
        full.loc[row_label, ['Latitude', 'Longitude']] = [latitude, longitude]

# Report anything the manual table did not cover so it is not missed
still_missing = full.index[full['Latitude'].isna()].tolist()
if still_missing:
    print(f'Rows still missing coordinates: {still_missing}')

full

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode,Location,Address,point,Latitude,Longitude
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,RENTED,11209,"9602 4th Avenue, New York, New York, 11209","(9602, 4th Avenue, Fort Hamilton, Brooklyn, Kings County, New York, 11209, United States, (40.6151866, -74.032020545154))","(40.6151866, -74.032020545154, 0.0)",40.615187,-74.032021
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,SOLD,11231,"94 Degraw Street, Brooklyn, New York, 11231","(94, Degraw Street, Columbia Street Waterfront District, Brooklyn, Kings County, New York, 11231, United States, (40.685799349999996, -74.00335189192313))","(40.685799349999996, -74.00335189192313, 0.0)",40.685799,-74.003352
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,SOLD,11221,"933 Lafayette Street, Brooklyn, New York, 11221",,,40.692015,-73.934678
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,RENTED,11222,"918 Manhattan Ave, Brooklyn, New York, 11222","(Green Farms Supermarket, 918, Manhattan Avenue, Brooklyn, Kings County, New York, 11222, United States, (40.7307511, -73.9540256))","(40.7307511, -73.9540256, 0.0)",40.730751,-73.954026
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,SOLD,11213,"906 Prospect Place, Brooklyn, New York, 11213","(906, Prospect Place, Eastern Parkway, Brooklyn, Kings County, New York, 11213, United States, (40.67376755, -73.94657791384977))","(40.67376755, -73.94657791384977, 0.0)",40.673768,-73.946578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236,308 East 38th Street,4C,Manhattan,Murray Hill,New York,New York,2018-01-01,890000.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016","(308, East 38th Street, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States, (40.7469167, -73.9733582))","(40.7469167, -73.9733582, 0.0)",40.746917,-73.973358
237,308 East 38th Street,8D,Manhattan,Murray Hill,New York,New York,2018-01-01,899000.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016","(308, East 38th Street, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States, (40.7469167, -73.9733582))","(40.7469167, -73.9733582, 0.0)",40.746917,-73.973358
238,308 East 38th Street,16F,Manhattan,Murray Hill,New York,New York,2018-01-01,1556238.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016","(308, East 38th Street, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States, (40.7469167, -73.9733582))","(40.7469167, -73.9733582, 0.0)",40.746917,-73.973358
239,308 East 38th Street,11E,Manhattan,Murray Hill,New York,New York,2018-01-01,1374723.0,SOLD,10016,"308 East 38th Street, New York, New York, 10016","(308, East 38th Street, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States, (40.7469167, -73.9733582))","(40.7469167, -73.9733582, 0.0)",40.746917,-73.973358


In [22]:
# Write the enriched frame (including the new coordinate columns) to disk
full.to_csv(path_or_buf="Resources/output.csv")