# The following notebook appends the distance data to the dataframe and saves it as a csv

In [1]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist

#load dataframes

apartments = pd.read_csv('datasets/clean.csv')
bars = pd.read_csv('datasets/bar_locations.csv')

# Calculate distances between each apartment and each bar
distances = cdist(apartments[['lat', 'long']].values, bars[['Latitude', 'Longitude']].values)

# Find the index of the closest bar for each apartment
closest_bar_index = np.argmin(distances, axis=1)

# Extract the distance to the closest bar for each apartment
closest_bar_distance = distances[np.arange(distances.shape[0]), closest_bar_index]

# Add the distance to the closest bar as a new column in the apartments dataframe
apartments['distance_to_closest_bar'] = closest_bar_distance
apartments['distance_to_closest_bar_m'] = apartments['distance_to_closest_bar'] * 111139

In [2]:
del apartments['Unnamed: 0']

In [3]:
apartments['host_identity_verified'] = apartments['host_identity_verified'].apply(lambda x: x=='verified')

In [4]:
apartments.head()

Unnamed: 0,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,lat,long,instant_bookable,cancellation_policy,...,Construction year,price,service fee,minimum nights,number of reviews,review rate number,calculated host listings count,availability 365,distance_to_closest_bar,distance_to_closest_bar_m
0,Clean & quiet apt home by the park,80014485718,False,Madaline,Brooklyn,Kensington,40.64749,-73.97237,False,strict,...,2020.0,966,193,10.0,9.0,4.0,6.0,286.0,0.002838,315.406543
1,Skylit Midtown Castle,52335172823,True,Jenna,Manhattan,Midtown,40.75362,-73.98377,False,moderate,...,2007.0,142,28,30.0,45.0,4.0,2.0,228.0,0.001348,149.861387
2,Entire Apt: Spacious Studio/Loft by central park,92037596077,True,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,False,moderate,...,2009.0,204,41,10.0,9.0,3.0,1.0,289.0,0.001498,166.463794
3,Large Cozy 1 BR Apartment In Midtown East,45498551794,True,Michelle,Manhattan,Murray Hill,40.74767,-73.975,True,flexible,...,2013.0,577,115,3.0,74.0,3.0,1.0,374.0,0.001689,187.663096
4,BlissArtsSpace!,90821839709,False,Emma,Brooklyn,Bedford-Stuyvesant,40.68688,-73.95596,False,moderate,...,2009.0,1060,212,45.0,49.0,5.0,1.0,219.0,0.001005,111.642482


In [33]:

apartments.to_csv('datasets/superclean.csv')