## Visualizations of NYC POI Data

### Inner Join

In [174]:
# Import modules
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as spstats
%matplotlib inline
from sklearn.preprocessing import MinMaxScaler

In [175]:
# Read the three cleaned point-of-interest tables: Google Places, Yelp and Foursquare
gp, yelp, fsq = (
    pd.read_csv(csv_path)
    for csv_path in (
        './clean_data/gp_poi_clean.csv',
        './clean_data/yelp_poi_cleaned.csv',
        './clean_data/fsq_poi_clean.csv',
    )
)

In [176]:
# Zipcode lookup tables: one column per borough / neighbourhood, zipcodes as the values
nyc_bo = pd.read_csv('./clean_data/nyc_boroughs.csv')
# The neighbourhood file is read as cp1252 rather than the default encoding
nyc_nb = pd.read_csv(
    './clean_data/nyc_neighbs.csv',
    encoding='cp1252',
)

In [177]:
# Preview the neighbourhood lookup: one column per neighbourhood, zipcodes as values (NaN-padded)
nyc_nb.head()

Unnamed: 0,Port Richmond,South Beach and Tottenville,Stapleton and St. George,Mid-Island (Willowbrook),Inwood and Washington Heights,Central Harlem,East Harlem,Upper West Side,Upper East Side,Chelsea and Clinton,...,Northwest Brooklyn,Central Brooklyn (Bedford Stuyvesant – Crown Heights),East New York and New Lots,Sunset Park,Southwest Brooklyn,Borough Park,Canarsie and Flatlands,Southern Brooklyn,Flatbush,Bushwick and Williamsburg
0,10302.0,10306.0,10301.0,10314.0,10031.0,10026.0,10029.0,10023.0,10021.0,10001.0,...,11201.0,11212.0,11207.0,11220.0,11209.0,11204.0,11234.0,11223.0,11203.0,11206.0
1,10303.0,10307.0,10304.0,,10032.0,10027.0,10035.0,10024.0,10028.0,10011.0,...,11205.0,11213.0,11208.0,11232.0,11214.0,11218.0,11236.0,11224.0,11210.0,11221.0
2,10310.0,10308.0,10305.0,,10033.0,10030.0,,10025.0,10044.0,10018.0,...,11215.0,11216.0,,,11228.0,11219.0,11239.0,11229.0,11225.0,11237.0
3,,,,,10034.0,10037.0,,,10065.0,10019.0,...,11217.0,11233.0,,,,11230.0,,11235.0,11226.0,
4,,,,,10040.0,10039.0,,,10075.0,10020.0,...,11231.0,11238.0,,,,,,,,


In [178]:
# Preview the borough lookup: one column per borough, zipcodes as values
nyc_bo.head()

Unnamed: 0,Manhattan,Staten Island,Bronx,Queens,Brooklyn
0,10001.0,10301.0,10451.0,11004,11201.0
1,10002.0,10302.0,10452.0,11005,11202.0
2,10003.0,10303.0,10453.0,11006,11203.0
3,10004.0,10304.0,10454.0,11007,11204.0
4,10005.0,10305.0,10455.0,11008,11205.0


In [179]:
# Preview the Yelp POI table as loaded, before any column clean-up
yelp.head()

Unnamed: 0.1,Unnamed: 0,Name,Reviews,Rating,Price,Distance (m),Latitude,Longitude,Address,Borough,Zipcode,Category
0,0,Ripe Kitchen & Bar,287,4.0,$$,805.077589,40.898214,-73.838821,151 W Sandford Blvd,Mount Vernon,10550,restaurant
1,1,H.I.M Ital Health Food Market,47,4.5,$,715.467929,40.897633,-73.854665,4374B White Plains Rd,Bronx,10466,restaurant
2,2,Ali's Roti Shop,104,4.0,$,809.642747,40.894,-73.85684,4220 White Plains Rd,Bronx,10466,restaurant
3,3,Paula's Soul Cafe,202,3.0,$$,746.891106,40.89269,-73.85568,746 E 233rd St,Bronx,10466,restaurant
4,4,Jerk House,62,3.5,$$,771.525854,40.89464,-73.85648,4246 White Plains,Bronx,10466,restaurant


In [180]:
# Discard the leftover index column and the street address from the Yelp table
yelp = yelp.drop(columns=['Unnamed: 0', 'Address'])

In [181]:
# Inspect the distinct Yelp price labels; they must be converted to a numeric scale
yelp['Price'].unique()

array(['$$', '$', '$$$', nan, '$$$$'], dtype=object)

In [182]:
# Yelp encodes price as one to four dollar signs; convert the labels to a numeric 1-4 scale.
# The result is assigned back to the column: calling replace(..., inplace=True) on the
# yelp['Price'] selection is chained assignment, which pandas warns about and which stops
# updating `yelp` once Copy-on-Write is enabled.
# map() leaves missing prices as NaN (any label outside the mapping also becomes NaN)
# and yields a float column without relying on replace()'s deprecated downcasting.
price_scale = {'$': 1, '$$': 2, '$$$': 3, '$$$$': 4}
yelp['Price'] = yelp['Price'].map(price_scale)

In [183]:
# Confirm the price labels are now numeric (missing prices remain NaN)
yelp['Price'].unique()

array([ 2.,  1.,  3., nan,  4.])

In [184]:
# Some prices are still NaN; in theory, some of these will be filled from the other sources once the datasets are merged

In [185]:
# Preview the Foursquare POI table as loaded
fsq.head()

Unnamed: 0.1,Unnamed: 0,Distance (m),Name,Price,Rating,Borough,Neighborhood,Zipcode,Latitude,Longitude,Category
0,0,127.0,Lollipops Gelato,1.0,8.6,Bronx,Edenwald,10466.0,40.894705,-73.847201,restaurant
1,1,797.0,Ripe Kitchen and Bar,2.0,8.4,Mount Vernon,Wakefield,10550.0,40.894705,-73.847201,restaurant
2,2,821.0,Ali's Roti Shop,1.0,8.1,Bronx,Wakefield,10466.0,40.894705,-73.847201,restaurant
3,3,983.0,Jimbo's Hamburger Palace,1.0,8.0,Bronx,Williambridge,10466.0,40.894705,-73.847201,restaurant
4,4,454.0,Cooler Runnings Jamaican Restaurant,2.0,6.4,Bronx,Wakefield,10466.0,40.894705,-73.847201,restaurant


In [186]:
# Use the 'Neighbourhood' spelling throughout and discard the leftover index column
fsq = (
    fsq
    .rename(columns={'Neighborhood': 'Neighbourhood'})
    .drop(columns='Unnamed: 0')
)

In [187]:
# Check the Foursquare table after the rename and the column drop
fsq.head()

Unnamed: 0,Distance (m),Name,Price,Rating,Borough,Neighbourhood,Zipcode,Latitude,Longitude,Category
0,127.0,Lollipops Gelato,1.0,8.6,Bronx,Edenwald,10466.0,40.894705,-73.847201,restaurant
1,797.0,Ripe Kitchen and Bar,2.0,8.4,Mount Vernon,Wakefield,10550.0,40.894705,-73.847201,restaurant
2,821.0,Ali's Roti Shop,1.0,8.1,Bronx,Wakefield,10466.0,40.894705,-73.847201,restaurant
3,983.0,Jimbo's Hamburger Palace,1.0,8.0,Bronx,Williambridge,10466.0,40.894705,-73.847201,restaurant
4,454.0,Cooler Runnings Jamaican Restaurant,2.0,6.4,Bronx,Wakefield,10466.0,40.894705,-73.847201,restaurant


In [188]:
# Preview the Google Places POI table as loaded
gp.head()

Unnamed: 0,Name,Price,Rating,Reviews,Borough,Neighborhood,Address,Zipcode,Latitude,Longitude,Category
0,New China Garden,1.0,3.9,85.0,Bronx,Northeast Bronx,"724 Nereid Avenue, The Bronx",10466.0,40.897919,-73.853364,restaurant
1,El Jobo,,3.3,216.0,Bronx,Northeast Bronx,"748 East 233rd Street, The Bronx",10466.0,40.892678,-73.855632,restaurant
2,Paula's,2.0,3.9,697.0,Bronx,Northeast Bronx,"746 East 233rd Street, The Bronx",10466.0,40.892692,-73.855678,restaurant
3,Jerk House Caribbean Restaurant,,4.2,429.0,Bronx,Northeast Bronx,"4246 White Plains Road, The Bronx",10466.0,40.894663,-73.8564,restaurant
4,McDonald's,1.0,3.7,653.0,Bronx,Northeast Bronx,"4174 White Plains Road, The Bronx",10466.0,40.892779,-73.857473,restaurant


In [189]:
# Use the 'Neighbourhood' spelling throughout and discard the street address
gp = (
    gp
    .rename(columns={'Neighborhood': 'Neighbourhood'})
    .drop(columns=['Address'])
)

In [190]:
# Summary statistics for the Foursquare price and rating columns
print(
    'Price:',
    fsq['Price'].describe(),
    'Rating:',
    fsq['Rating'].describe(),
    sep='\n',
)

Price:
count    6269.000000
mean        1.594353
std         0.704104
min         1.000000
25%         1.000000
50%         1.000000
75%         2.000000
max         4.000000
Name: Price, dtype: float64
Rating:
count    4685.000000
mean        7.803308
std         1.004244
min         4.500000
25%         7.100000
50%         8.000000
75%         8.700000
max         9.600000
Name: Rating, dtype: float64


In [191]:
# Summary statistics for the Yelp price and rating columns
print(
    'Price:',
    yelp['Price'].describe(),
    'Rating:',
    yelp['Rating'].describe(),
    sep='\n',
)

Price:
count    6116.000000
mean        1.797907
std         0.629113
min         1.000000
25%         1.000000
50%         2.000000
75%         2.000000
max         4.000000
Name: Price, dtype: float64
Rating:
count    7115.000000
mean        3.916936
std         0.630598
min         1.000000
25%         3.500000
50%         4.000000
75%         4.500000
max         5.000000
Name: Rating, dtype: float64


In [192]:
# Summary statistics for the Google Places price and rating columns
print(
    'Price:',
    gp['Price'].describe(),
    'Rating:',
    gp['Rating'].describe(),
    sep='\n',
)

Price:
count    3583.000000
mean        1.585822
std         0.628150
min         0.000000
25%         1.000000
50%         2.000000
75%         2.000000
max         4.000000
Name: Price, dtype: float64
Rating:
count    10862.000000
mean         4.129507
std          0.673147
min          1.000000
25%          3.900000
50%          4.200000
75%          4.500000
max          5.000000
Name: Rating, dtype: float64


In [193]:
# Bring every source's ratings onto a shared 1-5 range
# (prices are treated as already sharing a 1-4 scale)
scaler = MinMaxScaler(feature_range=(1, 5))

# The scaler is re-fitted for each source, so each table is stretched between
# its own observed minimum and maximum rating
fsq['Rating'] = pd.DataFrame(scaler.fit_transform(fsq[['Rating']].to_numpy()))
yelp['Rating'] = pd.DataFrame(scaler.fit_transform(yelp[['Rating']].to_numpy()))
gp['Rating'] = pd.DataFrame(scaler.fit_transform(gp[['Rating']].to_numpy()))


In [194]:
# Report (rows, columns) for each source table before merging
for label, frame in (
    ('Foursquare Shape:', fsq),
    ('Yelp Shape:', yelp),
    ('Google Places Shape:', gp),
):
    print(label, frame.shape)

Foursquare Shape: (7001, 10)
Yelp Shape: (7115, 10)
Google Places Shape: (15516, 10)


In [195]:
# Merging Dataframes

In [196]:
# Outer-join Foursquare with Google Places on name + zipcode, keeping unmatched rows from both
poi_merge = pd.merge(fsq, gp, how='outer', on=['Name', 'Zipcode'])

In [197]:
# Outer-join the Yelp table onto the combined frame using the same name + zipcode key
poi_merge = pd.merge(poi_merge, yelp, how='outer', on=['Name', 'Zipcode'])

In [198]:
# Preview the merged frame; columns shared by several sources carry _x/_y suffixes.
# NOTE: the same name/zipcode pair can appear on more than one row (see "Ali's Roti Shop" in the output).
poi_merge.head()

Unnamed: 0,Distance (m)_x,Name,Price_x,Rating_x,Borough_x,Neighbourhood_x,Zipcode,Latitude_x,Longitude_x,Category_x,...,Longitude_y,Category_y,Reviews_y,Rating,Price,Distance (m)_y,Latitude,Longitude,Borough,Category
0,127.0,Lollipops Gelato,1.0,4.215686,Bronx,Edenwald,10466.0,40.894705,-73.847201,restaurant,...,,,,,,,,,,
1,797.0,Ripe Kitchen and Bar,2.0,4.058824,Mount Vernon,Wakefield,10550.0,40.894705,-73.847201,restaurant,...,,,,,,,,,,
2,821.0,Ali's Roti Shop,1.0,3.823529,Bronx,Wakefield,10466.0,40.894705,-73.847201,restaurant,...,-73.856825,restaurant,104.0,4.0,1.0,809.642747,40.894,-73.85684,Bronx,restaurant
3,821.0,Ali's Roti Shop,1.0,3.823529,Bronx,Wakefield,10466.0,40.894705,-73.847201,restaurant,...,-73.856825,restaurant,104.0,4.0,1.0,1004.046648,40.894,-73.85684,Bronx,restaurant
4,983.0,Jimbo's Hamburger Palace,1.0,3.745098,Bronx,Williambridge,10466.0,40.894705,-73.847201,restaurant,...,,,24.0,3.0,1.0,1011.434983,40.891804,-73.858604,Bronx,restaurant


In [199]:
# Per-column non-null counts of the merged frame, before the suffixed columns are consolidated
poi_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27728 entries, 0 to 27727
Data columns (total 26 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Distance (m)_x   8958 non-null   float64
 1   Name             27726 non-null  object 
 2   Price_x          8069 non-null   float64
 3   Rating_x         6239 non-null   float64
 4   Borough_x        8958 non-null   object 
 5   Neighbourhood_x  7858 non-null   object 
 6   Zipcode          27722 non-null  float64
 7   Latitude_x       8960 non-null   float64
 8   Longitude_x      8960 non-null   float64
 9   Category_x       8960 non-null   object 
 10  Price_y          5027 non-null   float64
 11  Rating_y         12444 non-null  float64
 12  Reviews_x        12444 non-null  float64
 13  Borough_y        17098 non-null  object 
 14  Neighbourhood_y  16510 non-null  object 
 15  Latitude_y       17098 non-null  float64
 16  Longitude_y      17098 non-null  float64
 17  Category_y  

In [200]:
# Consolidate the per-source columns left behind by the two merges. Each target column
# keeps its own value where present and is otherwise filled from its fallback columns,
# in the order listed.
# Results are assigned back to the frame: fillna(..., inplace=True) on a poi_merge[...]
# selection is chained assignment, which pandas warns about and which no longer updates
# poi_merge once Copy-on-Write is enabled.
fill_order = {
    'Price': ['Price_x', 'Price_y'],
    'Rating': ['Rating_x', 'Rating_y'],
    'Reviews_x': ['Reviews_y'],
    'Borough': ['Borough_x', 'Borough_y'],
    'Neighbourhood_x': ['Neighbourhood_y'],
    'Latitude': ['Latitude_x', 'Latitude_y'],
    'Longitude': ['Longitude_x', 'Longitude_y'],
    'Category': ['Category_x', 'Category_y'],
    'Distance (m)_x': ['Distance (m)_y'],
}
for target_col, fallback_cols in fill_order.items():
    for fallback_col in fallback_cols:
        poi_merge[target_col] = poi_merge[target_col].fillna(poi_merge[fallback_col])

In [201]:
# Re-check the non-null counts after filling gaps across the suffixed columns
poi_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27728 entries, 0 to 27727
Data columns (total 26 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Distance (m)_x   13689 non-null  float64
 1   Name             27726 non-null  object 
 2   Price_x          8069 non-null   float64
 3   Rating_x         6239 non-null   float64
 4   Borough_x        8958 non-null   object 
 5   Neighbourhood_x  22448 non-null  object 
 6   Zipcode          27722 non-null  float64
 7   Latitude_x       8960 non-null   float64
 8   Longitude_x      8960 non-null   float64
 9   Category_x       8960 non-null   object 
 10  Price_y          5027 non-null   float64
 11  Rating_y         12444 non-null  float64
 12  Reviews_x        18773 non-null  float64
 13  Borough_y        17098 non-null  object 
 14  Neighbourhood_y  16510 non-null  object 
 15  Latitude_y       17098 non-null  float64
 16  Longitude_y      17098 non-null  float64
 17  Category_y  

In [202]:
# The consolidated columns now hold the combined values; discard the per-source leftovers
poi_merge = poi_merge.drop(
    columns=[
        'Price_x', 'Price_y',
        'Rating_x', 'Rating_y',
        'Reviews_y',
        'Borough_x', 'Borough_y',
        'Latitude_x', 'Latitude_y',
        'Longitude_x', 'Longitude_y',
        'Neighbourhood_y',
        'Category_x', 'Category_y',
        'Distance (m)_y',
    ]
)

In [203]:
# Confirm which columns remain after dropping the suffixed source columns
poi_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27728 entries, 0 to 27727
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Distance (m)_x   13689 non-null  float64
 1   Name             27726 non-null  object 
 2   Neighbourhood_x  22448 non-null  object 
 3   Zipcode          27722 non-null  float64
 4   Reviews_x        18773 non-null  float64
 5   Rating           21352 non-null  float64
 6   Price            14525 non-null  float64
 7   Latitude         27728 non-null  float64
 8   Longitude        27728 non-null  float64
 9   Borough          27726 non-null  object 
 10  Category         27728 non-null  object 
dtypes: float64(7), object(4)
memory usage: 2.5+ MB


In [204]:
# Strip the merge suffix from the three consolidated columns that still carry it
poi_merge = poi_merge.rename(
    columns={
        'Distance (m)_x': 'Distance (m)',
        'Neighbourhood_x': 'Neighbourhood',
        'Reviews_x': 'Reviews',
    }
)

In [205]:
# Display the merged frame (the rendered output is truncated to the first and last rows)
poi_merge

Unnamed: 0,Distance (m),Name,Neighbourhood,Zipcode,Reviews,Rating,Price,Latitude,Longitude,Borough,Category
0,127.000000,Lollipops Gelato,Edenwald,10466.0,,4.215686,1.0,40.894705,-73.847201,Bronx,restaurant
1,797.000000,Ripe Kitchen and Bar,Wakefield,10550.0,,4.058824,2.0,40.894705,-73.847201,Mount Vernon,restaurant
2,821.000000,Ali's Roti Shop,Wakefield,10466.0,545.0,4.000000,1.0,40.894000,-73.856840,Bronx,restaurant
3,821.000000,Ali's Roti Shop,Wakefield,10466.0,545.0,4.000000,1.0,40.894000,-73.856840,Bronx,restaurant
4,983.000000,Jimbo's Hamburger Palace,Williambridge,10466.0,24.0,3.000000,1.0,40.891804,-73.858604,Bronx,restaurant
...,...,...,...,...,...,...,...,...,...,...,...
27723,1120.782327,Quick pizza,,11691.0,2.0,3.000000,,40.606191,-73.754534,Rockaway,restaurant
27724,411.420036,The Penthouse - Ravel Hotel,,11101.0,1028.0,2.500000,3.0,40.754123,-73.949178,Long Island City,restaurant
27725,1130.740857,Anable Basin Sailing Bar & Grill,,11101.0,198.0,4.000000,2.0,40.749385,-73.955090,Long Island City,restaurant
27726,746.648503,The Local Bar and Cafe,,11101.0,87.0,4.500000,1.0,40.749562,-73.947701,Long Island City,restaurant


In [206]:
# Verify the final column names and non-null counts after the rename
poi_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27728 entries, 0 to 27727
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Distance (m)   13689 non-null  float64
 1   Name           27726 non-null  object 
 2   Neighbourhood  22448 non-null  object 
 3   Zipcode        27722 non-null  float64
 4   Reviews        18773 non-null  float64
 5   Rating         21352 non-null  float64
 6   Price          14525 non-null  float64
 7   Latitude       27728 non-null  float64
 8   Longitude      27728 non-null  float64
 9   Borough        27726 non-null  object 
 10  Category       27728 non-null  object 
dtypes: float64(7), object(4)
memory usage: 2.5+ MB


In [207]:
# Non-null count of every column within each POI category
poi_merge.groupby('Category').count()

Unnamed: 0_level_0,Distance (m),Name,Neighbourhood,Zipcode,Reviews,Rating,Price,Latitude,Longitude,Borough
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
park,0,2293,2227,2292,1904,1904,10,2293,2293,2293
restaurant,13689,16223,11303,16220,11841,14420,14302,16225,16225,16223
school,0,3572,3473,3572,2342,2342,3,3572,3572,3572
supermarket,0,1464,1444,1464,1381,1381,209,1464,1464,1464
transit_station,0,4174,4001,4174,1305,1305,1,4174,4174,4174


In [208]:
# The column headers of the lookup tables are the borough / neighbourhood names
bo_list = list(nyc_bo.columns)
nb_list = list(nyc_nb.columns)

In [209]:
# want to make sure we have consistent borough & neighbourhood info
def assign_borough(borough, poi=None, zip_table=None):
    """ Labels every POI whose zipcode belongs to the given borough.

    Takes the zipcodes listed in the `borough` column of the borough lookup
    table and writes `borough` into the 'Borough' column of every POI row whose
    'Zipcode' is one of them. Rows with any other (or a missing) zipcode are
    left untouched.

    Params:
        borough: column name in the lookup table; also the value written to 'Borough'
        poi: dataframe to update in place (defaults to the notebook-level poi_merge)
        zip_table: lookup dataframe with one column of zipcodes per borough
            (defaults to the notebook-level nyc_bo)

    Returns:
        None; the POI dataframe is modified in place

    """
    target = poi_merge if poi is None else poi
    lookup = nyc_bo if zip_table is None else zip_table

    # Lookup columns are NaN-padded to a common length. Drop the padding so a POI
    # with a missing zipcode can never match (isin treats NaN as equal to NaN).
    zipcodes = lookup[borough].dropna()

    # One vectorised membership test replaces a Python loop that scanned the
    # whole 'Zipcode' column once per lookup row.
    target.loc[target['Zipcode'].isin(zipcodes), 'Borough'] = borough

In [210]:
# Run the borough assignment once for every borough column of the lookup table,
# so each POI's 'Borough' is derived from its zipcode wherever the zipcode is listed
for borough_name in bo_list:
    assign_borough(borough_name)

In [211]:
# want to make sure we have consistent borough & neighbourhood info
def assign_neighbourhood(neighbourhood, poi=None, zip_table=None):
    """ Labels every POI whose zipcode belongs to the given neighbourhood.

    Takes the zipcodes listed in the `neighbourhood` column of the neighbourhood
    lookup table and writes `neighbourhood` into the 'Neighbourhood' column of
    every POI row whose 'Zipcode' is one of them. Rows with any other (or a
    missing) zipcode are left untouched.

    Params:
        neighbourhood: column name in the lookup table; also the value written
            to 'Neighbourhood'
        poi: dataframe to update in place (defaults to the notebook-level poi_merge)
        zip_table: lookup dataframe with one column of zipcodes per neighbourhood
            (defaults to the notebook-level nyc_nb)

    Returns:
        None; the POI dataframe is modified in place

    """
    target = poi_merge if poi is None else poi
    lookup = nyc_nb if zip_table is None else zip_table

    # Lookup columns are NaN-padded to a common length. Drop the padding so a POI
    # with a missing zipcode can never match (isin treats NaN as equal to NaN).
    zipcodes = lookup[neighbourhood].dropna()

    # One vectorised membership test replaces a Python loop that scanned the
    # whole 'Zipcode' column once per lookup row.
    target.loc[target['Zipcode'].isin(zipcodes), 'Neighbourhood'] = neighbourhood

In [212]:
# Run the neighbourhood assignment once for every neighbourhood column of the lookup
# table, so each POI's 'Neighbourhood' is derived from its zipcode wherever it is listed
for neighbourhood_name in nb_list:
    assign_neighbourhood(neighbourhood_name)

In [213]:
# Re-check the non-null counts after the zipcode-based borough / neighbourhood assignment
poi_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27728 entries, 0 to 27727
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Distance (m)   13689 non-null  float64
 1   Name           27726 non-null  object 
 2   Neighbourhood  26912 non-null  object 
 3   Zipcode        27722 non-null  float64
 4   Reviews        18773 non-null  float64
 5   Rating         21352 non-null  float64
 6   Price          14525 non-null  float64
 7   Latitude       27728 non-null  float64
 8   Longitude      27728 non-null  float64
 9   Borough        27726 non-null  object 
 10  Category       27728 non-null  object 
dtypes: float64(7), object(4)
memory usage: 2.5+ MB


In [214]:
# Remove rows whose 'Borough' is still one of these place names rather than a borough
poi_merge.drop(
    index=poi_merge.index[
        poi_merge['Borough'].isin([
            'Mount Vernon',
            'Pelham Manor',
            'Pelham',
            'Yonkers',
            'Long Island City',
            'Floral Park',
            'Bellerose',
            'Elizabeth',
        ])
    ],
    inplace=True,
)

In [215]:
# Count the rows that repeat an earlier (Name, Zipcode, Category) combination
int(poi_merge.duplicated(subset=['Name', 'Zipcode', 'Category']).sum())

5813

In [216]:
# Keep only the first row for each (Name, Zipcode, Category) combination
poi_merge = poi_merge.drop_duplicates(
    subset=['Name', 'Zipcode', 'Category'],
    keep='first',
)

In [217]:
# Count the rows that still have no neighbourhood after the zipcode-based assignment
int(poi_merge['Neighbourhood'].isna().sum())

601

In [218]:
# Few rows are affected, so simply discard those without a neighbourhood
poi_merge = poi_merge.dropna(subset=['Neighbourhood'])

In [219]:
# Structure check after dropping non-borough rows, duplicates and rows without a neighbourhood
poi_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21251 entries, 0 to 27727
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Distance (m)   7873 non-null   float64
 1   Name           21251 non-null  object 
 2   Neighbourhood  21251 non-null  object 
 3   Zipcode        21249 non-null  float64
 4   Reviews        13631 non-null  float64
 5   Rating         15480 non-null  float64
 6   Price          8960 non-null   float64
 7   Latitude       21251 non-null  float64
 8   Longitude      21251 non-null  float64
 9   Borough        21251 non-null  object 
 10  Category       21251 non-null  object 
dtypes: float64(7), object(4)
memory usage: 1.9+ MB


In [220]:
# Write the cleaned, merged POIs to disk (the row index is written as the first column)
poi_merge.to_csv('clean_data/merged_pois_cleaned.csv', index=True)

In [221]:
# Summary statistics for the numeric columns of the cleaned, merged frame
poi_merge.describe()

Unnamed: 0,Distance (m),Zipcode,Reviews,Rating,Price,Latitude,Longitude
count,7873.0,21249.0,13631.0,15480.0,8960.0,21251.0,21251.0
mean,640.299756,10837.086169,346.168293,3.982463,1.654353,40.716543,-73.927634
std,1025.834403,537.690894,1291.016912,0.734139,0.657797,0.089286,0.10155
min,3.0,10001.0,1.0,1.0,0.0,40.499251,-74.252618
25%,362.842535,10310.0,10.0,3.6,1.0,40.642818,-73.983182
50%,577.104203,11201.0,68.0,4.0,2.0,40.715576,-73.925167
75%,802.0,11238.0,311.0,4.5,2.0,40.775048,-73.857446
max,23548.623893,11697.0,75227.0,5.0,4.0,40.912785,-73.69901
