In [43]:
# import libraries

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import requests
import json
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
from bs4 import BeautifulSoup

print('Necessary libraries imported.')

Necessary libraries imported.


In [52]:
# import data

ny_data = json.loads(requests.get('https://cocl.us/new_york_dataset').text)
print('New York data imported.')

New York data imported.


In [47]:
# create dataframe

column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']
neighborhoods = pd.DataFrame(columns = column_names)
neighborhoods_data = ny_data['features']
for data in neighborhoods_data:
    borough = data['properties']['borough']
    neighborhood_name = data['properties']['name']
    neighborhood_geo = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_geo[1]
    neighborhood_lon = neighborhood_geo[0]
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index = True)
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [48]:
# group data

neighborhoods_cleaned = neighborhoods[neighborhoods.Borough != "Not assigned"].reset_index(drop = True)
neighborhoods_cleaned.head()
neighborhoods_grouped = neighborhoods_cleaned.groupby(['Borough', 'Latitude', 'Longitude'], as_index = False).agg(lambda x: ','.join(x))
neighborhoods_grouped.head()

Unnamed: 0,Borough,Latitude,Longitude,Neighborhood
0,Bronx,40.801664,-73.913221,Port Morris
1,Bronx,40.806239,-73.9161,Mott Haven
2,Bronx,40.806551,-73.854144,Clason Point
3,Bronx,40.80973,-73.883315,Hunts Point
4,Bronx,40.815099,-73.895788,Longwood


In [53]:
# import data
res = requests.get("https://www.health.ny.gov/statistics/cancer/registry/appendix/neighborhoods.htm")
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0] 
df = pd.read_html(str(table))
print(df[0].to_json(orient='records'))
df = df[0]
df.head()

[{"Borough":"Bronx","Neighborhood":"Central Bronx","ZIP Codes":"10453, 10457, 10460"},{"Borough":"Bronx","Neighborhood":"Bronx Park and Fordham","ZIP Codes":"10458, 10467, 10468"},{"Borough":"Bronx","Neighborhood":"High Bridge and Morrisania","ZIP Codes":"10451, 10452, 10456"},{"Borough":"Bronx","Neighborhood":"Hunts Point and Mott Haven","ZIP Codes":"10454, 10455, 10459, 10474"},{"Borough":"Bronx","Neighborhood":"Kingsbridge and Riverdale","ZIP Codes":"10463, 10471"},{"Borough":"Bronx","Neighborhood":"Northeast Bronx","ZIP Codes":"10466, 10469, 10470, 10475"},{"Borough":"Bronx","Neighborhood":"Southeast Bronx","ZIP Codes":"10461, 10462,10464, 10465, 10472, 10473"},{"Borough":"Brooklyn","Neighborhood":"Central Brooklyn","ZIP Codes":"11212, 11213, 11216, 11233, 11238"},{"Borough":"Brooklyn","Neighborhood":"Southwest Brooklyn","ZIP Codes":"11209, 11214, 11228"},{"Borough":"Brooklyn","Neighborhood":"Borough Park","ZIP Codes":"11204, 11218, 11219, 11230"},{"Borough":"Brooklyn","Neighborhoo

Unnamed: 0,Borough,Neighborhood,ZIP Codes
0,Bronx,Central Bronx,"10453, 10457, 10460"
1,Bronx,Bronx Park and Fordham,"10458, 10467, 10468"
2,Bronx,High Bridge and Morrisania,"10451, 10452, 10456"
3,Bronx,Hunts Point and Mott Haven,"10454, 10455, 10459, 10474"
4,Bronx,Kingsbridge and Riverdale,"10463, 10471"


In [57]:
# clean data

df[['ZIP Codes','Borough']]

Unnamed: 0,ZIP Codes,Borough
0,"10453, 10457, 10460",Bronx
1,"10458, 10467, 10468",Bronx
2,"10451, 10452, 10456",Bronx
3,"10454, 10455, 10459, 10474",Bronx
4,"10463, 10471",Bronx
5,"10466, 10469, 10470, 10475",Bronx
6,"10461, 10462,10464, 10465, 10472, 10473",Bronx
7,"11212, 11213, 11216, 11233, 11238",Brooklyn
8,"11209, 11214, 11228",Brooklyn
9,"11204, 11218, 11219, 11230",Brooklyn


In [81]:
# merge dataframes

neighborhoods = pd.merge(df[['ZIP Codes','Borough']], neighborhoods, on = 'Borough')
neighborhoods.head()

Unnamed: 0,ZIP Codes,Borough,ZIP Codes_x,ZIP Codes_y,Latitude,Longitude,Neighborhood
0,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.801664,-73.913221,Port Morris
1,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.806239,-73.9161,Mott Haven
2,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.806551,-73.854144,Clason Point
3,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.80973,-73.883315,Hunts Point
4,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.815099,-73.895788,Longwood


In [82]:
wakefield_data = neighborhoods[neighborhoods['Borough'] == 'Bronx'].reset_index(drop = True)
wakefield_data.head()

Unnamed: 0,ZIP Codes,Borough,ZIP Codes_x,ZIP Codes_y,Latitude,Longitude,Neighborhood
0,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.801664,-73.913221,Port Morris
1,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.806239,-73.9161,Mott Haven
2,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.806551,-73.854144,Clason Point
3,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.80973,-73.883315,Hunts Point
4,"10453, 10457, 10460",Bronx,"10453, 10457, 10460","10453, 10457, 10460",40.815099,-73.895788,Longwood


In [108]:
wakefield_data.loc[0, 'Neighborhood']

'Breezy Point'

In [109]:
neighborhood_latitude = wakefield_data.loc[0, 'Latitude']
neighborhood_longitude = wakefield_data.loc[0, 'Longitude']

neighborhood_name = wakefield_data.loc[0, 'Neighborhood']

print('Geographical location of {} are {}, {}.'.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Geographical location of Breezy Point are 40.55740128845452, -73.92551196994168.


In [93]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    client_ID, 
    client_secret, 
    version, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d4214a966dc060025c50e58'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'},
    {'name': '$-$$$$', 'key': 'price'}]},
  'headerLocation': 'Port Morris',
  'headerFullLocation': 'Port Morris, Bronx',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 15,
  'suggestedBounds': {'ne': {'lat': 40.80616363225621,
    'lng': -73.90728778171822},
   'sw': {'lat': 40.7971636232562, 'lng': -73.91915500600447}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5151c128e4b026975853f870',
       'name': 'The Bronx Brewery',
       'location': {'address': '856 E 136th St',
        'crossStreet': 'between Willow Ave and Walnut Ave',
        'lat': 40.80177432001329,
   

In [110]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [96]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues)

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis = 1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Bronx Brewery,Brewery,40.801774,-73.910297
1,Port Morris Distillery,Distillery,40.800526,-73.913513
2,Pio Pio,Peruvian Restaurant,40.806047,-73.914185
3,U-Haul Moving & Storage of Port Morris,Storage Facility,40.803902,-73.911248
4,Dunkin',Donut Shop,40.80567,-73.911338


In [111]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

15 venues were returned by Foursquare.


In [114]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [112]:
wakefield_data = wakefield_data.loc[:39,:]
wakefield_venues = getNearbyVenues(names = wakefield_data['Neighborhood'],
                                   latitudes = wakefield_data['Latitude'],
                                   longitudes = wakefield_data['Longitude']
                                  )

Breezy Point
Roxbury
Neponsit
Belle Harbor
Rockaway Park
Rockaway Beach
Hammels
Arverne
Edgemere
Somerville
Broad Channel
Far Rockaway
Bayswater
Howard Beach
Rosedale
Brookville
Lindenwood
Springfield Gardens
Laurelton
South Ozone Park
Rochdale
Ozone Park
Woodhaven
Cambria Heights
St. Albans
South Jamaica
Richmond Hill
Glendale
Jamaica Center
Kew Gardens
Ridgewood
Briarwood
Hollis
Jamaica Hills
Forest Hills Gardens
Middle Village
Jamaica Estates
Queens Village
Holliswood
Kew Gardens Hills


In [113]:
print('There are {} uniques categories.'.format(len(wakefield_venues['Venue Category'].unique())))
print(wakefield_venues.shape)
wakefield_venues.head()

There are 172 uniques categories.
(730, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Breezy Point,40.557401,-73.925512,Breezy Point 9/11 Memorial,40.559201,-73.927213,Monument / Landmark
1,Breezy Point,40.557401,-73.925512,Pelham Walk,40.558499,-73.922276,Trail
2,Breezy Point,40.557401,-73.925512,Beach 221st St,40.556696,-73.929498,Beach
3,Breezy Point,40.557401,-73.925512,Jamirving Beach,40.557101,-73.92025,Beach
4,Breezy Point,40.557401,-73.925512,Brrezy Point Surf Shop,40.559243,-73.920123,Board Shop
