In [11]:
#conda install -c conda-forge geopy

In [12]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 10 )
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
from folium import plugins
from folium.plugins import HeatMap


---
### **Introduction: Business Problem**


Analyzing the number of fitness centers in Manhattan, New York to decide where would be a good location to start a new gym.

Who would be interested in this project: Investors and business owners who would like to start a gym, as well as fitness enthusiasts who would like to live closer to a ym or fitness center

---

### **Data**

I used the '**newyork_data.json**' dataset from IBM, as well as the Foursqaure API to get the information on gyms and fitness centers in Manhattan


### **Methodology**

I started off by loading the **newyork_data.json** Json file into a data set. I organized the data set into four columns - Borough, Neighborhood, Latitude and Longitude.

I then took the data from the new york dataset and created a new dataset for just Manhattan, which consisted of clusters of all the Neighborhoods in Manhattan using their Latitude and Longitude coordinates

After that I used the Foursquare API combined with the Manhattan Neighborhood Dataset to get a list of every Gym and Fitness center in the Borough along with their Latitude and Longitudes.

Then using Folium I generated a heatmap to see the density or 'heat' of the number of Gyms and Fitness Centers in different parts of Manhattan

In [13]:
!wget -q -O 'newyork_data.json' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json

In [14]:
with open('newyork_data.json') as json_data:
  newyork_data = json.load(json_data)

In [15]:
#print(newyork_data)

In [16]:
neighborhoods_data = newyork_data['features']
neighborhoods_data[0:2]

[{'type': 'Feature',
  'id': 'nyu_2451_34572.1',
  'geometry': {'type': 'Point',
   'coordinates': [-73.84720052054902, 40.89470517661]},
  'geometry_name': 'geom',
  'properties': {'name': 'Wakefield',
   'stacked': 1,
   'annoline1': 'Wakefield',
   'annoline2': None,
   'annoline3': None,
   'annoangle': 0.0,
   'borough': 'Bronx',
   'bbox': [-73.84720052054902,
    40.89470517661,
    -73.84720052054902,
    40.89470517661]}},
 {'type': 'Feature',
  'id': 'nyu_2451_34572.2',
  'geometry': {'type': 'Point',
   'coordinates': [-73.82993910812398, 40.87429419303012]},
  'geometry_name': 'geom',
  'properties': {'name': 'Co-op City',
   'stacked': 2,
   'annoline1': 'Co-op',
   'annoline2': 'City',
   'annoline3': None,
   'annoangle': 0.0,
   'borough': 'Bronx',
   'bbox': [-73.82993910812398,
    40.87429419303012,
    -73.82993910812398,
    40.87429419303012]}}]

In [17]:
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']
neighborhoods = pd.DataFrame(columns=column_names)
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [18]:
for data in neighborhoods_data:
  borough = neighborhood_name = data['properties']['borough']
  neighborhood_name = data['properties']['name']
  neighborhood_latlon = data['geometry']['coordinates']
  neighborhood_lat = neighborhood_latlon[1]
  neighborhood_lon = neighborhood_latlon[0]

  neighborhoods = neighborhoods.append({'Borough': borough,
                                        'Neighborhood': neighborhood_name,
                                        'Latitude': neighborhood_lat,
                                        'Longitude': neighborhood_lon}, ignore_index = True)

In [19]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [20]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)

In [21]:
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688



### **Analysis**

As you can see, Manhattan is denser (has more neighborhoods) toward the water and the South West end of the borough, and less dense in the North East end. 

In [22]:
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
manhattan_center = [latitude, longitude]
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))
print(manhattan_center)

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.
[40.7896239, -73.9598939]


In [23]:
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan

In [24]:
CLIENT_ID = 'SQJU3NP5PSZZ4SEHGSC3ILIU3VPY0VMHAQNU1JWLKMNYN2GA' # your Foursquare ID
CLIENT_SECRET = 'P3SDDVEOOGVU2W5A4DJLOVREYURICFU000DDYLDJ3HHISJ4V' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: SQJU3NP5PSZZ4SEHGSC3ILIU3VPY0VMHAQNU1JWLKMNYN2GA
CLIENT_SECRET:P3SDDVEOOGVU2W5A4DJLOVREYURICFU000DDYLDJ3HHISJ4V


In [25]:
manhattan_data

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.910660
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.936900
3,Manhattan,Inwood,40.867684,-73.921210
4,Manhattan,Hamilton Heights,40.823604,-73.949688
...,...,...,...,...
35,Manhattan,Turtle Bay,40.752042,-73.967708
36,Manhattan,Tudor City,40.746917,-73.971219
37,Manhattan,Stuyvesant Town,40.731000,-73.974052
38,Manhattan,Flatiron,40.739673,-73.990947


In [26]:
LIMIT = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=SQJU3NP5PSZZ4SEHGSC3ILIU3VPY0VMHAQNU1JWLKMNYN2GA&client_secret=P3SDDVEOOGVU2W5A4DJLOVREYURICFU000DDYLDJ3HHISJ4V&v=20180605&ll=40.7896239,-73.9598939&radius=500&limit=100'

In [27]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '614b5101b7b31a07c58f840d'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Central Park',
  'headerFullLocation': 'Central Park, New York',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 28,
  'suggestedBounds': {'ne': {'lat': 40.794123904500005,
    'lng': -73.95396136384342},
   'sw': {'lat': 40.7851238955, 'lng': -73.96582643615658}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4a78425df964a52053e51fe3',
       'name': 'Central Park Tennis Center',
       'location': {'address': 'Central Park West at 96th St',
        'lat': 40.78931319964619,
        'lng': -73.96186241658044,
        'labeledLatLngs': [{'label': 'disp

In [28]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [29]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues)

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues)


Unnamed: 0,name,categories,lat,lng
0,Central Park Tennis Center,Tennis Court,40.789313,-73.961862
1,East Meadow,Field,40.79016,-73.955498
2,North Meadow Recreation Center,Recreation Center,40.791216,-73.959661
3,Oldest Tree in Central Park,Park,40.789188,-73.957867
4,Central Park - Wild West Playground,Playground,40.789715,-73.965471


In [30]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

28 venues were returned by Foursquare.


In [31]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [32]:
manhattan_venues = getNearbyVenues(names = manhattan_data['Neighborhood'],
                                  latitudes = manhattan_data['Latitude'],
                                  longitudes = manhattan_data['Longitude'])

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [33]:
print(manhattan_venues.shape)
manhattan_venues.head()

(3276, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Bikram Yoga,40.876844,-73.906204,Yoga Studio
1,Marble Hill,40.876551,-73.91066,Arturo's,40.874412,-73.910271,Pizza Place
2,Marble Hill,40.876551,-73.91066,Tibbett Diner,40.880404,-73.908937,Diner
3,Marble Hill,40.876551,-73.91066,Dunkin',40.877136,-73.906666,Donut Shop
4,Marble Hill,40.876551,-73.91066,Starbucks,40.877531,-73.905582,Coffee Shop


Filter out Neighborhoods with Gyms or Fitness Centers

In [34]:
fitness_venues_list = ['Gym', 'Gym / Fitness Center']
manhattan_gyms = manhattan_venues[manhattan_venues['Venue Category'].isin(fitness_venues_list)]
manhattan_gyms.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
5,Marble Hill,40.876551,-73.91066,Astral Fitness & Wellness Center,40.876705,-73.906372,Gym
6,Marble Hill,40.876551,-73.91066,Blink Fitness,40.877271,-73.905595,Gym
165,Washington Heights,40.851903,-73.9369,Planet Fitness,40.847536,-73.937937,Gym / Fitness Center
182,Washington Heights,40.851903,-73.9369,Blink Fitness,40.848562,-73.936941,Gym
208,Washington Heights,40.851903,-73.9369,Lucille Roberts,40.848487,-73.934636,Gym


### **Analysis Continued**

Here we see a heatmap of the gyms and fitness centers and you can immediately notice that there are more gyms towards the Southern end of Manhattan than the Northern end (much like the density of the neighborhoods)

In [35]:
gyms_latlongs = manhattan_gyms[['Venue Latitude', 'Venue Longitude']]
gyms_latlongs_list = gyms_latlongs.values.tolist()

In [36]:
manhattan_neighborhood_latlongs = manhattan_gyms[['Neighborhood Latitude', 'Neighborhood Longitude']]
manhattan_neighborhood_latlongs_list = manhattan_neighborhood_latlongs.values.tolist()

In [37]:
def manhattan_neighborhood_style(feature):
    return { 'color': 'blue', 'fill': False }

In [38]:
heatmap_manhattan = folium.Map(location=[latitude, longitude], zoom_start=12, width = '50%', height = '70%')
folium.TileLayer('cartodbpositron').add_to(heatmap_manhattan)
HeatMap(gyms_latlongs_list).add_to(heatmap_manhattan)
folium.Marker(manhattan_center).add_to(heatmap_manhattan)
folium.Circle(manhattan_center, radius=3000, fill=False, color='white').add_to(heatmap_manhattan)
folium.Circle(manhattan_center, radius=6000, fill=False, color='white').add_to(heatmap_manhattan)
folium.Circle(manhattan_center, radius=9000, fill=False, color='white').add_to(heatmap_manhattan)
heatmap_manhattan

### **Results and Discussion**

Viewing the analysis shows us that Manhattan has a lot more neighborhoods towards its Southern end, and it also has more gyms and fitness centers in that direction.

It seems to have the highest number of fitness centers in the Financial District and Upper East Side and very few in Harlem, Carnegie Hill and Manhattan Valley - these neighborhoods could be ideal oppurtunities in therms of the lack of fitness centers.





---

### **Conclusion**

This report simply explored the oppurtinity for opening a new gym or fitness center in Manhattan purely based on how many other fitness centers were in each neighborhood of Manhattan.

A business owner has oppurtuny in the North side of Manhattan solely based on the advantage of the lack of competition, however it is encouraged to use this report as a guide to collect additional information on the less dense neighborhoods, such as income, population density and more.
