In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

   

In [2]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [3]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
    

In [5]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [6]:
neighborhoods_data = newyork_data['features']
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [7]:
neighborhoods


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


In [8]:
brooklyn_data = neighborhoods[neighborhoods['Borough'] == 'Brooklyn'].reset_index(drop=True)
brooklyn_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Brooklyn,Bay Ridge,40.625801,-74.030621
1,Brooklyn,Bensonhurst,40.611009,-73.99518
2,Brooklyn,Sunset Park,40.645103,-74.010316
3,Brooklyn,Greenpoint,40.730201,-73.954241
4,Brooklyn,Gravesend,40.59526,-73.973471


# the data is about Brooklyn neighborhoods and the their locations i will use this data to search and explore venues that has many coffe shops and then decide 
# where can we open a barand new coffee shop in the best area that is suitable for this kind of business the source of data is NYU and foursqaure Api that i will
# use in the application.

In [9]:
address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

40.6501038 -73.9495823


In [10]:
map_brook = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(brooklyn_data['Latitude'], brooklyn_data['Longitude'], brooklyn_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_brook)  
    
map_brook

In [11]:
CLIENT_ID = 'KCLDKZZCGP3UKEEJGF3GXOVOG23LWPVE2QZRXUHFKS55D0HJ' # your Foursquare ID
CLIENT_SECRET = 'AE3BUGIIJ1J44GSAIUJOUK0CQKJ1YPZO4QJ5EZ5KOSP0UAII' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KCLDKZZCGP3UKEEJGF3GXOVOG23LWPVE2QZRXUHFKS55D0HJ
CLIENT_SECRET:AE3BUGIIJ1J44GSAIUJOUK0CQKJ1YPZO4QJ5EZ5KOSP0UAII


In [12]:
LIMIT = 100 # limit of number of venues returned by Foursquare API



radius = 500 # define radius


In [13]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [39]:
brook_venues = getNearbyVenues(names=brooklyn_data['Neighborhood'],
                                   latitudes=brooklyn_data['Latitude'],
                                   longitudes=brooklyn_data['Longitude']
                                  )


Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker Heights
Gerritsen Beach
Marine Park
Clinton Hill
Sea Gate
Downtown
Boerum Hill
Prospect Lefferts Gardens
Ocean Hill
City Line
Bergen Beach
Midwood
Prospect Park South
Georgetown
East Williamsburg
North Side
South Side
Ocean Parkway
Fort Hamilton
Ditmas Park
Wingate
Rugby
Remsen Village
New Lots
Paerdegat Basin
Mill Basin
Fulton Ferry
Vinegar Hill
Weeksville
Broadway Junction
Dumbo
Homecrest
Highland Park
Madison
Erasmus


In [41]:
print(brook_venues.shape)
brook_venues.head(200)

(2833, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bay Ridge,40.625801,-74.030621,Pilo Arts Day Spa and Salon,40.624748,-74.030591,Spa
1,Bay Ridge,40.625801,-74.030621,Bagel Boy,40.627896,-74.029335,Bagel Shop
2,Bay Ridge,40.625801,-74.030621,Cocoa Grinder,40.623967,-74.030863,Juice Bar
3,Bay Ridge,40.625801,-74.030621,Pegasus Cafe,40.623168,-74.031186,Breakfast Spot
4,Bay Ridge,40.625801,-74.030621,Ho' Brah Taco Joint,40.62296,-74.031371,Taco Place
5,Bay Ridge,40.625801,-74.030621,Leo's Casa Calamari,40.623348,-74.031082,Pizza Place
6,Bay Ridge,40.625801,-74.030621,Blue Door Souvlakia,40.624567,-74.030311,Greek Restaurant
7,Bay Ridge,40.625801,-74.030621,Brooklyn Market,40.626939,-74.029948,Grocery Store
8,Bay Ridge,40.625801,-74.030621,Georgian Dream Cafe and Bakery,40.625586,-74.030196,Caucasian Restaurant
9,Bay Ridge,40.625801,-74.030621,The Bookmark Shoppe,40.624577,-74.030562,Bookstore


In [48]:
df1 = brook_venues[brook_venues['Venue Category'].str.contains('Coffee')]

In [49]:
df1

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
62,Bay Ridge,40.625801,-74.030621,Mocha Mocha Cafe,40.622699,-74.028636,Coffee Shop
119,Bensonhurst,40.611009,-73.99518,Caffe Romeo,40.609732,-73.989766,Coffee Shop
177,Greenpoint,40.730201,-73.954241,Homecoming,40.729696,-73.957525,Coffee Shop
188,Greenpoint,40.730201,-73.954241,odd fox,40.732673,-73.95455,Coffee Shop
191,Greenpoint,40.730201,-73.954241,Maman,40.730427,-73.958035,Coffee Shop
202,Greenpoint,40.730201,-73.954241,Upright Coffee,40.729332,-73.953892,Coffee Shop
203,Greenpoint,40.730201,-73.954241,Café de Colombia,40.730526,-73.951822,Coffee Shop
313,Brighton Beach,40.576825,-73.965094,Starbucks,40.577841,-73.961204,Coffee Shop
367,Manhattan Terrace,40.614433,-73.957438,Chock Full O' Nuts,40.61816,-73.958746,Coffee Shop
375,Flatbush,40.636326,-73.958401,Coffee Mob,40.635841,-73.962561,Coffee Shop


In [52]:
df2 = df1['Neighborhood'].value_counts()

In [53]:
df2

North Side            6
South Side            5
Dumbo                 5
Greenpoint            5
Bushwick              5
Carroll Gardens       5
East Williamsburg     4
Cobble Hill           4
Boerum Hill           4
Park Slope            4
Downtown              3
Fort Greene           3
Brooklyn Heights      2
Williamsburg          2
Bedford Stuyvesant    2
Prospect Heights      2
Fulton Ferry          2
Gowanus               2
Vinegar Hill          2
Flatbush              2
Brighton Beach        1
City Line             1
Manhattan Terrace     1
Ocean Hill            1
Borough Park          1
Bensonhurst           1
Georgetown            1
Windsor Terrace       1
Crown Heights         1
Ditmas Park           1
Bath Beach            1
Rugby                 1
Red Hook              1
Bay Ridge             1
Name: Neighborhood, dtype: int64

In [54]:
df3 = df1[df1['Neighborhood'].str.contains('North')]

In [55]:
df3

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
2098,North Side,40.714823,-73.958809,Sweatshop,40.71509,-73.959734,Coffee Shop
2135,North Side,40.714823,-73.958809,Black Brick,40.714185,-73.961557,Coffee Shop
2145,North Side,40.714823,-73.958809,Blue Bottle Coffee,40.716921,-73.960963,Coffee Shop
2158,North Side,40.714823,-73.958809,ppl,40.712942,-73.95865,Coffee Shop
2172,North Side,40.714823,-73.958809,Oslo Coffee,40.714725,-73.957043,Coffee Shop
2185,North Side,40.714823,-73.958809,Lion's Milk,40.715833,-73.955705,Coffee Shop


In [65]:
map_brook2 = folium.Map(location=[latitude, longitude], zoom_start=13)

# add markers to map
for lat, lng, label in zip(df3['Venue Latitude'], df3['Venue Longitude'], df3['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_brook2)  
    
map_brook2