#### download all the dependencies that we will need

In [121]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files


!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Libraries imported.


#### download data

In [122]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


#### load data

In [123]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [124]:
#newyork_data

In [125]:
neighborhoods_data = newyork_data['features']

#### to dataframe

In [126]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


#### fill the dataframe

In [127]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [128]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [129]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


#### Use geopy library to get the latitude and longitude values of New York City.

In [130]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


#### create map

In [131]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

#### create a new dataframe of the Brooklyn data.

In [132]:
brooklyn_data = neighborhoods[neighborhoods['Borough'] == 'Brooklyn'].reset_index(drop=True)
brooklyn_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Brooklyn,Bay Ridge,40.625801,-74.030621
1,Brooklyn,Bensonhurst,40.611009,-73.99518
2,Brooklyn,Sunset Park,40.645103,-74.010316
3,Brooklyn,Greenpoint,40.730201,-73.954241
4,Brooklyn,Gravesend,40.59526,-73.973471


In [133]:
brooklyn_data.shape

(70, 4)

#### get Brooklyn Geo info

In [134]:
address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="bl_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Brookly are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Brookly are 40.6501038, -73.9495823.


#### Define Foursquare Credentials and Version

In [135]:
CLIENT_ID = 'EOJABOVK2VI4ZFAIHXBRYWBDJLNE0ROCZVR1LOHKS1FV4FOT' # your Foursquare ID
CLIENT_SECRET = 'XYEPBE5BY0V33AQBKRMAR4Y3NR10RQRY2100IVDOSLDWRZJA' # your Foursquare Secret
VERSION = '20190920'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EOJABOVK2VI4ZFAIHXBRYWBDJLNE0ROCZVR1LOHKS1FV4FOT
CLIENT_SECRET:XYEPBE5BY0V33AQBKRMAR4Y3NR10RQRY2100IVDOSLDWRZJA


#### explore neighborhood in our dataframe

In [136]:
radius = 500
LIMIT=100

In [137]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['id'],
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue ID',          
                  'Venue Category']
    
    return(nearby_venues)



In [138]:
brooklyn_venues=[]
brooklyn_venues = getNearbyVenues(names=brooklyn_data['Neighborhood'],
                                latitudes=brooklyn_data['Latitude'],
                                longitudes=brooklyn_data['Longitude']
                                )

    
print(brooklyn_venues.shape)
brooklyn_venues.head()

(2834, 8)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue ID,Venue Category
0,Bay Ridge,40.625801,-74.030621,Pilo Arts Day Spa and Salon,40.624748,-74.030591,4b895827f964a5206c2d32e3,Spa
1,Bay Ridge,40.625801,-74.030621,Bagel Boy,40.627896,-74.029335,4ad09cf7f964a520bed820e3,Bagel Shop
2,Bay Ridge,40.625801,-74.030621,Cocoa Grinder,40.623967,-74.030863,538ca7d5498ec684c6387a46,Juice Bar
3,Bay Ridge,40.625801,-74.030621,Pegasus Cafe,40.623168,-74.031186,4abe4e84f964a520958c20e3,Breakfast Spot
4,Bay Ridge,40.625801,-74.030621,Ho' Brah Taco Joint,40.62296,-74.031371,4ea17d24e5facb29c6d81d6f,Taco Place


In [139]:
temp=[]
temp=brooklyn_venues.groupby('Neighborhood').count().sort_values('Venue', ascending=False)
temp=temp[temp['Venue']>=80]
list=temp.index.values
print(list)
i=0
for aa in list:
    print(list[i])
    i+=1

['Carroll Gardens' 'South Side' 'Downtown' 'Cobble Hill'
 'Brooklyn Heights' 'North Side' 'Greenpoint' 'Clinton Hill' 'Dumbo'
 'Boerum Hill' 'Prospect Heights' 'Bay Ridge' 'Fort Greene']
Carroll Gardens
South Side
Downtown
Cobble Hill
Brooklyn Heights
North Side
Greenpoint
Clinton Hill
Dumbo
Boerum Hill
Prospect Heights
Bay Ridge
Fort Greene


In [140]:
brooklyn_venues_object=[]
brooklyn_venues_object=brooklyn_venues[brooklyn_venues['Neighborhood'].isin(list)]

In [141]:
brooklyn_venues_object.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue ID,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bay Ridge,82,82,82,82,82,82,82
Boerum Hill,86,86,86,86,86,86,86
Brooklyn Heights,100,100,100,100,100,100,100
Carroll Gardens,100,100,100,100,100,100,100
Clinton Hill,92,92,92,92,92,92,92
Cobble Hill,100,100,100,100,100,100,100
Downtown,100,100,100,100,100,100,100
Dumbo,88,88,88,88,88,88,88
Fort Greene,80,80,80,80,80,80,80
Greenpoint,100,100,100,100,100,100,100


In [142]:
brooklyn_venues_object.groupby('Venue Category').count()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue ID
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Accessories Store,1,1,1,1,1,1,1
Adult Boutique,1,1,1,1,1,1,1
American Restaurant,20,20,20,20,20,20,20
Antique Shop,6,6,6,6,6,6,6
Arepa Restaurant,2,2,2,2,2,2,2
Argentinian Restaurant,2,2,2,2,2,2,2
Art Gallery,7,7,7,7,7,7,7
Arts & Crafts Store,8,8,8,8,8,8,8
Asian Restaurant,5,5,5,5,5,5,5
Athletics & Sports,2,2,2,2,2,2,2


In [143]:
temp2=[]
temp2=brooklyn_venues_object['Venue Category'].reset_index(drop=True)
bool=temp2.str.contains('Restaurant')

temp2=temp2[bool].reset_index(drop=True)
temp2=temp2.drop_duplicates().reset_index(drop=True)
list2=temp2.values
list2

array(['Caucasian Restaurant', 'Middle Eastern Restaurant',
       'Italian Restaurant', 'Greek Restaurant', 'Mexican Restaurant',
       'American Restaurant', 'Chinese Restaurant', 'Sushi Restaurant',
       'New American Restaurant', 'Vietnamese Restaurant',
       'Thai Restaurant', 'Seafood Restaurant', 'Indian Restaurant',
       'Dim Sum Restaurant', 'Fast Food Restaurant', 'Polish Restaurant',
       'French Restaurant', 'Restaurant', 'Vegetarian / Vegan Restaurant',
       'Falafel Restaurant', 'Ramen Restaurant', 'Caribbean Restaurant',
       'Korean Restaurant', 'Latin American Restaurant',
       'Cajun / Creole Restaurant', 'Southern / Soul Food Restaurant',
       'Japanese Restaurant', 'Asian Restaurant',
       'Eastern European Restaurant', 'Spanish Restaurant',
       'Ethiopian Restaurant', 'Dumpling Restaurant',
       'Mediterranean Restaurant', 'Argentinian Restaurant',
       'Filipino Restaurant', 'Cuban Restaurant', 'Israeli Restaurant',
       'Tapas Restaura

In [144]:
brooklyn_venues_food=[]
brooklyn_venues_food=brooklyn_venues_object[brooklyn_venues_object['Venue Category'].isin(list2)].reset_index(drop=True)
brooklyn_venues_food.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue ID,Venue Category
0,Bay Ridge,40.625801,-74.030621,Georgian Dream Cafe and Bakery,40.625586,-74.030196,57bf2b24498e2a2800681ed3,Caucasian Restaurant
1,Bay Ridge,40.625801,-74.030621,Karam,40.622931,-74.028316,4b6785d6f964a520aa532be3,Middle Eastern Restaurant
2,Bay Ridge,40.625801,-74.030621,Areo Ristorante,40.624415,-74.030782,4adb7364f964a520a32721e3,Italian Restaurant
3,Bay Ridge,40.625801,-74.030621,Elia Restaurant,40.62309,-74.031156,4bbce66da0a0c9b6b9f41a0f,Greek Restaurant
4,Bay Ridge,40.625801,-74.030621,Blue Door Souvlakia,40.624567,-74.030311,58fa85be98fbfc397bafe21f,Greek Restaurant


In [145]:
brooklyn_venues_food.shape

(324, 8)

In [146]:
address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="bl_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Brookly are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Brookly are 40.6501038, -73.9495823.


#### Gen a map for hot area's Restaurant

In [147]:
# create map of Brooklyn Restaurant using latitude and longitude values
map_brooklyn_food = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(brooklyn_venues_food['Venue Latitude'], brooklyn_venues_food['Venue Longitude'], brooklyn_venues_food['Venue']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_brooklyn_food)  
    
map_brooklyn_food

#### Group by restaurant type

In [148]:

brooklyn_food_type = brooklyn_venues_food.groupby(['Venue Category','Neighborhood']).count()


In [149]:
brooklyn_food_type

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue ID
Venue Category,Neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
American Restaurant,Bay Ridge,3,3,3,3,3,3
American Restaurant,Boerum Hill,1,1,1,1,1,1
American Restaurant,Brooklyn Heights,2,2,2,2,2,2
American Restaurant,Cobble Hill,1,1,1,1,1,1
American Restaurant,Dumbo,2,2,2,2,2,2
American Restaurant,Fort Greene,1,1,1,1,1,1
American Restaurant,Greenpoint,1,1,1,1,1,1
American Restaurant,North Side,3,3,3,3,3,3
American Restaurant,Prospect Heights,2,2,2,2,2,2
American Restaurant,South Side,4,4,4,4,4,4


#### Now we know most people of Brooklyn like Italian food,now let's check the Italian food distribution

In [150]:
brooklyn_venues_Italianfood=brooklyn_venues_food[brooklyn_venues_food['Venue Category']=='Italian Restaurant'].reset_index(drop=True)

In [151]:
brooklyn_venues_Italianfood.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue ID,Venue Category
0,Bay Ridge,40.625801,-74.030621,Areo Ristorante,40.624415,-74.030782,4adb7364f964a520a32721e3,Italian Restaurant
1,Bay Ridge,40.625801,-74.030621,Tuscany Grill,40.622913,-74.031387,4b70d8dff964a52063312de3,Italian Restaurant
2,Bay Ridge,40.625801,-74.030621,Sofia Restorante Italiano,40.624847,-74.030522,4a7e205ef964a52012f11fe3,Italian Restaurant
3,Bay Ridge,40.625801,-74.030621,Zio Toto,40.624791,-74.030551,4ad62c95f964a5208d0521e3,Italian Restaurant
4,Bay Ridge,40.625801,-74.030621,Giacomo's Wood Fired Pizza & Trattoria,40.628535,-74.029051,4e732dff45dd41e9ccce1987,Italian Restaurant


In [152]:
brooklyn_venues_Italianfood.shape

(44, 8)

In [153]:
address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="bl_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Brookly are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Brookly are 40.6501038, -73.9495823.


#### use different color indicate different neighborhood

In [154]:
import random
import numpy as np
import matplotlib.colors as colors


for key in colors.cnames:
    listcol.append(key)

mycol=listcol[random.randint(1,100)]
mycol



'mistyrose'

In [156]:
# create map of Brooklyn Restaurant using latitude and longitude values
map_brooklyn_itfood = folium.Map(location=[latitude, longitude], zoom_start=12)
old_nb=''
new_color=''
# add markers to map
for lat, lng, label,nb in zip(brooklyn_venues_Italianfood['Venue Latitude'], brooklyn_venues_Italianfood['Venue Longitude'], brooklyn_venues_Italianfood['Venue'],brooklyn_venues_Italianfood['Neighborhood']):
    label = folium.Popup(label+' of ' + nb, parse_html=True)
    if nb != old_nb:
        new_color=listcol[random.randint(10,100)]        
        
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color=new_color,
#        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_brooklyn_itfood)  
    
    old_nb=nb
    
map_brooklyn_itfood

#### Now we know most Italian food round Boerum Hill,but Boerum Hill only one Italian restaurant

####  Boerum Hill seems a good candidate for a Italian restaurant,let's further check the Venues in Boerum Hill

In [160]:
brooklyn_venues[brooklyn_venues['Neighborhood']=='Boerum Hill'].groupby('Venue Category').count()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue ID
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
American Restaurant,1,1,1,1,1,1,1
Antique Shop,1,1,1,1,1,1,1
Arts & Crafts Store,2,2,2,2,2,2,2
Athletics & Sports,1,1,1,1,1,1,1
Bakery,2,2,2,2,2,2,2
Bank,1,1,1,1,1,1,1
Bar,3,3,3,3,3,3,3
Bookstore,1,1,1,1,1,1,1
Boutique,1,1,1,1,1,1,1
Bubble Tea Shop,1,1,1,1,1,1,1


#### We see a lot of shop,store,park in Boerum Hill,so let's open a open a Italian Restaurant in Boerum Hill！！！