## Download data

In [151]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [152]:
url = 'https://www.zip-codes.com/city/ca-berkeley.asp'
response = requests.get(url)
soup = BeautifulSoup(response.content,'lxml')

In [153]:
table = []
titles = []
a = soup.find('table',{'id':'tblZIP'})
b = a.findAll('a',{'style':'text-decoration:underline;'})
for txt in b:
    c = txt.get_text()
    table.append(c[-5:])
table

['94701',
 '94702',
 '94703',
 '94704',
 '94705',
 '94706',
 '94707',
 '94708',
 '94709',
 '94710',
 '94712',
 '94720']

In [154]:
def getLatLon(url):
    i = 0
    response = requests.get(url)
    soup = BeautifulSoup(response.content,'lxml')
    labels = soup.findAll('span',{'class':'Tips2'})
    info = soup.findAll('td',{'class':'info'})
    for txt in labels:
        if txt.get_text() == 'Latitude:':
            break
        i+=1
    zc = info[0].get_text()
    lat = info[i].get_text()
    lon = info[i+1].get_text()
    return [zc,lat,lon]

In [155]:
matrix = []
for zc in table:
    url = 'https://www.zip-codes.com/zip-code/{zcd}/zip-code-{zcd}.asp'.format(zcd=zc)
    matrix.append(getLatLon(url))
matrix

[['94701', '37.8718', '-122.2718'],
 ['94702', '37.864164', '-122.286234'],
 ['94703', '37.865733', '-122.274618'],
 ['94704', '37.872228', '-122.244743'],
 ['94705', '37.8616', '-122.242051'],
 ['94706', '37.889622', '-122.294909'],
 ['94707', '37.899439', '-122.278492'],
 ['94708', '37.900138', '-122.261764'],
 ['94709', '37.879862', '-122.264766'],
 ['94710', '37.871464', '-122.307095'],
 ['94712', '37.8718', '-122.2718'],
 ['94720', '37.874602', '-122.25467']]

In [156]:
titles = ['Zip_codes','Latitude','Longitude']

In [175]:
df = pd.DataFrame(data=matrix,columns=titles)
df

Unnamed: 0,Zip_codes,Latitude,Longitude
0,94701,37.8718,-122.2718
1,94702,37.864164,-122.286234
2,94703,37.865733,-122.274618
3,94704,37.872228,-122.244743
4,94705,37.8616,-122.242051
5,94706,37.889622,-122.294909
6,94707,37.899439,-122.278492
7,94708,37.900138,-122.261764
8,94709,37.879862,-122.264766
9,94710,37.871464,-122.307095


## Explore dataset

In [158]:
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

In [159]:
import folium

In [160]:
address = 'Berkeley, California'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Berkeley are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Berkeley are 37.8708393, -122.2728639.


In [161]:
# create map of Berkeley using latitude and longitude values
map_berkeley = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, zip_code in zip(df['Latitude'], df['Longitude'], df['Zip_codes']):
    label = '{}'.format(zip_code)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_berkeley)  
    
map_berkeley

In [162]:
CLIENT_ID = 'OKKHIRDMEVJYSWUSE0C5UGVNVE3CGM33ZELSBLTOBJFR54YW' # your Foursquare ID
CLIENT_SECRET = 'EIXLVN2AGBRMXK3PGSDXICEWHSTQBOM4VLKPBDW14PACGGWF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OKKHIRDMEVJYSWUSE0C5UGVNVE3CGM33ZELSBLTOBJFR54YW
CLIENT_SECRET:EIXLVN2AGBRMXK3PGSDXICEWHSTQBOM4VLKPBDW14PACGGWF


In [163]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,
            '4d4b7105d754a06374d81259',
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Zip_codes', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [164]:
berkeley_venues = getNearbyVenues(names=df['Zip_codes'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

94701
94702
94703
94704
94705
94706
94707
94708
94709
94710
94712
94720


In [165]:
berkeley_venues

Unnamed: 0,Zip_codes,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,94701,37.8718,-122.2718,Berkeley Social Club,37.871909,-122.269507,Asian Restaurant
1,94701,37.8718,-122.2718,Imm Thai Street Food,37.872006,-122.269090,Thai Restaurant
2,94701,37.8718,-122.2718,Plátano Salvadoran Cuisine,37.871933,-122.269794,Latin American Restaurant
3,94701,37.8718,-122.2718,Comal,37.871568,-122.268774,Mexican Restaurant
4,94701,37.8718,-122.2718,The Butcher's Son,37.871771,-122.272085,Vegetarian / Vegan Restaurant
5,94701,37.8718,-122.2718,Sushi California,37.871149,-122.272953,Sushi Restaurant
6,94701,37.8718,-122.2718,Pedro's Brazil Cafe,37.871608,-122.271536,Brazilian Restaurant
7,94701,37.8718,-122.2718,sweetgreen,37.874001,-122.268860,Salad Place
8,94701,37.8718,-122.2718,Bobby G's Pizzeria,37.872029,-122.268957,Pizza Place
9,94701,37.8718,-122.2718,Revival Bar + Kitchen,37.871016,-122.268282,American Restaurant


In [166]:
dfcolors.loc[str(94720),0]

'#ff0000'

In [167]:
# create map of Berkeley using latitude and longitude values
map_berkeley = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the zip codes
x = np.arange(len(table))
ys = [i + x + (i*x)**2 for i in range(len(table))]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
dfcolors = pd.DataFrame(index=table,data=rainbow)

i=0
# add markers to map
for lat, lng, zip_code in zip(berkeley_venues['Venue Latitude'], berkeley_venues['Venue Longitude'], berkeley_venues['Zip_codes']):
    label = '{}'.format(zip_code)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=dfcolors.loc[str(zip_code),0],
        fill=True,
        fill_color=dfcolors.loc[str(zip_code),0],
        fill_opacity=0.7,
        parse_html=False).add_to(map_berkeley)  
    i+=1
    

    
map_berkeley

## Neighborhood Analysis

In [168]:
restaurants = berkeley_venues['Zip_codes'].value_counts(sort=False)
restaurants.sort_index(inplace=True)
restaurants = pd.DataFrame(data=restaurants)
restaurants.rename(columns={'Zip_codes':'Restaurants'}, inplace=True)
restaurants.sort_values(by='Restaurants',ascending=False)

Unnamed: 0,Restaurants
94701,43
94712,43
94706,39
94709,25
94720,20
94702,13
94705,7
94703,4
94707,3
94708,2


In [176]:
df = df.merge(restaurants,left_on='Zip_codes',right_index=True,how='left')
df.fillna(value=0,inplace=True)
df

Unnamed: 0,Zip_codes,Latitude,Longitude,Restaurants
0,94701,37.8718,-122.2718,43.0
1,94702,37.864164,-122.286234,13.0
2,94703,37.865733,-122.274618,4.0
3,94704,37.872228,-122.244743,0.0
4,94705,37.8616,-122.242051,7.0
5,94706,37.889622,-122.294909,39.0
6,94707,37.899439,-122.278492,3.0
7,94708,37.900138,-122.261764,2.0
8,94709,37.879862,-122.264766,25.0
9,94710,37.871464,-122.307095,0.0


## Clustering

In [177]:
# set number of clusters
kclusters = 3

Berkeley_grouped_clustering = df.drop('Zip_codes', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Berkeley_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 2, 1, 1, 1, 0, 1, 1, 2, 1, 0, 2])

In [178]:
# add clustering labels
df.insert(0, 'Cluster Labels', kmeans.labels_)

df

Unnamed: 0,Cluster Labels,Zip_codes,Latitude,Longitude,Restaurants
0,0,94701,37.8718,-122.2718,43.0
1,2,94702,37.864164,-122.286234,13.0
2,1,94703,37.865733,-122.274618,4.0
3,1,94704,37.872228,-122.244743,0.0
4,1,94705,37.8616,-122.242051,7.0
5,0,94706,37.889622,-122.294909,39.0
6,1,94707,37.899439,-122.278492,3.0
7,1,94708,37.900138,-122.261764,2.0
8,2,94709,37.879862,-122.264766,25.0
9,1,94710,37.871464,-122.307095,0.0


In [179]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df['Latitude'], df['Longitude'], df['Cluster Labels'], df['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)],
        fill=True,
        fill_color=rainbow[int(cluster)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [119]:
x

array([0, 1, 2])