In [155]:
import pandas as pd
import numpy as np
import folium
import json
from geopy.geocoders import Nominatim
import requests
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

In [127]:
address = "Taichung"
geolocater = Nominatim(user_agent='Bar_explorer')
location = geolocater.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}'.format(address,latitude,longitude))

The geograpical coordinate of Taichung are 24.163162, 120.6478282


In [128]:
url = 'https://en.wikipedia.org/wiki/District_(Taiwan)'
df = pd.read_html(url)[4]
df_taichung = df[df['City']=='Taichung']
df_taichung.drop(['Hànyǔ Pīnyīn','Taiwanese POJ','Hakka','Formosan or others','Type'],axis=1,inplace=True)
df_taichung.reset_index(drop=True,inplace=True)

df_taichung.loc[:,'Chinese']='台中市'+df_taichung.loc[:,'Chinese']
df_taichung.rename(columns={'Chinese':'縣市各區'},inplace=True)
df_taichung.head(10)

Unnamed: 0,Name,縣市各區,City
0,Central,台中市中區,Taichung
1,East,台中市東區,Taichung
2,South,台中市南區,Taichung
3,West,台中市西區,Taichung
4,North,台中市北區,Taichung
5,Beitun,台中市北屯區,Taichung
6,Xitun,台中市西屯區,Taichung
7,Nantun,台中市南屯區,Taichung
8,Taiping,台中市太平區,Taichung
9,Dali,台中市大里區,Taichung


In [129]:
url_2='http://www.astrocode.net/%E5%8F%B0%E7%81%A3%E5%90%84%E7%B8%A3%E5%B8%82%E5%9C%B0%E5%8D%80%E7%B6%93%E7%B7%AF%E5%BA%A6/'
df_2=pd.read_html(url_2)[0]
new_header=df_2.loc[0]
new_header=new_header.to_list()
df_2.columns = new_header
df_2=df_2[88:112]
df_2.reset_index(drop=True,inplace=True)
df_2.head(10)

Unnamed: 0,主要縣市,縣市各區,Longitude (經度),Latitude(緯度)
0,台中,台中市大安區,120E35’00”,24N21’00”
1,,台中市大甲區,120E31’00”,24N21’00”
2,,台中市外埔區,120E39’00”,24N20’00”
3,,台中市后里區,120E42’00”,24N18’00”
4,,台中市石崗區,120E46’00”,24N17’00”
5,,台中市清水區,120E34’00”,24N16’00”
6,,台中市梧棲區,120E31’00”,24N15’00”
7,,台中市神崗區,120E39’00”,24N15’00”
8,,台中市豐原區,120E43’00”,24N15’00”
9,,台中市新社區,120E48’00”,24N15’00”


In [130]:
taichung_data = pd.merge(df_taichung,df_2,on='縣市各區')
taichung_data.head(5)

Unnamed: 0,Name,縣市各區,City,主要縣市,Longitude (經度),Latitude(緯度)
0,Beitun,台中市北屯區,Taichung,,120E41’00”,24N10’00”
1,Xitun,台中市西屯區,Taichung,,120E37’00”,24N11’00”
2,Nantun,台中市南屯區,Taichung,,120E37’00”,24N08’00”
3,Taiping,台中市太平區,Taichung,,120E42’00”,24N08’00”
4,Dali,台中市大里區,Taichung,,120E40’00”,24N06’00”


In [131]:
taichung_data.drop('主要縣市',axis=1,inplace=True)
column = ['District','District in Chinese','City','Longitude','Latitude']
taichung_data.columns = column
taichung_data.head(5)

Unnamed: 0,District,District in Chinese,City,Longitude,Latitude
0,Beitun,台中市北屯區,Taichung,120E41’00”,24N10’00”
1,Xitun,台中市西屯區,Taichung,120E37’00”,24N11’00”
2,Nantun,台中市南屯區,Taichung,120E37’00”,24N08’00”
3,Taiping,台中市太平區,Taichung,120E42’00”,24N08’00”
4,Dali,台中市大里區,Taichung,120E40’00”,24N06’00”


In [132]:
taichung_data.dtypes

District               object
District in Chinese    object
City                   object
Longitude              object
Latitude               object
dtype: object

In [133]:
import re

In [134]:
def dms2dd(s):
    degree,direction,minute,n0,second,n1,n2=re.split('([\D’”])',s)
    dd = float(degree) + float(minute)/60 + float(second)/(60*60)
    if direction in ('S','W'):
        dd*=-1
    return dd

In [135]:
taichung_data['Longitude'] = taichung_data['Longitude'].apply(dms2dd)
taichung_data['Latitude'] = taichung_data['Latitude'].apply(dms2dd)
taichung_data.head(5)

Unnamed: 0,District,District in Chinese,City,Longitude,Latitude
0,Beitun,台中市北屯區,Taichung,120.683333,24.166667
1,Xitun,台中市西屯區,Taichung,120.616667,24.183333
2,Nantun,台中市南屯區,Taichung,120.616667,24.133333
3,Taiping,台中市太平區,Taichung,120.7,24.133333
4,Dali,台中市大里區,Taichung,120.666667,24.1


In [136]:
taichung_data.loc[18,'Longitude'] = 120.576356
taichung_data.loc[18,'Latitude'] = 24.3777993
taichung_data

Unnamed: 0,District,District in Chinese,City,Longitude,Latitude
0,Beitun,台中市北屯區,Taichung,120.683333,24.166667
1,Xitun,台中市西屯區,Taichung,120.616667,24.183333
2,Nantun,台中市南屯區,Taichung,120.616667,24.133333
3,Taiping,台中市太平區,Taichung,120.7,24.133333
4,Dali,台中市大里區,Taichung,120.666667,24.1
5,Wufeng,台中市霧峰區,Taichung,120.683333,24.066667
6,Wuri,台中市烏日區,Taichung,120.616667,24.1
7,Fengyuan,台中市豐原區,Taichung,120.716667,24.25
8,Houli,台中市后里區,Taichung,120.7,24.3
9,Dongshi,台中市東勢區,Taichung,120.816667,24.25


In [137]:
map_taichung=folium.Map(location=[latitude,longitude],zoom_start=11)
# add markers to map
for lat, lng, label in zip(taichung_data['Latitude'], taichung_data['Longitude'], taichung_data['District']):
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        [lat, lng],
        popup=label,).add_to(map_taichung)  
    
map_taichung

In [138]:
CLIENT_ID = 'FBPNFI5G2I2PMNIJATJNQHPEE1ZOUPW5ONS3AYNCUOERLRVX'
CLIENT_SECRET = '0SQXN3B5PEF0IOSFO0LVDS20EDI5L1D0NVDXZMQUTX5VWFC3'
VERSION = '20191020'
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FBPNFI5G2I2PMNIJATJNQHPEE1ZOUPW5ONS3AYNCUOERLRVX
CLIENT_SECRET:0SQXN3B5PEF0IOSFO0LVDS20EDI5L1D0NVDXZMQUTX5VWFC3


`https://api.foursquare.com/v2/venues/search?client_id=CLIENT_ID&client_secret=CLIENT_SECRET&ll=LATITUDE,LONGITUDE&v=VERSION&query=QUERY&radius=RADIUS&limit=LIMIT`

In [139]:
def getNearbyVenues(names, latitudes, longitudes, radius=2000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['District', 
                  'District Latitude', 
                  'District Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [140]:
taichung_venues = getNearbyVenues(taichung_data['District'],taichung_data['Latitude'],taichung_data['Longitude'])

Beitun
Xitun
Nantun
Taiping
Dali
Wufeng
Wuri
Fengyuan
Houli
Dongshi
Xinshe
Tanzi
Daya
Dadu
Shalu
Longjing
Wuqi
Qingshui
Dajia
Waipu
Daan
Heping


In [141]:
taichung_venues

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Beitun,24.166667,120.683333,東興市魯肉義,24.167086,120.677003,Taiwanese Restaurant
1,Beitun,24.166667,120.683333,親親戲院,24.160305,120.691848,Movie Theater
2,Beitun,24.166667,120.683333,鼎王麻辣火鍋,24.167037,120.684236,Hotpot Restaurant
3,Beitun,24.166667,120.683333,老向的店,24.170950,120.684517,Chinese Restaurant
4,Beitun,24.166667,120.683333,田樂學院店,24.158740,120.681458,Café
5,Beitun,24.166667,120.683333,Eslite Bookstore (誠品台中中友店 Eslite Bookstore),24.152070,120.684945,Bookstore
6,Beitun,24.166667,120.683333,一中豐仁冰,24.148982,120.686501,Ice Cream Shop
7,Beitun,24.166667,120.683333,神仙草,24.159135,120.671885,Juice Bar
8,Beitun,24.166667,120.683333,台中自然博物館 植物園,24.158486,120.667863,Botanical Garden
9,Beitun,24.166667,120.683333,阿裕壽司,24.161228,120.664901,Japanese Restaurant


In [142]:
taichung_venues.shape

(239, 7)

In [143]:
print('There are {} uniques categories.'.format(len(taichung_venues['Venue Category'].unique())))

There are 71 uniques categories.


In [144]:
taichung_onehot = pd.get_dummies(taichung_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
taichung_onehot['District'] = taichung_venues['District'] 

# move neighborhood column to the first column
fixed_columns = [taichung_onehot.columns[-1]] + list(taichung_onehot.columns[:-1])
taichung_onehot = taichung_onehot[fixed_columns]

taichung_onehot.head()

Unnamed: 0,District,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Baseball Stadium,Beach,Bike Rental / Bike Share,...,Soup Place,Street Art,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tea Room,Theme Park,Train Station,Udon Restaurant,Vegetarian / Vegan Restaurant
0,Beitun,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,Beitun,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Beitun,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Beitun,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Beitun,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [145]:
taichung_grouped = taichung_onehot.groupby('District').mean().reset_index()
taichung_grouped

Unnamed: 0,District,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Baseball Stadium,Beach,Bike Rental / Bike Share,...,Soup Place,Street Art,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tea Room,Theme Park,Train Station,Udon Restaurant,Vegetarian / Vegan Restaurant
0,Beitun,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02381,0.0,0.0,0.0,0.047619,0.02381,0.0,0.02381,0.0,0.0
1,Daan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Dadu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0
3,Dajia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Dali,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
5,Daya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Dongshi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Fengyuan,0.0,0.0,0.142857,0.0,0.0,0.071429,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0
8,Heping,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0
9,Houli,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0


In [146]:
taichung_grouped.shape

(22, 72)

In [147]:
num_top_venues = 5
for dist in taichung_grouped['District']:
    print("----"+dist+"----")
    temp = taichung_grouped[taichung_grouped['District'] == dist].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Beitun----
                 venue  freq
0    Convenience Store  0.12
1   Chinese Restaurant  0.10
2          Coffee Shop  0.10
3                 Café  0.07
4  Japanese Restaurant  0.05


----Daan----
               venue  freq
0  Mobile Phone Shop   0.5
1         Smoke Shop   0.5
2        Art Gallery   0.0
3        Music Venue   0.0
4               Park   0.0


----Dadu----
                    venue  freq
0           Historic Site  0.25
1           Train Station  0.25
2  Furniture / Home Store  0.25
3             Supermarket  0.25
4           Movie Theater  0.00


----Dajia----
                venue  freq
0               Beach   0.6
1  Seafood Restaurant   0.2
2          Smoke Shop   0.2
3         Art Gallery   0.0
4              Museum   0.0


----Dali----
                 venue  freq
0   Chinese Restaurant  0.25
1  Japanese Restaurant  0.25
2           Theme Park  0.25
3                 Café  0.25
4         Night Market  0.00


----Daya----
                  venue  freq
0  Fast F

In [148]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [149]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['District'] = taichung_grouped['District']

for ind in np.arange(taichung_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(taichung_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.shape

(22, 11)

In [150]:
kclusters = 10

taichung_grouped_clustering = taichung_grouped.drop('District', 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=1).fit(taichung_grouped_clustering)

# check cluster labels generated for each row in the dataframe
print(kmeans.labels_[0:10])
print(len(kmeans.labels_))

[1 6 8 2 4 0 7 9 5 4]
22


In [153]:
taichung_merged = taichung_data

# add clustering labels
taichung_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
taichung_merged = taichung_merged.join(neighborhoods_venues_sorted.set_index('District'), on='District')

taichung_merged.head() # check the last columns!

Unnamed: 0,District,District in Chinese,City,Longitude,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Beitun,台中市北屯區,Taichung,120.683333,24.166667,1,Convenience Store,Coffee Shop,Chinese Restaurant,Café,Japanese Restaurant,Taiwanese Restaurant,Hotel,Botanical Garden,Movie Theater,Night Market
1,Xitun,台中市西屯區,Taichung,120.616667,24.183333,6,Convenience Store,Coffee Shop,Hotel,Shopping Mall,Chinese Restaurant,Cantonese Restaurant,Café,Fast Food Restaurant,Hotpot Restaurant,Park
2,Nantun,台中市南屯區,Taichung,120.616667,24.133333,8,Vegetarian / Vegan Restaurant,Shopping Mall,Asian Restaurant,Restaurant,Historic Site,Taiwanese Restaurant,Campground,Food Truck,Food Stand,Food & Drink Shop
3,Taiping,台中市太平區,Taichung,120.7,24.133333,2,Chinese Restaurant,Hotel,Asian Restaurant,Coffee Shop,Convenience Store,Shopping Mall,Ice Cream Shop,Vegetarian / Vegan Restaurant,Park,Breakfast Spot
4,Dali,台中市大里區,Taichung,120.666667,24.1,4,Chinese Restaurant,Café,Theme Park,Japanese Restaurant,Dessert Shop,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Department Store


In [156]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(taichung_merged['Latitude'], taichung_merged['Longitude'], taichung_merged['District'],kmeans.labels_):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters