In [34]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import geocoder
import folium
from geopy.geocoders import Nominatim # convert an address to langtitude and longitude
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# import k-means from clustering stage
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import math

In [2]:
url = "https://vi.wikipedia.org/wiki/Th%C3%A0nh_ph%E1%BB%91_H%E1%BB%93_Ch%C3%AD_Minh"

r  = requests.get(url)

data = r.text

soup = BeautifulSoup(data,'lxml')

In [3]:
df = []
for i in soup.find_all("table",class_="wikitable sortable")[0].find_all("tr"):
    ls = []
    for j in i.find_all("td"):
        ls.append(j.get_text())
    df.append(ls)
df = pd.DataFrame(df)

In [4]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,,,,,,,,,,,...,,,,,,,,,,
1,\n\n\nTên\n\nDiện tích (km²)\nDân số (người)\n...,Quận (19)\n,Quận 1\n,772,142.0,10 phường\n,Quận 2\n,4979.0,180.0,11 phường\n,...,"1 thị trấn, 20 xã\n",Hóc Môn\n,10917.0,542.0,"1 thị trấn, 11 xã\n",Nhà Bè\n,10043.0,206.0,"1 thị trấn, 6 xã\n",\n
2,,,,,,,,,,,...,,,,,,,,,,
3,Quận (19)\n,,,,,,,,,,...,,,,,,,,,,
4,Quận 1\n,772,142.000,10 phường\n,,,,,,,...,,,,,,,,,,


In [5]:
df.drop(labels=[0,1,2,3,16,24],inplace=True)
df = df.iloc[:,0:3]
df.columns=['District Name','Acreage (Km2)','Population']
df.reset_index(drop=True,inplace=True)
# Remove EOL "\n" String in dataset
for i in df.columns:
    df[i] = df[i].str.replace("\n","",regex=True)

In [6]:
df['Acreage (Km2)'] = df['Acreage (Km2)'].str.replace(",",".",regex=True)
df['Population'] = df['Population'].str.replace(".","",regex=True)
df['Acreage (Km2)'].astype(float)
df['Population'].astype(int)

0     142000
1     180000
2     190000
3     175000
4     159000
5     233000
6     360000
7     424000
8     397000
9     234000
10    209000
11    620000
12    784000
13    499000
14    676000
15    163000
16    474000
17    485000
18    592000
19    705000
20     71000
21    462000
22    542000
23    206000
Name: Population, dtype: int32

In [7]:
df.head()

Unnamed: 0,District Name,Acreage (Km2),Population
0,Quận 1,7.72,142000
1,Quận 2,49.79,180000
2,Quận 3,4.92,190000
3,Quận 4,4.18,175000
4,Quận 5,4.27,159000


In [8]:
df.shape

(24, 3)

In [9]:
df_hcmc = df.copy()

### Prepare for fourquare dataframe

In [10]:
CLIENT_ID = 'TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG' # your Foursquare ID
CLIENT_SECRET = 'G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG
CLIENT_SECRET:G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3


In [11]:
df_hcm_cor = pd.read_csv("HCM district coordinates.csv")

In [12]:
df_hcm_cor.head()

Unnamed: 0,District Name,Latitude,Longitude
0,Quận 1,10.777369,106.696646
1,Quận 2,10.782377,106.754713
2,Quận 3,10.780681,106.680866
3,Quận 4,10.758388,106.702021
4,Quận 5,10.755418,106.667333


In [13]:
df_hcm_cor = df_hcm_cor.sort_values(by="District Name",ascending=True)
df_hcmc = df_hcmc.sort_values(by="District Name",ascending=True)

In [14]:
df_hcm_cor['District Name'] == df_hcmc['District Name']

19    True
13    True
12    True
20    True
21    True
14    True
22    True
23    True
15    True
0     True
9     True
10    True
11    True
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
18    True
16    True
17    True
Name: District Name, dtype: bool

In [15]:
df_hcmc = df_hcmc.merge(df_hcm_cor.iloc[:,1:3],left_index=True,right_index=True)

In [16]:
df_hcmc

Unnamed: 0,District Name,Acreage (Km2),Population,Latitude,Longitude
19,Bình Chánh,252.56,705000,10.724583,106.575197
13,Bình Thạnh,20.78,499000,10.812639,106.714579
12,Bình Tân,52.02,784000,10.770324,106.599978
20,Cần Giờ,704.45,71000,10.535622,106.854503
21,Củ Chi,434.77,462000,11.008502,106.518123
14,Gò Vấp,19.73,676000,10.837427,106.666492
22,Hóc Môn,109.17,542000,10.891521,106.600013
23,Nhà Bè,100.43,206000,10.650182,106.729357
15,Phú Nhuận,4.88,163000,10.799939,106.677777
0,Quận 1,7.72,142000,10.777369,106.696646


In [17]:
address = 'Ho Chi Minh'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Ho Chi Minh City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Ho Chi Minh City are 10.7758439, 106.7017555.


In [18]:
m = folium.Map(location=[latitude, longitude],zoom_start=12)
#url = 'https://cocl.us/sanfran_geojson'
#geo_data_input = f'{url}'
for index,values in df_hcmc.iterrows():
    folium.CircleMarker(
        [values['Latitude'], values['Longitude']],
        radius=2,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(m)  
m

In [19]:
CLIENT_ID = 'TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG' # your Foursquare ID
CLIENT_SECRET = 'G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG
CLIENT_SECRET:G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3


In [80]:
# type your answer here
def get_foursquare_url(cl_id,cl_se,ver,lat,long,radius):
    url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&categoryId=4bf58dd8d48988d1e0931735,5665c7b9498e7d8a4f2c0f06,5e18993feee47d000759b256,4bf58dd8d48988d1a1941735,4bf58dd8d48988d128941735,4bf58dd8d48988d16d941735,54135bf5e4b08f3d2429dfe7,56aa371be4b08b9a8d573508,54f4ba06498e2cf5561da814,4bf58dd8d48988d18d941735,4bf58dd8d48988d1f0941735&intent=browse'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        long, 
        radius, 
        )
    return url

In [84]:
def getNearbyVenues(names, latitudes, longitudes, acreage):
    CLIENT_ID = 'TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG' # your Foursquare ID
    CLIENT_SECRET = 'G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3' # your Foursquare Secret
    venues_list=[]
    for name, lat, lng,acr in zip(names, latitudes, longitudes, acreage):
        print(name)
            
        # create the API request URL
        url = get_foursquare_url(CLIENT_ID,CLIENT_SECRET,lat,lng,math.sqrt(float(acr)/float(3.14)))
            
        # make the GET request
        results = requests.get(url).json()
        print(results)
        
        # return only relevant information for each nearby venue
        #venues_list.append([(
        #    name, 
        #    lat, 
        #    lng, 
        #    v[0]['id'],
        #    v[0]['name'],
        #    v[0]['location']['lat'], 
        #    v[0]['location']['lng']) for v in results])
        #for v in results:
        #    print(v)
    print(venues_list)
    #nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    #nearby_venues.columns = ['Neighborhood', 
    #              'Neighborhood Latitude', 
    #              'Neighborhood Longitude', 
    #              'Venue Id',
    #              'Venue Name',
    #              'Venue Latitude', 
    #              'Venue Longitude']
    
    return(nearby_venues)

In [85]:
coffee_venues = getNearbyVenues(names=df_hcmc['District Name'],latitudes = df_hcmc['Latitude'],longitudes = df_hcmc['Longitude'],acreage = df_hcmc['Acreage (Km2)'])

Bình Chánh
{'meta': {'code': 410, 'errorType': 'param_error', 'errorDetail': 'The Foursquare API no longer supports requests that do not pass in a version parameter. For more details see https://developer.foursquare.com/overview/versioning', 'requestId': '5e5fb5e4660a9f001b7fce10'}, 'response': {}}
Bình Thạnh
{'meta': {'code': 410, 'errorType': 'param_error', 'errorDetail': 'The Foursquare API no longer supports requests that do not pass in a version parameter. For more details see https://developer.foursquare.com/overview/versioning', 'requestId': '5e5fb6aab9a389001beaf2f8'}, 'response': {}}
Bình Tân
{'meta': {'code': 410, 'errorType': 'param_error', 'errorDetail': 'The Foursquare API no longer supports requests that do not pass in a version parameter. For more details see https://developer.foursquare.com/overview/versioning', 'requestId': '5e5fb5d9aba297001bfaf110'}, 'response': {}}
Cần Giờ
{'meta': {'code': 410, 'errorType': 'param_error', 'errorDetail': 'The Foursquare API no longe

In [52]:
nearby_venues

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.lat,location.lng,location.labeledLatLngs,location.distance,location.cc,location.state,location.country,location.formattedAddress,location.crossStreet,location.postalCode,location.city,venuePage.id,location.neighborhood
0,5ca8c8c7a4b51b0039410171,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583327600,False,65 Lê Lợi,10.773784,106.70095,"[{'label': 'display', 'lat': 10.773784, 'lng':...",617,VN,Thành phố Hồ Chí Minh,Việt Nam,"[65 Lê Lợi, Thành phố Hồ Chí Minh, Việt Nam]",,,,,
1,5881f1630802cb5c60818010,The Cafe Apartments,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583327600,False,"The Cafe Apartment, 42 Nguyen Hue Street, Dist...",10.773743,106.70394,"[{'label': 'display', 'lat': 10.77374289584670...",893,VN,,Việt Nam,"[The Cafe Apartment, 42 Nguyen Hue Street, Dis...",,,,,
2,524d970d11d2e6f625dcbacb,Starbucks (Starbucks Coffee Rex Hotel),"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583327600,False,146 - 148 Pasteur St,10.775614,106.70053,"[{'label': 'display', 'lat': 10.77561375300064...",467,VN,Thành phố Hồ Chí Minh,Việt Nam,"[146 - 148 Pasteur St (Le Thanh Ton), Thành ph...",Le Thanh Ton,700000.0,Thành phố Hồ Chí Minh,,
3,50ae4436e4b04c937a56de9c,Starbucks (Starbucks Coffee @ New World Hotel),"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583327600,False,"76 Le Lai St., Dist. 1",10.771221,106.693656,"[{'label': 'display', 'lat': 10.77122058818865...",758,VN,Thành phố Hồ Chí Minh,Việt Nam,"[76 Le Lai St., Dist. 1 (btw. Pham Hong Thai S...",btw. Pham Hong Thai St. and Nguyen Thi Nghia St.,,Thành phố Hồ Chí Minh,76915000.0,
4,59776705b8fd9d580a2cd730,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583327600,False,"241bis Cach Mang Thang 8 St., District 3",10.776537,106.683749,"[{'label': 'display', 'lat': 10.77653679632438...",1413,VN,Thành phố Hồ Chí Minh,Việt Nam,"[241bis Cach Mang Thang 8 St., District 3 (Die...",Dien Bien Phu,,Thành phố Hồ Chí Minh,,
5,59997bb15d0fea4b39463d94,Starbucks Reserve,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583327600,False,"11 - 13 Han Thuyen St., District 1",10.778961,106.698185,"[{'label': 'display', 'lat': 10.77896139444801...",244,VN,Thành phố Hồ Chí Minh,Việt Nam,"[11 - 13 Han Thuyen St., District 1, Thành phố...",,,Thành phố Hồ Chí Minh,,
6,4bf016fb3a002d7fe0a585a4,Caravelle Hotel,"[{'id': '4bf58dd8d48988d1fa931735', 'name': 'H...",v-1583327600,False,"19 Lam Son Square, District 1",10.776482,106.703359,"[{'label': 'display', 'lat': 10.77648195838112...",740,VN,Thành phố Hồ Chí Minh,Việt Nam,"[19 Lam Son Square, District 1, Thành phố Hồ C...",,,Thành phố Hồ Chí Minh,,
7,554faad7498e38786638336f,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583327600,False,"39 Le Duan St., District 1",10.781078,106.700207,"[{'label': 'display', 'lat': 10.781078, 'lng':...",567,VN,Thành phố Hồ Chí Minh,Việt Nam,"[39 Le Duan St., District 1, Thành phố Hồ Chí ...",,,Thành phố Hồ Chí Minh,,
8,53661368498e9da412ec776c,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583327600,False,"191-193 Đề Thám Street, District 1",10.767544,106.693949,"[{'label': 'display', 'lat': 10.76754409344893...",1132,VN,Thành phố Hồ Chí Minh,Việt Nam,"[191-193 Đề Thám Street, District 1 (Bùi Viện)...",Bùi Viện,,Thành phố Hồ Chí Minh,,
9,56764e09498eca1773f719b4,Cộng Cà Phê,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583327600,False,"26 Lý Tự Trọng, Quận 1",10.777917,106.701266,"[{'label': 'display', 'lat': 10.77791742055850...",508,VN,Thành phố Hồ Chí Minh,Việt Nam,"[26 Lý Tự Trọng, Quận 1, Thành phố Hồ Chí Minh...",,,Thành phố Hồ Chí Minh,,


In [62]:
results = requests.get(get_foursquare_url(CLIENT_ID,CLIENT_SECRET,VERSION,10.790275,106.628605,2250)).json()

In [63]:
venue = results['response']['venues']

In [68]:
venue[0]['location']['lat']

10.787477

In [66]:
venue

[{'id': '5e5240407f39440008eba736',
  'name': 'Cao Coffee',
  'location': {'lat': 10.787477,
   'lng': 106.62361,
   'labeledLatLngs': [{'label': 'display',
     'lat': 10.787477,
     'lng': 106.62361}],
   'distance': 628,
   'cc': 'VN',
   'city': 'Thành phố Hồ Chí Minh',
   'state': 'Thành phố Hồ Chí Minh',
   'country': 'Việt Nam',
   'formattedAddress': ['Thành phố Hồ Chí Minh',
    'Thành phố Hồ Chí Minh',
    'Việt Nam']},
  'categories': [{'id': '4bf58dd8d48988d1e0931735',
    'name': 'Coffee Shop',
    'pluralName': 'Coffee Shops',
    'shortName': 'Coffee Shop',
    'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/coffeeshop_',
     'suffix': '.png'},
    'primary': True}],
  'referralId': 'v-1583330426',
  'hasPerk': False},
 {'id': '53a9a04f498e1e9a997ca1b6',
  'name': 'Starbucks',
  'location': {'address': '1/1 Truong Chinh St., Tan Phu Dist.',
   'lat': 10.806511172818963,
   'lng': 106.63469136407473,
   'labeledLatLngs': [{'label': 'display',
     'lat':

In [56]:
nearby_venues = json_normalize(venue)

In [58]:
nearby_venues

Unnamed: 0,id,name,categories,referralId,hasPerk,location.lat,location.lng,location.labeledLatLngs,location.distance,location.cc,location.city,location.state,location.country,location.formattedAddress,location.address,location.postalCode,location.crossStreet
0,5e5240407f39440008eba736,Cao Coffee,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583330083,False,10.787477,106.62361,"[{'label': 'display', 'lat': 10.787477, 'lng':...",628,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[Thành phố Hồ Chí Minh, Thành phố Hồ Chí Minh,...",,,
1,53a9a04f498e1e9a997ca1b6,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583330083,False,10.806511,106.634691,"[{'label': 'display', 'lat': 10.80651117281896...",1926,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[1/1 Truong Chinh St., Tan Phu Dist., Thành ph...","1/1 Truong Chinh St., Tan Phu Dist.",,
2,5cc91ec05ba04600396e48e1,Bodyfriend Massage Coffee,"[{'id': '5665c7b9498e7d8a4f2c0f06', 'name': 'C...",v-1583330083,False,10.800833,106.61616,"[{'label': 'display', 'lat': 10.800833, 'lng':...",1798,VN,Hồ Chí Minh,,Việt Nam,"[Hồ Chí Minh, Việt Nam]",,,
3,5b288ec50868a2002c96b412,Wayne’s Coffee,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583330083,False,10.78922,106.63663,"[{'label': 'display', 'lat': 10.78922, 'lng': ...",885,VN,Ho Chi Minh City - Quận 10,Thành phố Hồ Chí Minh,Việt Nam,"[Ground Floor Viettel Complex Building, 285 Cá...","Ground Floor Viettel Complex Building, 285 Các...",700000.0,
4,4f1a3b13e4b0d1a654726006,vuon tre,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583330083,False,10.801179,106.64589,"[{'label': 'display', 'lat': 10.80117863823167...",2246,VN,,Thành phố Hồ Chí Minh,Việt Nam,"[42 Đồng Xoài, phường 13, Tân Bình district, P...","42 Đồng Xoài, phường 13, Tân Bình district, Pr...",,
5,5b8ca052968f31002c043a94,The Coffee House,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583330083,False,10.77315,106.627594,"[{'label': 'display', 'lat': 10.7731495, 'lng'...",1909,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[184 To Hieu St., Tan Phu Dist., Thành phố Hồ ...",,,"184 To Hieu St., Tan Phu Dist."
6,591d40429b047319cb3f01cb,The Coffee House,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583330083,False,10.792879,106.628855,"[{'label': 'display', 'lat': 10.792879, 'lng':...",291,VN,,Thành phố Hồ Chí Minh,Việt Nam,"[Độc Lập, Thành phố Hồ Chí Minh, Việt Nam]",Độc Lập,760003.0,
7,4f86d20ae4b05dd564805736,Cafe 55 phan đình phùng,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583330083,False,10.789592,106.630515,"[{'label': 'display', 'lat': 10.78959174365355...",222,VN,Hcm,,Việt Nam,"[55 (Phan đình phùng), Hcm, Việt Nam]",55,,Phan đình phùng
8,52c6b8a7498e32caa049a983,Food Court @ AEON Mall,"[{'id': '4bf58dd8d48988d128941735', 'name': 'C...",v-1583330083,False,10.801273,106.61788,"[{'label': 'display', 'lat': 10.80127322847411...",1695,VN,,,Việt Nam,[Việt Nam],,,
9,5bcafa784c954c002c6c81d4,the coffee house,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583330083,False,10.786697,106.64347,"[{'label': 'display', 'lat': 10.786697, 'lng':...",1673,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[Thành phố Hồ Chí Minh, Thành phố Hồ Chí Minh,...",,,
