In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import geocoder
import folium
from geopy.geocoders import Nominatim # convert an address to langtitude and longitude
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# import k-means from clustering stage
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

In [2]:
url = "https://vi.wikipedia.org/wiki/Th%C3%A0nh_ph%E1%BB%91_H%E1%BB%93_Ch%C3%AD_Minh"

r  = requests.get(url)

data = r.text

soup = BeautifulSoup(data,'lxml')

In [3]:
df = []
for i in soup.find_all("table",class_="wikitable sortable")[0].find_all("tr"):
    ls = []
    for j in i.find_all("td"):
        ls.append(j.get_text())
    df.append(ls)
df = pd.DataFrame(df)

In [4]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,,,,,,,,,,,...,,,,,,,,,,
1,\n\n\nTên\n\nDiện tích (km²)\nDân số (người)\n...,Quận (19)\n,Quận 1\n,772,142.0,10 phường\n,Quận 2\n,4979.0,180.0,11 phường\n,...,"1 thị trấn, 20 xã\n",Hóc Môn\n,10917.0,542.0,"1 thị trấn, 11 xã\n",Nhà Bè\n,10043.0,206.0,"1 thị trấn, 6 xã\n",\n
2,,,,,,,,,,,...,,,,,,,,,,
3,Quận (19)\n,,,,,,,,,,...,,,,,,,,,,
4,Quận 1\n,772,142.000,10 phường\n,,,,,,,...,,,,,,,,,,


In [5]:
df.drop(labels=[0,1,2,3,16,24],inplace=True)
df = df.iloc[:,0:3]
df.columns=['District Name','Acreage (Km2)','Population']
df.reset_index(drop=True,inplace=True)
# Remove EOL "\n" String in dataset
for i in df.columns:
    df[i] = df[i].str.replace("\n","",regex=True)

In [6]:
df['Acreage (Km2)'] = df['Acreage (Km2)'].str.replace(",",".",regex=True)
df['Population'] = df['Population'].str.replace(".","",regex=True)
df['Acreage (Km2)'].astype(float)
df['Population'].astype(int)

0     142000
1     180000
2     190000
3     175000
4     159000
5     233000
6     360000
7     424000
8     397000
9     234000
10    209000
11    620000
12    784000
13    499000
14    676000
15    163000
16    474000
17    485000
18    592000
19    705000
20     71000
21    462000
22    542000
23    206000
Name: Population, dtype: int32

In [7]:
df.head()

Unnamed: 0,District Name,Acreage (Km2),Population
0,Quận 1,7.72,142000
1,Quận 2,49.79,180000
2,Quận 3,4.92,190000
3,Quận 4,4.18,175000
4,Quận 5,4.27,159000


In [8]:
df.shape

(24, 3)

In [9]:
df_hcmc = df.copy()

### Prepare for fourquare dataframe

In [10]:
CLIENT_ID = 'TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG' # your Foursquare ID
CLIENT_SECRET = 'G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG
CLIENT_SECRET:G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3


In [11]:
df_hcm_cor = pd.read_csv("HCM district coordinates.csv")

In [12]:
df_hcm_cor.head()

Unnamed: 0,District Name,Latitude,Longitude
0,Quận 1,10.777369,106.696646
1,Quận 2,10.782377,106.754713
2,Quận 3,10.780681,106.680866
3,Quận 4,10.758388,106.702021
4,Quận 5,10.755418,106.667333


In [13]:
df_hcm_cor = df_hcm_cor.sort_values(by="District Name",ascending=True)
df_hcmc = df_hcmc.sort_values(by="District Name",ascending=True)

In [14]:
df_hcm_cor['District Name'] == df_hcmc['District Name']

19    True
13    True
12    True
20    True
21    True
14    True
22    True
23    True
15    True
0     True
9     True
10    True
11    True
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
18    True
16    True
17    True
Name: District Name, dtype: bool

In [15]:
df_hcmc = df_hcmc.merge(df_hcm_cor.iloc[:,1:3],left_index=True,right_index=True)

In [16]:
df_hcmc

Unnamed: 0,District Name,Acreage (Km2),Population,Latitude,Longitude
19,Bình Chánh,252.56,705000,10.724583,106.575197
13,Bình Thạnh,20.78,499000,10.812639,106.714579
12,Bình Tân,52.02,784000,10.770324,106.599978
20,Cần Giờ,704.45,71000,10.535622,106.854503
21,Củ Chi,434.77,462000,11.008502,106.518123
14,Gò Vấp,19.73,676000,10.837427,106.666492
22,Hóc Môn,109.17,542000,10.891521,106.600013
23,Nhà Bè,100.43,206000,10.650182,106.729357
15,Phú Nhuận,4.88,163000,10.799939,106.677777
0,Quận 1,7.72,142000,10.777369,106.696646


In [17]:
address = 'Ho Chi Minh'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Ho Chi Minh City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Ho Chi Minh City are 10.7758439, 106.7017555.


In [18]:
m = folium.Map(location=[latitude, longitude],zoom_start=12)
#url = 'https://cocl.us/sanfran_geojson'
#geo_data_input = f'{url}'
for index,values in df_hcmc.iterrows():
    folium.CircleMarker(
        [values['Latitude'], values['Longitude']],
        radius=2,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(m)  
m

In [19]:
CLIENT_ID = 'TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG' # your Foursquare ID
CLIENT_SECRET = 'G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TYNGSEF0SNXTWZU5JEPMP3Z3BSCBA2TQEK2ODCGA114NG4GG
CLIENT_SECRET:G21UECLTFUK3MLIFP4SQK03OR1JMMDI4EPQS4J12TF0CFMP3


In [31]:
# type your answer here
def get_foursquare_url(cl_id,cl_se,ver,lat,long,radius):
    url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&categoryId=4bf58dd8d48988d1e0931735,5665c7b9498e7d8a4f2c0f06,5e18993feee47d000759b256,4bf58dd8d48988d1a1941735,4bf58dd8d48988d128941735,4bf58dd8d48988d16d941735,54135bf5e4b08f3d2429dfe7,56aa371be4b08b9a8d573508,54f4ba06498e2cf5561da814,4bf58dd8d48988d18d941735,4bf58dd8d48988d1f0941735'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        long, 
        radius, 
        )
    return url

In [32]:
results = requests.get(get_foursquare_url(CLIENT_ID,CLIENT_SECRET,VERSION,10.724583,106.575197,8960)).json()
results

{'meta': {'code': 200, 'requestId': '5e5ef3b014a126001bf1d005'},
 'response': {'venues': [{'id': '5b288ec50868a2002c96b412',
    'name': 'Wayne’s Coffee',
    'location': {'address': 'Ground Floor Viettel Complex Building, 285 Cách Mạng Tháng Tám, Phường 12',
     'lat': 10.78922,
     'lng': 106.63663,
     'labeledLatLngs': [{'label': 'display',
       'lat': 10.78922,
       'lng': 106.63663}],
     'distance': 9844,
     'postalCode': '700000',
     'cc': 'VN',
     'city': 'Ho Chi Minh City - Quận 10',
     'state': 'Thành phố Hồ Chí Minh',
     'country': 'Việt Nam',
     'formattedAddress': ['Ground Floor Viettel Complex Building, 285 Cách Mạng Tháng Tám, Phường 12',
      'Ho Chi Minh City - Quận 10',
      'Thành phố Hồ Chí Minh',
      'Việt Nam']},
    'categories': [{'id': '4bf58dd8d48988d1e0931735',
      'name': 'Coffee Shop',
      'pluralName': 'Coffee Shops',
      'shortName': 'Coffee Shop',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/coffees

In [34]:
results

{'meta': {'code': 200, 'requestId': '5e5ef3b014a126001bf1d005'},
 'response': {'venues': [{'id': '5b288ec50868a2002c96b412',
    'name': 'Wayne’s Coffee',
    'location': {'address': 'Ground Floor Viettel Complex Building, 285 Cách Mạng Tháng Tám, Phường 12',
     'lat': 10.78922,
     'lng': 106.63663,
     'labeledLatLngs': [{'label': 'display',
       'lat': 10.78922,
       'lng': 106.63663}],
     'distance': 9844,
     'postalCode': '700000',
     'cc': 'VN',
     'city': 'Ho Chi Minh City - Quận 10',
     'state': 'Thành phố Hồ Chí Minh',
     'country': 'Việt Nam',
     'formattedAddress': ['Ground Floor Viettel Complex Building, 285 Cách Mạng Tháng Tám, Phường 12',
      'Ho Chi Minh City - Quận 10',
      'Thành phố Hồ Chí Minh',
      'Việt Nam']},
    'categories': [{'id': '4bf58dd8d48988d1e0931735',
      'name': 'Coffee Shop',
      'pluralName': 'Coffee Shops',
      'shortName': 'Coffee Shop',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/coffees

In [35]:
venue = results['response']['venues']

In [37]:
nearby_venues = json_normalize(venue)

In [38]:
nearby_venues

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,location.crossStreet
0,5b288ec50868a2002c96b412,Wayne’s Coffee,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583281118,False,"Ground Floor Viettel Complex Building, 285 Các...",10.78922,106.63663,"[{'label': 'display', 'lat': 10.78922, 'lng': ...",9844,700000.0,VN,Ho Chi Minh City - Quận 10,Thành phố Hồ Chí Minh,Việt Nam,"[Ground Floor Viettel Complex Building, 285 Cá...",
1,5b6d5a08121384002c083f89,passio,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583281118,False,,10.749037,106.653464,"[{'label': 'display', 'lat': 10.749037, 'lng':...",8982,,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[LUCKY PALACE, Thành phố Hồ Chí Minh, Thành ph...",LUCKY PALACE
2,5dabb5d4bfb9f2000804bb66,"Phúc Long tea & coffee - lê quang sung st,.","[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583281118,False,"4 Lê Quang Sung St., W2, D6",10.750984,106.649701,"[{'label': 'display', 'lat': 10.750984, 'lng':...",8662,700000.0,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[4 Lê Quang Sung St., W2, D6 (Phạm Đình Hổ St,...","Phạm Đình Hổ St,."
3,5e1c169c6356c10008901cb6,Cafe 268,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583281118,False,,10.763842,106.644311,"[{'label': 'display', 'lat': 10.763842, 'lng':...",8731,,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[Thành phố Hồ Chí Minh, Thành phố Hồ Chí Minh,...",
4,5bd7a2dea0215b002c481285,Trung Nguyên Legend,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1583281118,False,"807 Tạ Quang Bửu, P.5, Q.8",10.736383,106.669937,"[{'label': 'display', 'lat': 10.736383, 'lng':...",10444,,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[807 Tạ Quang Bửu, P.5, Q.8, Thành phố Hồ Chí ...",
5,4d2284126e8c3704d2a10ca0,7 Kỳ Quang 七奇觀,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583281118,False,So 12 Duong 26,10.741957,106.63234,"[{'label': 'display', 'lat': 10.74195747788736...",6542,,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[So 12 Duong 26 (P.11 Q.6), Thành phố Hồ Chí M...",P.11 Q.6
6,4dbf9d44043706a0320254db,Cà phê Vườn Đá - Stone Garden,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583281118,False,"3 Hòa Bình, Ward 3, Dist 11",10.766884,106.640002,"[{'label': 'display', 'lat': 10.7668842959117,...",8509,,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[3 Hòa Bình, Ward 3, Dist 11, Thành phố Hồ Chí...",
7,5cc91ec05ba04600396e48e1,Bodyfriend Massage Coffee,"[{'id': '5665c7b9498e7d8a4f2c0f06', 'name': 'C...",v-1583281118,False,,10.800833,106.61616,"[{'label': 'display', 'lat': 10.800833, 'lng':...",9597,,VN,Hồ Chí Minh,,Việt Nam,"[Hồ Chí Minh, Việt Nam]",
8,4f86d20ae4b05dd564805736,Cafe 55 phan đình phùng,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583281118,False,55,10.789592,106.630515,"[{'label': 'display', 'lat': 10.78959174365355...",9432,,VN,Hcm,,Việt Nam,"[55 (Phan đình phùng), Hcm, Việt Nam]",Phan đình phùng
9,5c98aac6fd16bb0039968509,Trung Nguyen Legend Cafe,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1583281118,False,,10.749765,106.6502,"[{'label': 'display', 'lat': 10.749765, 'lng':...",8668,,VN,Thành phố Hồ Chí Minh,Thành phố Hồ Chí Minh,Việt Nam,"[Thành phố Hồ Chí Minh, Thành phố Hồ Chí Minh,...",
