# **Import Necessary Packages**

In [2]:
import pandas as pd

from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

import time

# **Load Thai/English Province csv File**

In [3]:
# Thai -> English province lookup table.
# The first CSV column is dropped (.iloc[:, 1:]), every literal 'จ.' is removed
# from the Thai names, and the rows are ordered by Thai name.
df_province = (
    pd.read_csv('./outsource_data/mapper-provinces-thailand.csv')
    .iloc[:, 1:]
    .assign(ProvinceTH=lambda frame: frame['ProvinceTH'].str.replace('จ.', '', regex=False))
    .sort_values(by='ProvinceTH')
)

df_province

Unnamed: 0,ProvinceTH,ProvinceEN
63,กระบี่,Krabi
29,กรุงเทพมหานคร,Bangkok
58,กาญจนบุรี,Kanchanaburi
9,กาฬสินธุ์,Kalasin
30,กำแพงเพชร,Kamphaeng Phet
...,...,...
61,เพชรบุรี,Phetchaburi
40,เพชรบูรณ์,Phetchabun
20,เลย,Loei
7,แพร่,Phrae


# **Load Thai Province/Region csv File**

In [4]:
# Thai region label -> English region name.
region_mapping = {
    'กรุงเทพมหานครและปริมณฑล': 'Bangkok Metropolitan',
    'ภาคใต้': 'Southern',
    'ภาคตะวันตก': 'Western',
    'ภาคตะวันออกเฉียงเหนือ': 'Northeastern',
    'ภาคกลาง': 'Central',
    'ภาคตะวันออก': 'Eastern',
    'ภาคเหนือ': 'Northern'
}

# Source column -> name used in this notebook (also the two columns kept, in this order).
region_columns = {'ProvinceNameThai': 'ProvinceTH', 'Region_VaccineRollout_MOPH': 'Region'}

# Keep only the two columns of interest, rename them, remove the literal
# 'จังหวัด' from the province names, translate the region labels to English
# (labels missing from region_mapping become NaN) and order the rows by Thai name.
df_region = (
    pd.read_csv('./outsource_data/25640531_sc002_.csv')
    .loc[:, list(region_columns)]
    .rename(columns=region_columns)
    .assign(
        ProvinceTH=lambda frame: frame['ProvinceTH'].str.replace('จังหวัด', '', regex=False),
        Region=lambda frame: frame['Region'].map(region_mapping),
    )
    .sort_values(by='ProvinceTH')
)

df_region

Unnamed: 0,ProvinceTH,Region
1,กระบี่,Southern
0,กรุงเทพมหานคร,Bangkok Metropolitan
2,กาญจนบุรี,Western
3,กาฬสินธุ์,Northeastern
4,กำแพงเพชร,Central
...,...,...
38,เพชรบุรี,Western
39,เพชรบูรณ์,Central
54,เลย,Northeastern
40,แพร่,Northern


In [5]:
# Sanity check: count how many provinces were assigned to each English region name.
# value_counts() skips NaN by default, so labels that failed to map are not listed here.
df_region['Region'].value_counts()

Northeastern            20
Central                 16
Southern                14
Northern                 9
Eastern                  7
Bangkok Metropolitan     6
Western                  5
Name: Region, dtype: int64

# **Merge Dataframes**

In [6]:
# Join the Thai/English name table with the Thai/region table on the Thai name.
# This is an inner join, so a province whose Thai spelling differs between the
# two sources is silently dropped from df_merged.
df_merged = pd.merge(df_province, df_region, on = 'ProvinceTH')

# Audit the join: an outer merge with indicator=True labels every key as
# 'both', 'left_only' (only in df_province) or 'right_only' (only in df_region).
# Anything that is not 'both' is missing from df_merged, so report it instead
# of losing it without notice.
merge_audit = pd.merge(
    df_province[['ProvinceTH']],
    df_region[['ProvinceTH']],
    on = 'ProvinceTH',
    how = 'outer',
    indicator = True,
)
unmatched_provinces = merge_audit.loc[merge_audit['_merge'] != 'both']
if not unmatched_provinces.empty:
    print(f'WARNING: {len(unmatched_provinces)} province name(s) did not match and are missing from df_merged:')
    print(unmatched_provinces.to_string(index = False))

df_merged

Unnamed: 0,ProvinceTH,ProvinceEN,Region
0,กระบี่,Krabi,Southern
1,กรุงเทพมหานคร,Bangkok,Bangkok Metropolitan
2,กาญจนบุรี,Kanchanaburi,Western
3,กาฬสินธุ์,Kalasin,Northeastern
4,กำแพงเพชร,Kamphaeng Phet,Central
...,...,...,...
72,เพชรบุรี,Phetchaburi,Western
73,เพชรบูรณ์,Phetchabun,Central
74,เลย,Loei,Northeastern
75,แพร่,Phrae,Northern


# **Use ProvinceTH to look up latitude and longitude on Google Maps with Selenium**

In [7]:
url = 'https://www.google.co.th/maps?hl=en&tab=rl'
SEARCH_WAIT_SECONDS = 4  # fixed pause after each search so the page URL can update


def _parse_lat_long(maps_url):
    """Return (lat, long) parsed from a Google Maps URL.

    Looks for the first '/'-separated segment that starts with '@' and reads
    its first two comma-separated values as floats (expected form:
    '@<lat>,<long>,<zoom>').

    Raises:
        ValueError: if no such segment exists or its values are not numeric.
    """
    for segment in maps_url.split('/'):
        if segment.startswith('@'):
            parts = segment[1:].split(',')
            if len(parts) < 2:
                break
            return float(parts[0]), float(parts[1])
    raise ValueError(f'no "@lat,long" segment found in URL: {maps_url}')


service = ChromeService(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
location_rows = []  # one [ProvinceTH, lat, long] record per province

try:
    for province in df_merged['ProvinceTH']:
        # Reload the start page for every province so the search box is empty.
        driver.get(url)
        search_input = driver.find_element(By.CSS_SELECTOR, 'input#searchboxinput')
        search_button = driver.find_element(By.CSS_SELECTOR, '#searchbox-searchbutton')

        search_input.send_keys(province)  # enter province name
        search_button.click()  # click search button
        time.sleep(SEARCH_WAIT_SECONDS)

        try:
            lat, long = _parse_lat_long(driver.current_url)
        except ValueError as error:
            # Keep the province with missing coordinates rather than aborting
            # the whole run and discarding everything collected so far.
            print(f'WARNING: could not read coordinates for {province}: {error}')
            lat, long = float('nan'), float('nan')

        location_rows.append([province, lat, long])
        print([province, lat, long])
finally:
    # Always close the browser, even if a lookup raised, so no Chrome process is left behind.
    driver.quit()

df_locations = pd.DataFrame(location_rows, columns = ['ProvinceTH', 'lat', 'long'])



Current google-chrome version is 101.0.4951
Get LATEST chromedriver version for 101.0.4951 google-chrome
Driver [C:\Users\jameb\.wdm\drivers\chromedriver\win32\101.0.4951.41\chromedriver.exe] found in cache


['กระบี่', 8.0406291, 98.7130728]
['กรุงเทพมหานคร', 13.725108, 100.3522449]
['กาญจนบุรี', 14.0362441, 99.0406695]
['กาฬสินธุ์', 16.6428199, 103.1072453]
['กำแพงเพชร', 16.3951033, 98.9516581]
['ขอนแก่น', 16.4440318, 102.7498821]
['จันทบุรี', 12.601872, 102.0741947]
['ฉะเชิงเทรา', 13.5784167, 100.8575589]
['ชลบุรี', 13.049722, 100.6201887]
['ชัยนาท', 15.1612489, 99.7530515]
['ชัยภูมิ', 16.0082692, 101.3313029]
['ชุมพร', 10.3180083, 98.8032625]
['ตรัง', 7.5145433, 99.2267157]
['ตราด', 12.160432, 102.2938823]
['ตาก', 16.524854, 98.0538342]
['นครนายก', 14.2020268, 101.2099701]
['นครปฐม', 13.9144348, 99.9429395]
['นครพนม', 17.4062498, 104.1132431]
['นครราชสีมา', 14.9657858, 101.5348641]
['นครศรีธรรมราช', 8.5836543, 99.2236029]
['นครสวรรค์', 15.6888632, 100.0510229]
['นนทบุรี', 13.9648817, 100.2746173]
['นราธิวาส', 6.184983, 101.4513949]
['น่าน', 18.8240891, 100.2848358]
['บึงกาฬ', 18.110198, 103.4355461]
['บุรีรัมย์', 14.963852, 102.4069166]
['ปทุมธานี', 14.0965665, 100.3609565]
['ประจวบคีรี

In [8]:
# Display the scraped coordinates (columns: ProvinceTH, lat, long) for a visual check.
df_locations

Unnamed: 0,ProvinceTH,lat,long
0,กระบี่,8.040629,98.713073
1,กรุงเทพมหานคร,13.725108,100.352245
2,กาญจนบุรี,14.036244,99.040670
3,กาฬสินธุ์,16.642820,103.107245
4,กำแพงเพชร,16.395103,98.951658
...,...,...,...
72,เพชรบุรี,12.954423,99.324372
73,เพชรบูรณ์,16.248765,100.652375
74,เลย,17.487419,100.932519
75,แพร่,18.262051,99.397629


In [16]:
# Attach the scraped coordinates to the name/region table. A left join keeps
# every row of df_merged; provinces without a row in df_locations get NaN lat/long.
df_final_merged = df_merged.merge(df_locations, how = 'left', on = 'ProvinceTH')

# Save the combined mapping as CSV without the DataFrame index column.
output_csv_path = './processed_data/mapping_thailand_province_with_region_and_location.csv'
df_final_merged.to_csv(output_csv_path, index = False)

df_final_merged

Unnamed: 0,ProvinceTH,ProvinceEN,Region,lat,long
0,กระบี่,Krabi,Southern,8.040629,98.713073
1,กรุงเทพมหานคร,Bangkok,Bangkok Metropolitan,13.725108,100.352245
2,กาญจนบุรี,Kanchanaburi,Western,14.036244,99.040670
3,กาฬสินธุ์,Kalasin,Northeastern,16.642820,103.107245
4,กำแพงเพชร,Kamphaeng Phet,Central,16.395103,98.951658
...,...,...,...,...,...
72,เพชรบุรี,Phetchaburi,Western,12.954423,99.324372
73,เพชรบูรณ์,Phetchabun,Central,16.248765,100.652375
74,เลย,Loei,Northeastern,17.487419,100.932519
75,แพร่,Phrae,Northern,18.262051,99.397629
