<h1> <b> Scripts for Location Data from Source </b> </h1>
<i> Source: <a href="https://www.google.com/maps">https://www.google.com/maps</a> </i>

In [52]:
from selenium.webdriver import Chrome
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
import pandas as pd

<br>
<h1> <b> Functions Used </b> </h1> 

In [77]:
def search(query, driver):
    '''
    Submit `query` through the page's search box.

    The element with id 'searchboxinput' is located on `driver`, the query
    text is typed into it, ENTER is sent, and the function then blocks for
    two seconds before returning (presumably to let results load).
    '''
    box = driver.find_element(By.ID, 'searchboxinput')
    for keystrokes in (query, Keys.ENTER):
        box.send_keys(keystrokes)

    sleep(2)

def init_base():
    '''
    Create and return a Chrome WebDriver.

    The driver executable is looked up at the relative Windows path
    chromedriver\\chromedriver.exe.

    Returns:
        the Chrome driver instance.
    '''
    # The backslash is escaped explicitly: '\c' is not a recognised escape
    # sequence and triggers a Deprecation/SyntaxWarning on newer Pythons.
    # The resulting string is unchanged.
    path = 'chromedriver\\chromedriver.exe'
    driver = Chrome(path)

    return driver

def add_data(transport_data_temp, transportation_data):
    '''
    Append the entries of a batch into the accumulated transport data.

    For each of the four transport categories, the list stored in
    `transport_data_temp` is extended onto the list stored under the same
    key in `transportation_data`. `transportation_data` is modified in
    place; nothing is returned.
    '''
    for category in ('Bus Stations', 'Train Stations', 'Airports', 'Ports'):
        transportation_data[category].extend(transport_data_temp[category])

def search_by_category(driver, transport_data_temp, province):
    '''
    Search every transport category for one province and record the hits.

    Each key of `transport_data_temp` is used as a category name. For each
    one, the query "<category> near <province>" is submitted, the names
    found on the page are collected into a list, and that list is appended
    to `transport_data_temp[category]` (one inner list per province).
    '''
    result_links_selector = ('.mapsConsumerUiSubviewSectionGm2Placeresultcontainer__result-container'
                             ' a')

    # Snapshot the keys so the loop does not iterate the dict directly.
    for category in tuple(transport_data_temp):
        search(category + ' near ' + province, driver)

        matches = driver.find_elements(By.CSS_SELECTOR, result_links_selector)
        if matches:
            # Several results: each link's aria-label is taken as its name.
            names = [match.get_attribute('aria-label') for match in matches]
        else:
            # No result links found: fall back to the text of the header title.
            title = driver.find_element(By.CSS_SELECTOR, '.section-hero-header-title-title span')
            names = [title.text]

        transport_data_temp[category].append(names)

        # Click the element matched by this selector before the next query
        # (presumably the search box's clear button -- verify against the page).
        driver.find_element(By.CSS_SELECTOR, '.gsst_a #sb_cb50').click()
    
def search_by_province(driver, province_batch, transportation_data):
    '''
    Collect transport results for a batch of provinces.

    Every province in `province_batch` is searched category by category
    into a fresh per-batch dictionary. Only after the whole batch has been
    searched are the results extended onto `transportation_data`, which is
    modified in place. Nothing is returned.
    '''
    categories = ('Bus Stations', 'Train Stations', 'Airports', 'Ports')
    batch_results = {category: [] for category in categories}

    for province in province_batch:
        search_by_category(driver, batch_results, province)

    # The whole batch has been searched: merge it into the running totals.
    add_data(batch_results, transportation_data)
    

        
        


<br>
<h1> <b> Data Extraction and Cleaning </b> </h1>

In [93]:
# Load the per-province source table that the rest of the notebook builds on.
location_data = pd.read_csv('data/Province_Data.csv')

In [94]:
# Preview the first rows of the loaded table.
location_data.head()

Unnamed: 0,Province,Population,Main Language
0,Metro Manila,12877253,Filipino
1,Camarines Sur,1952544,Bikol
2,Cagayan,675950,Ilocano
3,Ilocos Norte,593081,Ilocano
4,Ilocos Sur,689668,Ilocano


In [95]:
# Display the loaded table in full.
location_data

Unnamed: 0,Province,Population,Main Language
0,Metro Manila,12877253,Filipino
1,Camarines Sur,1952544,Bikol
2,Cagayan,675950,Ilocano
3,Ilocos Norte,593081,Ilocano
4,Ilocos Sur,689668,Ilocano
5,Cebu,76500000,Cebuano
6,Pampanga,2198110,Kapampangan
7,Palawan,849469,Palawano
8,Ifugao,202802,Batad
9,Davao,632588,Davaoeño


In [96]:
# format population column -> change to thousands (vectorised division)
location_data['Population (in thousands)'] = location_data['Population'] / 1000

# add secondary language data -> all filipino
# NOTE: the earlier rename(columns={'Main': 'Primary Language'}) was removed.
# No column is called 'Main', so it silently did nothing, and the column is
# still selected as 'Main Language' when the frame is formatted for export.
location_data['Secondary Language'] = 'Filipino'

In [97]:
# get the number of rows, used to decide how many batches are needed
location_data.shape

# the searches below are run in batches of 7 provinces

(21, 5)

In [67]:
# code for adding transportation data
# Start the browser session and open Google Maps; the batch cells below
# pass this driver to search_by_province.
driver = init_base()
driver.get('https://www.google.com/maps')

In [81]:
# Accumulator filled batch by batch: one empty list per transport category.
transportation_data = {
    category: []
    for category in ('Bus Stations', 'Train Stations', 'Airports', 'Ports')
}

# Province names, in table order, to be searched.
provinces = location_data['Province'].tolist()

In [82]:
# batch 1
# search_by_province returns nothing (it extends transportation_data in
# place), so its result is not bound to a name.
i = 0
search_by_province(driver, provinces[i:i+7], transportation_data)
i += 7

In [83]:
# batch 2 -- continues from the offset `i` left by the previous batch cell
# search_by_province returns nothing (it extends transportation_data in
# place), so its result is not bound to a name.
search_by_province(driver, provinces[i:i+7], transportation_data)
i += 7

In [84]:
# batch 3 -- continues from the offset `i` left by the previous batch cell
# search_by_province returns nothing (it extends transportation_data in
# place), so its result is not bound to a name.
search_by_province(driver, provinces[i:i+7], transportation_data)
i += 7

In [98]:
# Attach one column per transport category; each list must hold exactly one
# entry per row of location_data.
for column in ('Bus Stations', 'Train Stations', 'Airports', 'Ports'):
    location_data[column] = transportation_data[column]

In [100]:
# remove the raw population count (superseded by 'Population (in thousands)')
# errors='ignore' keeps this cell re-runnable: once the column is gone, a
# second run is a no-op instead of raising KeyError.
location_data = location_data.drop(columns='Population', errors='ignore')

# format the location data: keep only the export columns, in export order
location_data_formatted = location_data[[
    'Population (in thousands)',
    'Main Language',
    'Secondary Language',
    'Bus Stations',
    'Train Stations',
    'Airports',
    'Ports',
]]

In [101]:
# Display the formatted table that will be exported.
location_data_formatted

Unnamed: 0,Population (in thousands),Main Language,Secondary Language,Bus Stations,Train Stations,Airports,Ports
0,12877.253,Filipino,Filipino,"[UERM Memorial Medical Center, J. Ruiz, Aurora...","[J. Ruiz, V. Mapa, Gilmore, Santa Mesa, Pureza...","[Ninoy Aquino International Airport, Manila In...",[Port of Manila]
1,1952.544,Bikol,Filipino,"[Ocampo, Ocampo, Baao, Baao, Pili, Pili, Iriga...","[Sipocot, PNR Baao Station, Train Station, PNR...","[Naga Airport, Bicol International Airport, Da...","[Sabang Port, Port of Pasacao, Guijalo Port, B..."
2,675.95,Ilocano,Filipino,"[Camalaniugan, Camalaniugan, Lal-Lo, Lal-Lo, A...","[PNR Tuguegarao Central Station, GV Florida Bu...","[Cagayan North International Airport, Tuguegar...","[San Vicente Fish Port, Port Irene, Casambalan..."
3,593.081,Ilocano,Filipino,"[San Nicolas, San Nicolas, Laoag, Laoag, Parta...","[GV Florida Bus Station, Caltex Airport Laoag,...","[Laoag International Airport, Liagens, Laoag C...","[Currimao Port, Currimao, PAGSANAHAN PORT to B..."
4,689.668,Ilocano,Filipino,"[Candon City, Candon City, Candon City, Candon...","[Vigan Terminal, Candon City, Aniceto Bus Line...","[Vigan Airport, Laoag City International Airpo...","[Salomague Port, Quirino Tagudin ilocos sur, B..."
5,76500.0,Cebuano,Filipino,"[Professional Regulations Commission, Securiti...",[Cebu city train station],[PAir Philippines - Mactan - Cebu City Interna...,"[Cebu Port Authority, Cebu Port Authority, Ceb..."
6,2198.11,Kapampangan,Filipino,"[San Fernando City, San Fernando City, Transpo...","[Gov.Pascual, Sangandaan, Asistio, Monumento, ...","[Clark International Airport, Clark Internatio...","[Philippine Ports Authority, Plant Quarantine ..."
7,849.469,Palawano,Filipino,"[San Jose Terminal, Roro Bus Station, Roxas Bu...","[El Nido Transport Terminal, Coron Bus and Jee...","[Puerto Princesa International Airport, El Nid...",[Philippine Ports Authority - Port Management ...
8,202.802,Batad,Filipino,"[Lagawe, Lagawe, Lagawa, Kiangan, Banaue, Bana...","[Rapid Kl Train Station 2, Hanging Bridge, Rap...",[Civil Aviation Authority of the Philippines -...,[Philippine Ports Authority - PMO Northern Luz...
9,632.588,Davaoeño,Filipino,"[Calinan, Calinan Public Market Jeepney Stop, ...","[Catalunan Grande Jeepney Station, Davao City ...","[Francisco Bangoy International Airport, Davao...",[Philippine Ports Authority Port District Offi...


In [103]:
# Write the formatted table to CSV without the row index.
# NOTE(review): 'Province' is not among the exported columns -- confirm that
# rows are meant to be identified by position only.
file_path = 'data/Location_Source.csv'
location_data_formatted.to_csv(file_path, index=False)

In [106]:
# check: read the exported file back and preview it
# NOTE(review): the list-valued columns appear to come back as their string
# representation after the CSV round trip -- parse them if lists are needed.
df = pd.read_csv('data/Location_Source.csv')
df.head()

Unnamed: 0,Population (in thousands),Main Language,Secondary Language,Bus Stations,Train Stations,Airports,Ports
0,12877.253,Filipino,Filipino,"['UERM Memorial Medical Center', 'J. Ruiz', 'A...","['J. Ruiz', 'V. Mapa', 'Gilmore', 'Santa Mesa'...","['Ninoy Aquino International Airport', 'Manila...",['Port of Manila']
1,1952.544,Bikol,Filipino,"['Ocampo', 'Ocampo', 'Baao', 'Baao', 'Pili', '...","['Sipocot', 'PNR Baao Station', 'Train Station...","['Naga Airport', 'Bicol International Airport'...","['Sabang Port', 'Port of Pasacao', 'Guijalo Po..."
2,675.95,Ilocano,Filipino,"['Camalaniugan', 'Camalaniugan', 'Lal-Lo', 'La...","['PNR Tuguegarao Central Station', 'GV Florida...","['Cagayan North International Airport', 'Tugue...","['San Vicente Fish Port', 'Port Irene', 'Casam..."
3,593.081,Ilocano,Filipino,"['San Nicolas', 'San Nicolas', 'Laoag', 'Laoag...","['GV Florida Bus Station', 'Caltex Airport Lao...","['Laoag International Airport', 'Liagens', 'La...","['Currimao Port', 'Currimao', 'PAGSANAHAN PORT..."
4,689.668,Ilocano,Filipino,"['Candon City', 'Candon City', 'Candon City', ...","['Vigan Terminal', 'Candon City', 'Aniceto Bus...","['Vigan Airport', 'Laoag City International Ai...","['Salomague Port', 'Quirino Tagudin ilocos sur..."
