##### This script scrapes the required data from vlr.gg for model training purposes

- navigate to the event page
- select event
- select matches
- scrape the table based on the map
- repeat the process until all matches are scraped

### Import libraries

In [5]:
# selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# other libraries needed
import time
import pandas as pd

### Webdriver

In [6]:
# Build the Chrome options BEFORE creating the driver and pass them in;
# options created afterwards (or never passed) have no effect on the browser.
options = Options()
# options.add_argument('--headless=new') # uncomment this when require to run in headless mode
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# launch the driver
url = "https://www.vlr.gg/events" # directly go to event tab (all results are stored in there)
driver.get(url)
driver.maximize_window()
main_tab_handle = driver.current_window_handle # handle of the first tab (events list), used to return to it later
# element lookups will keep retrying for up to 5 seconds before failing,
# which gives slow-loading content time to appear
driver.implicitly_wait(5)

### Scraping

#### Function: Locate/direct to match details and scrape

In [7]:
# scrape function
def getMatch(a):
    """Open one event from the events list and run ``scrape`` on all its matches.

    The event is opened in a new tab, the "matches" navigation entry is
    selected, the stage filter is set to "All Stages", and then every link
    inside each day card is opened in its own tab. For each match tab, every
    ``div`` of the maps navigation bar is handed to ``scrape``.

    Tabs opened here are always closed again, and focus is returned to the
    tab they were opened from, even when a lookup or click raises.

    Args:
        a: the clickable element of the event on the events list page.

    Raises:
        selenium TimeoutException: if a ctrl-click does not open a new tab.
        Any selenium lookup/click error raised while navigating; it is
        re-raised after the opened tabs have been cleaned up.
    """
    # get the name of the event
    event_name = a.find_element(By.CLASS_NAME, 'event-item-title').text
    print(f"scrapping data for {event_name}", end="...")

    # open new tab to show event detail
    event_window = _open_in_new_tab(a)
    try:
        # navigate to matches tab
        nav_bar = driver.find_element(By.CLASS_NAME, 'wf-nav')
        nav_links = nav_bar.find_elements(By.TAG_NAME, 'a')
        nav_links[1].click() # all matches record are stored in matches tab [second tab]

        # click dropdown menu to select "All Stages" to display all matches
        dropdown_menu_btn = driver.find_element(By.CSS_SELECTOR, '.btn.mod-filter.js-dropdown')
        dropdown_menu_btn.click()
        time.sleep(2)
        stage_menu = driver.find_element(By.CSS_SELECTOR, '.wf-dropdown.mod-all')
        stage_menu.find_element(By.LINK_TEXT, 'All Stages').click()
        time.sleep(2)

        # all matches are arranged according to date
        matches_day = driver.find_elements(By.CLASS_NAME, 'wf-card')

        for day in matches_day[1:]: # first element with class='wf-card' is the event header, skip it
            # every clickable element in the day card is treated as a match
            for match in day.find_elements(By.TAG_NAME, 'a'):
                # open match details in new tab
                _open_in_new_tab(match)
                try:
                    time.sleep(1)

                    # locate the maps div
                    maps_nav = driver.find_element(By.CSS_SELECTOR, ".vm-stats-gamesnav.noselect")

                    # scrape each map in the maps div
                    for game_tab in maps_nav.find_elements(By.TAG_NAME, 'div'):
                        scrape(game_tab)
                finally:
                    # close the match tab and go back to the event tab so the
                    # next iteration (or the outer cleanup) starts from there
                    driver.close()
                    driver.switch_to.window(event_window)

        print("done")
    finally:
        driver.close() # close event window
        time.sleep(3)
        driver.switch_to.window(main_tab_handle) # focus to event list


def _open_in_new_tab(element, timeout=10):
    """Ctrl-click *element*, switch to the tab that opens and return its handle."""
    handles_before = driver.window_handles
    # a fresh chain per click, so nothing queued by an earlier click can be replayed
    ActionChains(driver).key_down(Keys.CONTROL).click(element).key_up(Keys.CONTROL).perform()
    # the new tab may not exist yet right after the click; wait for it instead
    # of indexing window_handles at a fixed position
    WebDriverWait(driver, timeout).until(EC.new_window_is_opened(handles_before))
    new_handle = next(h for h in driver.window_handles if h not in handles_before)
    driver.switch_to.window(new_handle)
    return new_handle

#### Function: Scrape the data required

In [3]:
def scrape(map, wait=2):
    """Switch the match page to one map tab so its stats can be scraped.

    The tab is clicked only when its ``data-disabled`` attribute is ``"0"``
    and its ``data-game-id`` attribute is not ``"all"``. Any other element is
    skipped immediately, without waiting.

    Args:
        map: element taken from the maps navigation bar. (The name shadows
            the builtin ``map``; it is kept so existing callers keep working.)
        wait: seconds to pause after clicking the tab. Defaults to 2.

    Returns:
        None. Extracting the table data is still to be implemented.
    """
    # Access data-disabled and data-game-id attribute value:
    disabled_value = map.get_attribute("data-disabled")
    game_id_value = map.get_attribute("data-game-id")

    # skip anything that is not an enabled, single-map tab; nothing was
    # clicked, so there is nothing to wait for
    if disabled_value != "0" or game_id_value == "all":
        return

    map.click() # switch to respective map

    # TODO: scrape required data from page
    # Steps:
    # 1) scrape the data from the table and insert to a pd dataframe (TBD)
    #    TBD: how to store the dataset (maybe all 10 players in 1 row)
    # 2) repeat the steps for each map

    time.sleep(wait)
    return

#### Locate events list and start scraping process

In [8]:
# Walk through the paginated events list.
# Each page is scraped first and only then checked for a following page, so an
# events list consisting of a single page is scraped too.
while True:
    # locate completed event div
    events_div = driver.find_elements(By.CLASS_NAME, 'events-container-col')
    completed_events_div = events_div[1] # completed events always == second div

    # extract all clickable element into an array
    all_a = completed_events_div.find_elements(By.TAG_NAME, 'a')

    # scrape all completed event
    for a in all_a[:1]: # remove[:] when need to scrape all data
        getMatch(a)

    # find page div; the links after the active page marker are the remaining pages
    page = driver.find_element(By.CLASS_NAME, 'action-container-pages')
    span = page.find_element(By.CSS_SELECTOR, 'span.btn.mod-page.mod-active')
    next_pages = span.find_elements(By.XPATH, 'following-sibling::a')

    # stop once the active page has no following page link
    if not next_pages:
        print('completed for all page')
        break

    print(f'scrapping {span.text}')
    next_pages[0].click()

# repeat the process for all pages


scrapping data for Challengers League 2024 North America: Qualifiers...

KeyboardInterrupt: 

#### Store the dataframe as .csv for future use (model training / backup)

### Exit driver, complete scraping

In [None]:
# shut down the WebDriver session now that scraping has finished
driver.quit()