# Python exam - HLTV.org scraper and CS:GO competition predictor

## Scraping the HLTV.org webpage

In [1]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
import bs4

# Set up a headless Firefox webdriver that reports a desktop Linux user agent.
profile = webdriver.FirefoxProfile()
profile.set_preference("general.useragent.override", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0")
options = Options()
options.headless = True
# Attach the profile to the options. The profile used to be built but never
# handed to the driver, so the user-agent override above was silently ignored.
options.profile = profile
browser = webdriver.Firefox(options=options)

## Team viewer

In [2]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# Open the landing page; every later navigation is relative to base_url.
base_url = "https://www.hltv.org"
browser.get(base_url)
browser.implicitly_wait(2)

# Dismiss the cookie prompt (best effort). Any optional cookie category that is
# pre-selected gets un-ticked before the dialog is confirmed.
try:
    wait = WebDriverWait(browser, 100)
    element = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="CybotCookiebotDialogBodyLevelButtonLevelOptinAllowallSelection"]')))

    # find_element(By..., ...) is used instead of the find_element_by_* helpers:
    # it is available on Selenium 3 and remains after the helpers were removed.
    if browser.find_element(By.XPATH, '//*[@id="CybotCookiebotDialogBodyContent"]').is_displayed():
        print("Cookie prompt found")

        optional_checkbox_ids = (
            "CybotCookiebotDialogBodyLevelButtonPreferences",
            "CybotCookiebotDialogBodyLevelButtonStatistics",
            "CybotCookiebotDialogBodyLevelButtonMarketing",
        )
        for checkbox_id in optional_checkbox_ids:
            checkbox = browser.find_element(By.ID, checkbox_id)
            if checkbox.is_selected():
                checkbox.click()

    element.click()
    print("Cookie prompt dismissed")
except Exception as exc:
    # Still best effort, but no longer a bare `except:`: KeyboardInterrupt is
    # not trapped and the failure reason is reported instead of being hidden.
    print("Cookies not found:", type(exc).__name__)

Cookie prompt found
Cookie prompt dismissed


In [3]:
# Enter the world ranking page and snapshot its HTML for parsing.
# find_element(By.LINK_TEXT, ...) works on Selenium 3 and on releases where the
# find_element_by_link_text helper no longer exists.
button = browser.find_element(By.LINK_TEXT, 'RANKING')
button.click()

soup = bs4.BeautifulSoup(browser.page_source, 'html.parser')

In [4]:
from IPython.display import display, HTML

# Stylesheet injected into the notebook page; it targets the `.output` class.
CSS = """
.output {
    align-items: center;
}
"""

# The HTML object is the cell's last expression, so the <style> tag is rendered.
HTML('<style>' + CSS + '</style>')

In [5]:
import ipywidgets as widgets
import pandas as pd

# URL of the animated "loading" image.
img = 'https://i.pinimg.com/originals/d7/34/49/d73449313ecedb997822efecd1ee3eac.gif'
# Remove pandas' column-width limit - presumably so the long <img> markup in the
# player table below is not truncated when rendered; TODO confirm.
pd.set_option('display.max_colwidth', None)


#Displaying teams from top 30
def path_to_image_html(path):
    """Return an ``<img>`` tag (110px wide) whose ``src`` is ``path``.

    The value is HTML-escaped before being placed in the attribute: the URL is
    scraped from a web page and the table is rendered with ``escape=False``, so
    a stray quote or ampersand would otherwise break (or inject into) the markup.

    Args:
        path: Image URL as a string.

    Returns:
        The HTML snippet as a string.
    """
    import html  # local import keeps the function self-contained

    return '<img src="' + html.escape(path, quote=True) + '" width="110" >'
            
def get_players_for_team(team_url):
    """Scrape the players table of a team page.

    Args:
        team_url: Site-relative path of the team page; it is appended to
            ``base_url`` and loaded in the shared ``browser``.

    Returns:
        A list of tuples ``(image_url, nickname, status, time_on_team,
        maps_played, rating)`` holding the scraped strings, one per table row.
        An empty list is returned when the page has no players table.
    """
    browser.get(base_url + team_url)
    page = bs4.BeautifulSoup(browser.page_source, 'html.parser')

    players_table = page.find('table', {'class' : 'table-container players-table'})
    if players_table is None:
        # Page layout differs or the roster is missing: report "no players"
        # instead of failing with AttributeError on None.
        return []

    players = []
    # The first row is skipped (presumably the table header).
    for row in players_table.find_all('tr')[1:]:
        player_nick = row.find('div', {'class' : 'text-ellipsis'}).text.strip()
        player_img_url = row.find_all('img')[0]['src']
        player_status = row.find('div', {'class' : 'player-status'}).text
        # Two cells share this class: the first feeds "time on team", the second "maps played".
        player_stats = row.find_all('div', {'class' : 'players-cell center-cell opacity-cell'})
        player_tot = player_stats[0].text
        player_mp = player_stats[1].text
        player_rating = row.find('div', {'class' : 'rating-cell'}).text

        players.append((player_img_url, player_nick, player_status, player_tot, player_mp, player_rating))

    return players

# def tryconvert(value, default, *types):
#     for t in types:
#         try:
#             return t(value)
#         except (ValueError, TypeError):
#             continue
#     return default

# def color_negative_red(val):
#     color = 'red' if tryconvert(val,0,float) < 1.05 else 'green'
#     return 'color: %s' % color

# styles = [
#     dict(selector='th:first-child', props=[('text-align', 'left')]),
#     dict(selector='th', props=[('color', 'blue')])
# ] 

# Dedicated output area for the team viewer. It has its own name because later
# cells rebind the global `output`; a handler that looked up `output` at call
# time would then render the roster into another cell's widget.
team_output = widgets.Output(layout={'border': '1px solid black'})
output = team_output  # backward-compatible alias for the previous global name


def dropdown_teams_eventhandler(change):
    """Show the roster of the team picked in the dropdown.

    Args:
        change: Widget change notification; ``change.new`` is the selected
            option value, i.e. the site-relative team URL.

    A loading image is displayed while the team page is scraped, then it is
    replaced by an HTML table of the players.
    """
    team_output.clear_output()
    with team_output:
        display(HTML('<img src="' + img + '">'))
        players = get_players_for_team(change.new)
        team_output.clear_output()
        players_df = pd.DataFrame(players, columns=['Image', 'Nickname', 'Status', 'Time on team', 'Maps played', 'Rating'])
        # Render the image column as <img> tags; escape=False keeps that markup intact.
        format_dict = {'Image': path_to_image_html}
        display(HTML(players_df.to_html(escape=False, formatters=format_dict, index=False)))


# Build the dropdown options as ("<rank> - <name>", team URL) pairs from the ranking page.
teams = []

team_elements = soup.find_all('div', {'class': 'ranked-team standard-box'})
for team_box in team_elements:
    # Same selector result is used for both fields: first match feeds the rank, second the name.
    first_spans = team_box.select('div>span:nth-child(1)')
    team_rank = first_spans[0].text
    team_name = first_spans[1].text
    team_link = team_box.find_all('a', {'class': 'moreLink'})[0]['href']
    teams.append((team_rank + " - " + team_name, team_link))

dropdown_teams = widgets.Dropdown(options=teams, value=None)
dropdown_teams.observe(dropdown_teams_eventhandler, names='value')

display(dropdown_teams)
team_output

Dropdown(options=(('#1 - Natus Vincere', '/team/4608/natus-vincere'), ('#2 - Vitality', '/team/9565/vitality')…

Output(layout=Layout(border='1px solid black'))

## Tournament predictor

In [6]:
# Go back to the landing page, open the events page and snapshot its HTML.
browser.get(base_url)
browser.implicitly_wait(2)

# find_element(By.LINK_TEXT, ...) works on Selenium 3 and on releases where the
# find_element_by_link_text helper no longer exists.
button = browser.find_element(By.LINK_TEXT, 'EVENTS')
button.click()

soup = bs4.BeautifulSoup(browser.page_source, 'html.parser')

In [7]:
output = widgets.Output(layout={'border': '1px solid black'})

# Collect the events listed on the events page as
# [(name, date, teams, prize), href] entries in `all_events`.
events = []
featured_events = soup.find('div', {'class' : 'events-holder'})
# A page without the events container yields an empty list instead of an AttributeError.
event_elements = featured_events.find_all('a', {'class' : ['a-reset', 'standard-box']}) if featured_events is not None else []
print(len(event_elements))
all_events = []
for e in event_elements:
    css_classes = e['class']
    if 'big-event' in css_classes:
        event_info = e.select("div[class=additional-info] > table > tbody > tr")[0].select("td")
        event_href = e['href']
        event_name = e.select("div[class=info] > div")[0].text
        event_date = event_info[0].text
        event_prize = event_info[1].text
        event_teams = event_info[2].text
    elif 'small-event' in css_classes:
        event_info = e.select("tr")[0].find_all('td')
        event_href = e['href']
        event_name = event_info[0].select('div')[0].text
        event_teams = event_info[1].text
        event_prize = event_info[2].text
        event_date_info = e.select("tr[class=eventDetails] > td > span")[1].select("span > span >span")
        try:
            event_date = event_date_info[0].text + event_date_info[1].text
        except IndexError:
            # Only one date span is present; use it on its own.
            event_date = event_date_info[0].text
    else:
        # Neither a big nor a small event card: nothing was parsed for this
        # anchor, so appending would duplicate the previous event's data.
        continue

    # NOTE(review): `break` stops at the first event whose teams are 'TBA', so
    # later events are not listed either - confirm this is intended rather than `continue`.
    if event_teams == 'TBA':
        break

    # Both branches now feed the same `event_prize` variable. Small events used
    # to append the stale `event_price` left over from an earlier big event.
    all_events.append([(event_name, event_date, event_teams, event_prize), event_href])

print(len(all_events))



23
14


In [98]:
from ipywidgets import Layout
from selenium.webdriver.support.ui import Select


# Bordered output area; get_teams_for_event below prints its progress and the prediction into it.
output = widgets.Output(layout={'border': '1px solid black'})

def select_event_handler(change):
    """Observer callback for the event list.

    Forwards the newly selected value (``change.new``, the event URL) to
    ``get_teams_for_event``.
    """
    selected_event_url = change.new
    get_teams_for_event(selected_event_url)
    
def get_teams_for_event(event_url):
    """Load an event page and, when all its teams are confirmed, print a predicted winner.

    Args:
        event_url: Site-relative path of the event page; appended to ``base_url``.

    Everything is printed inside the shared ``output`` widget, which is cleared
    first. Nothing is returned.
    """
    with output:
        output.clear_output()
        browser.get(base_url + event_url)
        page = bs4.BeautifulSoup(browser.page_source, 'html.parser')

        attending_section = page.find('div', {'class' : 'teams-attending'})
        teams_attending = attending_section.select('div[class=col]') if attending_section is not None else []
        if not teams_attending:
            # No section or no team slots: there is nothing to rate, and
            # max() over an empty rating dict would raise ValueError.
            print("No attending teams listed. Prediction unavailable")
            return

        if get_confirmed_teams(teams_attending):
            team_ratings = get_team_rating(teams_attending)
            print("Predicted event winner:", predict_event_winner(team_ratings))
            
def predict_event_winner(team_ratings):
    """Return the key of ``team_ratings`` with the highest value.

    On a tie the entry that comes first in the mapping's iteration order wins.
    """
    best_team, _best_rating = max(team_ratings.items(), key=lambda entry: entry[1])
    return best_team

def get_confirmed_teams(teams_attending):
    """Report whether every attending slot has a team name.

    A slot counts as confirmed when the selector
    ``div[class=team-name] > a > div[class=text]`` matches at least one element
    inside it. A status line is printed either way.

    Returns:
        True when all slots are confirmed, False otherwise.
    """
    confirmed_teams = sum(
        1
        for slot in teams_attending
        if len(slot.select('div[class=team-name] > a > div[class=text]')) > 0
    )
    total_slots = len(teams_attending)

    if confirmed_teams >= total_slots:
        print("Prediction available")
        return True

    print(confirmed_teams, "team(s) of", total_slots, "confirmed. Prediction unavailable")
    return False
        
def get_team_rating(teams_attending):
    """Collect a rating for every attending team from its stats page.

    Args:
        teams_attending: Parsed team elements; the first ``<a>`` of each one
            supplies the team link (``href``) and the team name (text of its
            first ``<div>``).

    Returns:
        A dict mapping team name to the value returned by ``calc_team_rating``.

    A progress line is printed after each team.
    """
    team_ratings = {}
    for idx, e in enumerate(teams_attending):
        team_anchor = e.find('a')
        team_link = team_anchor['href']
        team_name = team_anchor.find('div').text

        # Rewrite only the '/team/' path segment. A blanket
        # replace('team', 'teams') also altered slugs that contain "team".
        stats_path = team_link.replace('/team/', '/teams/', 1)
        browser.get(base_url + "/stats" + stats_path)

        # find_element(By.XPATH, ...) works on Selenium 3 and on releases where
        # find_element_by_xpath no longer exists.
        # NOTE(review): the absolute XPath depends on the exact page layout.
        select = Select(browser.find_element(By.XPATH, '/html/body/div[2]/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/form/select'))
        select.select_by_visible_text('Last 3 months')

        soup = bs4.BeautifulSoup(browser.page_source, 'html.parser')
        stats = soup.find_all('div', {'class' : 'large-strong'})
        # Layout expected by calc_team_rating:
        # [games played, [won, <middle>, lost], rounds played, KDA]
        team_stats = [
            stats[0].text,
            stats[1].text.replace(" ", "").split('/'),
            stats[4].text,
            stats[5].text,
        ]
        team_ratings[team_name] = calc_team_rating(team_stats)
        print(idx+1, "/", len(teams_attending), "stats collected")
    return team_ratings
    
def calc_team_rating(team_stats):
    """Compute a team's rating from its scraped statistics.

    Args:
        team_stats: Sequence laid out as
            ``[games_played, [games_won, <middle>, games_lost], rounds_played, KDA]``;
            every value must be convertible with ``float``. The middle entry of
            the inner list is not used (presumably draws).

    Returns:
        ``((games_played * games_won / games_lost) / 1000) * rounds_played * KDA``
        as a float. When ``games_lost`` is zero the divisor is taken as 1, so a
        team without losses gets a finite rating instead of a ZeroDivisionError.
    """
    games_played = float(team_stats[0])
    games_won = float(team_stats[1][0])
    games_lost = float(team_stats[1][2])
    rounds_played = float(team_stats[2])
    KDA = float(team_stats[3])

    # Guard the win/loss ratio against a team that has not lost a game.
    loss_divisor = games_lost if games_lost > 0 else 1.0
    rating = ((games_played * games_won / loss_divisor) / 1000) * rounds_played * KDA
    return rating

# Selection list of events: each option is ("name - date - teams - prize", event URL).
events = [(" - ".join(info[0]), info[1]) for info in all_events]
select_event = widgets.Select(
    options=events,
    value=None,
    disabled=False,
    # `align` was dropped: it is not a Layout trait (the rendered Layout repr
    # never listed it), so it had no effect and was only an unrecognized argument.
    layout=Layout(width='1000px', height='200px', justify_content='center')
)

select_event.observe(select_event_handler, names='value')
display(select_event)
output


Select(layout=Layout(height='200px', justify_content='center', width='1000px'), options=(('BLAST Premier World…

Output(layout=Layout(border='1px solid black'))