# Scrape the odds
From Unibet using Selenium and Firefox.

In [1]:
import numpy as np
import pandas as pd
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from time import sleep

In [41]:
# Start the browser - Run this only 1 time
browser = webdriver.Firefox()

In [40]:
# Quit the browser - Run this only when you're done
browser.quit()

In [8]:
# choose a league (0-5)
currentLeague = 1

In [4]:
# the urls
url_0 = 'https://www.unibet.eu/betting#filter/football/'
urls = [url_0 + u for u in ['netherlands/eredivisie',
                            'spain/la_liga',
                            'germany/bundesliga',
                            'england/premier_league',
                            'france/ligue_1',
                            'italy/serie_a']]
urls

['https://www.unibet.eu/betting#filter/football/netherlands/eredivisie',
 'https://www.unibet.eu/betting#filter/football/spain/la_liga',
 'https://www.unibet.eu/betting#filter/football/germany/bundesliga',
 'https://www.unibet.eu/betting#filter/football/england/premier_league',
 'https://www.unibet.eu/betting#filter/football/france/ligue_1',
 'https://www.unibet.eu/betting#filter/football/italy/serie_a']

In [84]:
# create empty data frame to append the odds to
df = pd.DataFrame()

## Odds from unibet

In [85]:
# go to the url
for url in urls:
    print('Going to:', url)
    browser.get(url)

    # have a generic wait object for the driver
    wait = WebDriverWait(browser, 5)

    # wait until the accept cookies button is clickable
    try:
        element = wait.until(
            EC.element_to_be_clickable(
                (By.ID, "CybotCookiebotDialogBodyButtonAccept")
            )
        )
        # When its present, let's wait another sec
        #sleep(1)
        # click on it
        element.click()
        print('Cookie button found and clicked')
    except TimeoutException:
        # not found, but maybe already clicked before so continue
        pass
    finally:
        # wait until the cookie bar is really gone
        wait.until(
            EC.invisibility_of_element_located(
                (By.ID, "CybotCookiebotDialog")
            )
        )
        # Just to be sure
        #sleep(1)

    # wait for and find all dropdown elements, ignore the already expanded ones
    dropdowns = wait.until( 
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, ".KambiBC-event-groups \
            .KambiBC-collapsible-container:not(.KambiBC-expanded)")
        )
    )
    print(len(dropdowns), 'dropdowns found')

    # click on them
    for dropdown in dropdowns:
        dropdown.click()
        #sleep(np.pi)

    # wait for every match (n) to have 3 odds (m)
    tries = 0
    while True:
        # test
        tries += 1
        sleep(0.2)
        n = len(browser.find_elements_by_css_selector('.KambiBC-event-groups li.KambiBC-event-item'))
        m = len(browser.find_elements_by_css_selector('.KambiBC-event-groups button.KambiBC-mod-outcome'))

        if 3*n == m:
            # all odds present
            break

        if tries > 50:
            # over 10 sec
            break

    print(n, 'matches and', m, 'odds')

    # now get the beautiful source code
    soup = BeautifulSoup(browser.page_source, 'html5lib')

    # search (css select) for matches in the soup
    # TODO: skip currently live matches
    matches = soup.select('.KambiBC-event-groups li.KambiBC-event-item')

    # print amount found
    print("Matches found in soup:", len(matches))

    # Create empty list to be filled with 1 dict/match
    jobs = []

    for match in matches:
        # Create empty dictionary for this match
        job = {}

        # Find info on the page
        #job["date"] = match.find(class_="KambiBC-event-item__start-time--date").text # <- Doesnt work..?
        teams = match.find_all(class_="KambiBC-event-participants__name")
        job["home_team"] = teams[0].text
        job["away_team"] = teams[1].text
        odds = match.find_all(class_="KambiBC-mod-outcome__odds")
        job["odd_home_win"] = odds[0].text
        job["odd_tie"] = odds[1].text
        job["odd_away_win"] = odds[2].text

        # Add to the list
        jobs.append(job)

    # Append as data frame to main data frame
    df = pd.concat([df, pd.DataFrame(jobs)])

Going to: https://www.unibet.eu/betting#filter/football/netherlands/eredivisie
1 dropdowns found
9 matches and 27 odds
Matches found in soup: 9
Going to: https://www.unibet.eu/betting#filter/football/spain/la_liga
5 dropdowns found
20 matches and 60 odds
Matches found in soup: 20
Going to: https://www.unibet.eu/betting#filter/football/germany/bundesliga
4 dropdowns found
18 matches and 54 odds
Matches found in soup: 18
Going to: https://www.unibet.eu/betting#filter/football/england/premier_league
4 dropdowns found
21 matches and 63 odds
Matches found in soup: 21
Going to: https://www.unibet.eu/betting#filter/football/france/ligue_1
4 dropdowns found
20 matches and 60 odds
Matches found in soup: 20
Going to: https://www.unibet.eu/betting#filter/football/italy/serie_a
2 dropdowns found
12 matches and 36 odds
Matches found in soup: 12


In [86]:
df

Unnamed: 0,away_team,home_team,odd_away_win,odd_home_win,odd_tie
0,FC Groningen,Excelsior Rotterdam,2.95,2.35,3.40
1,Heracles Almelo,NAC Breda,2.35,2.95,3.40
2,Vitesse,PSV Eindhoven,9.00,1.35,5.00
3,De Graafschap,AZ Alkmaar,10.00,1.30,5.25
4,Willem II,Ajax,21.00,1.12,8.50
5,FC Emmen,SC Heerenveen,6.00,1.55,4.00
6,ADO Den Haag,FC Utrecht,4.50,1.75,3.75
7,PEC Zwolle,Fortuna Sittard,2.85,2.40,3.40
8,VVV Venlo,Feyenoord,10.00,1.30,5.25
0,Atlético Madrid,Leganés,1.62,6.75,3.50


### Data transformation

In [None]:
# Change team names to lowercase
df["home_team"] = df["home_team"].str.lower()
df["away_team"] = df["away_team"].str.lower()

# Remove all accents
df["home_team"] = df["home_team"].str.normalize('NFKD').str.encode(encoding='ascii',errors='ignore').str.decode('utf-8')
df["away_team"] = df["away_team"].str.normalize('NFKD').str.encode(encoding='ascii',errors='ignore').str.decode('utf-8')

# Convert to numbers
df["odd_home_win"] = pd.to_numeric(df["odd_home_win"])
df["odd_away_win"] = pd.to_numeric(df["odd_away_win"])
df["odd_tie"] = pd.to_numeric(df["odd_tie"])

# Show
df

In [None]:
# append to csv
df.to_csv('./data/This_month_odds.csv', index=False, mode='a')