In [169]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import os
import time
import numpy as np

# URL of the Tipico page
url = 'https://sports.tipico.de/de/alle/fussball/em-wetten/event/587999910?eventPanelMode=2&t=match'

# Specify the path to the Chrome browser executable
path_to_chrome = '/usr/bin/google-chrome'
path_to_brave = '/snap/bin/brave'
path = path_to_chrome

# Ensure the executable exists
if not os.path.exists(path):
    raise FileNotFoundError(f"The specified browser executable does not exist: {path}")

# Set up Selenium
options = Options()
options.binary_location = path
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Initialize the WebDriver
try:
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    print("Selenium WebDriver started...")
except Exception as e:
    print(f"Error initializing WebDriver: {e}")
    raise

# Open the page
try:
    driver.get(url)
    print(f"Page {url} opened...")

    # Wait for the cookie consent button to be clickable and click it
    WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.ID, '_evidon-accept-button'))).click()
    print("Cookie consent accepted...")

    # Wait for any overlaying elements to disappear
    time.sleep(2)  # Give some time for any overlaying elements to appear and be closed
    overlays = driver.find_elements(By.CLASS_NAME, 'ClickOutside-styles-module-default')
    for overlay in overlays:
        try:
            driver.execute_script("arguments[0].style.visibility='hidden'", overlay)
        except Exception as e:
            print(f"Failed to hide overlay: {e}")

    # Scroll the "Mehr anzeigen" button into view and click it
    mehr_anzeigen_button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CLASS_NAME, 'Category-styles-module-toggle')))
    driver.execute_script("arguments[0].scrollIntoView(true);", mehr_anzeigen_button)
    driver.execute_script("arguments[0].click();", mehr_anzeigen_button)
    time.sleep(2)  # Wait for the additional content to load

    # Get the HTML content of the page
    html_content = driver.page_source
except Exception as e:
    print(f"Error opening the page or getting page source: {e}")
    driver.quit()
    raise

# Save the HTML content to a text file
with open("page_content.txt", "w", encoding="utf-8") as file:
    file.write(html_content)

print("HTML content saved to page_content.txt")

# Close the browser
driver.quit()

Selenium WebDriver started...
Page https://sports.tipico.de/de/alle/fussball/em-wetten/event/587999910?eventPanelMode=2&t=match opened...
Cookie consent accepted...
HTML content saved to page_content.txt


In [170]:
# Read the HTML content from the file
with open("page_content.txt", "r", encoding="utf-8") as file:
    html_content = file.read()

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Extract country names
teams_section = soup.find('section', class_='SoccerPreLive-styles-module-content')
teams = teams_section.find_all('div', class_='SoccerPreLive-styles-module-team-data')
country_names = [team.find('div', class_='SoccerPreLive-styles-module-team-name').text.strip() for team in teams]

# Extract match odds for "Tipp"
tipp_section = soup.find('div', class_='Category-styles-module-market-group')
tipp_odds_data = []

if tipp_section:
    odds_buttons = tipp_section.find_all('button', class_='OddResult-styles-module-odd-button')
    labels = ['1', 'X', '2']
    for button, label in zip(odds_buttons, labels):
        odds = button.find('div', class_='OddResult-styles-module-value-cell').text.strip()
        country = None
        if label == '1':
            country = country_names[0]  # Home team
        elif label == 'X':
            country = 'Draw'
        elif label == '2':
            country = country_names[1]  # Away team
        tipp_odds_data.append({'Country': country, 'Odds': odds})

# Convert the list of dictionaries to a DataFrame
tipp_odds_df = pd.DataFrame(tipp_odds_data)

# Extract match results and probabilities for "Ergebnis"
ergebnis_section = soup.find_all('div', class_='OddsDivided-styles-module-odds-divided')
ergebnis_data = []

for section in ergebnis_section:
    buttons = section.find_all('button', class_='OddResult-styles-module-odd-button')
    for button in buttons:
        label = button.find('div', class_='OddResult-styles-module-label-cell').text.strip()
        probability = button.find('div', class_='OddResult-styles-module-value-cell').text.strip()
        ergebnis_data.append({'Result': label, 'Probability': probability})

# Convert the list of dictionaries to a DataFrame
ergebnis_df = pd.DataFrame(ergebnis_data)

In [171]:

# Convert Odds to Probabilities (handle comma as decimal separator)
tipp_odds_df['Odds'] = tipp_odds_df['Odds'].str.replace(',', '.').astype(float)
tipp_odds_df['Probability'] = 1 / tipp_odds_df['Odds']
tipp_odds_df['Probability'] = tipp_odds_df['Probability'] / tipp_odds_df['Probability'].sum()

# Convert result odds to probabilities (handle comma as decimal separator)
ergebnis_df['Probability'] = ergebnis_df['Probability'].str.replace(',', '.').astype(float)
ergebnis_df = ergebnis_df[ergebnis_df['Probability'] != 250]  # Exclude rows with odds of 250
ergebnis_df = ergebnis_df[ergebnis_df['Result'] != 'X:X']  # Exclude 'X:X' result
ergebnis_df['Probability'] = 1 / ergebnis_df['Probability']
ergebnis_df['Probability'] = ergebnis_df['Probability'] / ergebnis_df['Probability'].sum()

# Extract country names
home_team = tipp_odds_df.loc[tipp_odds_df['Country'] != 'Draw', 'Country'].values[0]
away_team = tipp_odds_df.loc[tipp_odds_df['Country'] != 'Draw', 'Country'].values[1]

# Calculate expected goal differences
def calculate_goal_difference(result):
    home_goals, away_goals = map(int, result.split(':'))
    return home_goals - away_goals

ergebnis_df['Goal_Difference'] = ergebnis_df['Result'].apply(calculate_goal_difference)

# Create a table showing the probability of each goal difference
goal_difference_probabilities = ergebnis_df.groupby('Goal_Difference')['Probability'].sum().reset_index()
goal_difference_probabilities = goal_difference_probabilities.rename(columns={'Probability': 'Probability'})

# Print the dataframes for reference
print("Odds DataFrame:")
print(tipp_odds_df)
print("Results DataFrame:")
print(ergebnis_df.sort_values('Probability', ascending=False))
print("Goal Difference Probabilities:")
print(goal_difference_probabilities.sort_values('Probability', ascending=False))

Odds DataFrame:
      Country  Odds  Probability
0  Tschechien  2.45     0.388350
1        Draw  3.50     0.271845
2      Türkei  2.80     0.339806
Results DataFrame:
   Result  Probability  Goal_Difference
22    1:1     0.111288                0
0     1:0     0.074192                1
2     2:1     0.074192                1
31    1:2     0.074192               -1
29    0:1     0.066773               -1
23    2:2     0.055644                0
1     2:0     0.051364                2
21    0:0     0.047695                0
30    0:2     0.044515               -2
33    1:3     0.039278               -2
4     3:1     0.037096                2
34    2:3     0.033386               -1
32    0:3     0.026709               -3
5     3:2     0.026709                1
3     3:0     0.026709                3
24    3:3     0.022258                0
36    1:4     0.019078               -3
7     4:1     0.019078                3
37    2:4     0.016693               -2
35    0:4     0.016693           

In [172]:

# Calculate the expected points for guessing the outcome correctly
tipp_odds_df['Expectation'] = tipp_odds_df['Probability'] * 5

print(tipp_odds_df)

# Calculate the expected points for guessing the goal difference correctly
goal_difference_probabilities['Expectation'] = goal_difference_probabilities['Probability'] * 3

# Calculate the total expected points for each goal difference
goal_difference_probabilities['Expectation_sum'] = goal_difference_probabilities.apply(
    lambda row: tipp_odds_df.loc[tipp_odds_df['Country'] == home_team, 'Expectation'].values[0] + row['Expectation'] if row['Goal_Difference'] > 0 else
                tipp_odds_df.loc[tipp_odds_df['Country'] == away_team, 'Expectation'].values[0] + row['Expectation'] if row['Goal_Difference'] < 0 else
                tipp_odds_df.loc[tipp_odds_df['Country'] == 'Draw', 'Expectation'].values[0] + row['Expectation'],
    axis=1
)

print(goal_difference_probabilities)


      Country  Odds  Probability  Expectation
0  Tschechien  2.45     0.388350     1.941748
1        Draw  3.50     0.271845     1.359223
2      Türkei  2.80     0.339806     1.699029
    Goal_Difference  Probability  Expectation  Expectation_sum
0                -5     0.006677     0.020032         1.719061
1                -4     0.025040     0.075120         1.774149
2                -3     0.052464     0.157393         1.856423
3                -2     0.104938     0.314815         2.013844
4                -1     0.183891     0.551672         2.250701
5                 0     0.242450     0.727349         2.086572
6                 1     0.184633     0.553898         2.495645
7                 2     0.109605     0.328814         2.270562
8                 3     0.054134     0.162401         2.104149
9                 4     0.027822     0.083466         2.025214
10                5     0.008347     0.025040         1.966787


In [175]:
# Multiply ergebnis_df probabilities with 2
ergebnis_df['Expectation'] = ergebnis_df['Probability'] * 2

# Add the expectation_sum to the score expectation
def add_goal_difference_expectation(row):
    goal_diff = row['Goal_Difference']
    goal_diff_expectation = goal_difference_probabilities.loc[goal_difference_probabilities['Goal_Difference'] == goal_diff, 'Expectation_sum'].values[0]
    return row['Expectation'] + goal_diff_expectation

ergebnis_df['Total_Expectation'] = ergebnis_df.apply(add_goal_difference_expectation, axis=1)

# Sort the results by Total_Expectation
ergebnis_df = ergebnis_df.sort_values(by='Total_Expectation', ascending=False)

# Print the sorted DataFrame
print("Sorted Results DataFrame:")
print(ergebnis_df)

Sorted Results DataFrame:
   Result  Probability  Goal_Difference  Expectation  Total_Expectation
0     1:0     0.074192                1     0.148384           2.644030
2     2:1     0.074192                1     0.148384           2.644030
5     3:2     0.026709                1     0.053418           2.549064
9     4:3     0.009539                1     0.019078           2.514723
31    1:2     0.074192               -1     0.148384           2.399086
29    0:1     0.066773               -1     0.133546           2.384247
1     2:0     0.051364                2     0.102728           2.373289
4     3:1     0.037096                2     0.074192           2.344754
34    2:3     0.033386               -1     0.066773           2.317474
22    1:1     0.111288                0     0.222577           2.309148
8     4:2     0.016693                2     0.033386           2.303948
13    5:3     0.004452                2     0.008903           2.279465
38    3:4     0.009539               -

In [174]:
# check if probability sum is 1
print('Sum of probabilities: ', np.round(tipp_odds_df['Probability'].sum(), 4))
print('Sum of probabilities: ', np.round(ergebnis_df['Probability'].sum(), 4))
print('Sum of probabilities: ', np.round(goal_difference_probabilities['Probability'].sum(), 4))

# print sum of expectations
print('Sum of expectations of correct winner: ', np.round(tipp_odds_df['Expectation'].sum(), 4))
print('Sum of expectations of correct score: ', np.round(ergebnis_df['Expectation'].sum(), 4))
print('Sum of expectations of correct goal difference: ', np.round(goal_difference_probabilities['Expectation'].sum(), 4))

Sum of probabilities:  1.0
Sum of probabilities:  1.0
Sum of probabilities:  1.0
Sum of expectations of correct winner:  5.0
Sum of expectations of correct score:  2.0
Sum of expectations of correct goal difference:  3.0
