In [35]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import os
import time

# URL of the Bwin page
url = 'https://sports.bwin.de/de/sports/events/polen-niederlande-2:6451440?tab=score'

# Specify the path to the Brave or Chrome browser executable
path_to_chrome = '/usr/bin/google-chrome'
path_to_brave = '/snap/bin/brave'
path = path_to_chrome

In [36]:
# Ensure the executable exists
if not os.path.exists(path):
    raise FileNotFoundError(f"The specified browser executable does not exist: {path}")

# Set up Selenium
options = Options()
options.binary_location = path
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Initialize the WebDriver
try:
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    print("Selenium WebDriver started...")
except Exception as e:
    print(f"Error initializing WebDriver: {e}")
    raise

# Open the page
try:
    driver.get(url)
    print(f"Page {url} opened...")
    
    # Wait for the page to load by waiting for a specific element to be present
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'option-pick')))
    time.sleep(5)  # Additional delay to ensure page content is fully loaded
    
    # Scroll to the bottom of the page to load all dynamic content
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(5)  # Wait for additional content to load
except Exception as e:
    print(f"Error opening the page: {e}")
    driver.quit()
    raise

# Get the HTML content of the page
try:
    html_content = driver.page_source
except Exception as e:
    print(f"Error getting page source: {e}")
    driver.quit()
    raise

# Save the HTML content to a text file
with open("page_content.txt", "w", encoding="utf-8") as file:
    file.write(html_content)

print("HTML content saved to page_content.txt")

# Close the browser
driver.quit()

Selenium WebDriver started...
Page https://sports.bwin.de/de/sports/events/polen-niederlande-2:6451440?tab=score opened...
HTML content saved to page_content.txt


In [37]:
# Extract teams from the URL
teams = url.split('/')[-1].split('-')[0:2]
team1 = teams[0].capitalize()
team2 = teams[1].capitalize()
print(f"Teams: {team1} vs {team2}")

# Read the HTML content from the file
with open("page_content.txt", "r", encoding="utf-8") as file:
    html_content = file.read()

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Extract match result (excluding over/under goals)
event_picks = soup.find_all('ms-event-pick', class_='option-pick')
match_result = {}

for pick in event_picks:
    name_tag = pick.find('div', class_='name')
    value_tag = pick.find('div', class_='value')
    if name_tag and value_tag:
        team_name = name_tag.text.strip()
        odds = value_tag.text.strip()
        if 'Over' not in team_name and 'Under' not in team_name:
            match_result[team_name] = odds

print(f'Match Result: {match_result}')

# Extract over/under goals within specified container class
over_under_goals = []
option_groups = soup.find_all('div', class_='option-group-container over-under-container triple')

for group in option_groups:
    goal_type_elements = group.find_all('div', class_='name ng-star-inserted')
    probability_elements = group.find_all('div', class_='value option-value ng-star-inserted')
    
    for goal_type, probability in zip(goal_type_elements, probability_elements):
        over_under_goals.append({
            'Goal Type': goal_type.text.strip(),
            'Probability': probability.text.strip()
        })

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(over_under_goals)
print(df)


Teams: Polen vs Niederlande
Match Result: {}
Empty DataFrame
Columns: []
Index: []
