# Capture odds - election winner at sportsbet

In [None]:
"""Notebook - for testing the betting data capture code."""

'Notebook - for testing the betting data capture code.'

Note: this notebook is for ease of testing. Convert to a python file and move to the automated directory.

To do this ...
```
jupyter nbconvert --to python capture_odds_election_winner-at-sportsbet.ipynb

chmod 700 capture_odds_election_winner-at-sportsbet.py

mv capture_odds_election_winner-at-sportsbet.py ../automated/_Capture-odds-election-winner-at-sportsbet.py
```

Then schedule the job by adding the following to your crontab (edit it with `crontab -e`):
```
# set-up
SHELL=/bin/zsh

# run the election odds capture ...
58 6 * * * /Users/bryanpalmer/Australian-Federal-Election-2028/automated/election-winner.sh
```

Shell file called by cron:
```
#!/bin/zsh
#crontab: 58 6 * * * /Users/bryanpalmer/Australian-Federal-Election-2028/automated/election-winner.sh

# set-up parameters
home=/Users/bryanpalmer
working=Australian-Federal-Election-2028/automated
runrun=_Capture-odds-election-winner-at-sportsbet.py

# move to the home directory
cd $home

# activate the uv environment
source ~/.venv/uv313/bin/activate

# move to the working directory
cd $working

#initiate the data capture
python ./$runrun >>winner-log.log 2>> winner-err.log

# update git
git commit "../betting-data/sportsbet-2028-election-winner.csv" -m "betting market update"
git push
```

## Python setup

In [2]:
# --- System imports
from pathlib import Path

# --- data science imports
import pandas as pd
from bs4 import BeautifulSoup

# --- web scraping imports
import webdriver_manager
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService

# --- report the installed webdriver_manager version in the cell output
print(f"webdriver_manager version: {webdriver_manager.__version__}")

webdriver_manager version: 4.0.2


## Extract website text using Selenium

In [3]:
def get_soup() -> BeautifulSoup:
    """
    Get the soup object from the Sportsbet page.

    Launches a headless, incognito Chrome session using the driver binary
    installed by webdriver-manager, loads the election-winner market page
    and parses the rendered page source with the "lxml" parser.

    Returns:
        BeautifulSoup: Parsed HTML content of the page.
    """

    url = (
        "https://www.sportsbet.com.au/betting/politics/australian-federal-politics/"
        + "49th-parliament-of-australia-9232392"
        # inactive from 21 May 2025
        # "https://www.sportsbet.com.au/betting/politics/australian-federal-politics/"
        # + "49th-parliament-of-australia-9186904"
    )

    # Note you might need to delete the webdriver-manager cache if you get an error
    # rm -rf ~/.wdm
    service = ChromeService(ChromeDriverManager().install())

    options = webdriver.ChromeOptions()
    options.add_argument("--ignore-certificate-errors")
    options.add_argument("--incognito")
    options.add_argument("--headless")

    # Hand the installed driver to Chrome; previously the service object was
    # created but never used.
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.implicitly_wait(5)
        driver.get(url)
        return BeautifulSoup(driver.page_source, "lxml")
    finally:
        # quit() (rather than close()) ends the whole browser session and the
        # chromedriver process, even when the page load above fails.
        driver.quit()


# --- fetch and parse the live Sportsbet page (runs headless Chrome)
soup = get_soup()

In [4]:
# print(soup.prettify())

## Extract data of interest

In [5]:
def extract_data(b_soup: BeautifulSoup) -> dict[str, str]:
    """
    Extract odds data (names/prices) from the b_soup object.

    The page is expected to contain a "content-background" container that
    holds at least two "outcomeCardItems" rows: the first row lists the
    outcome names and the second row lists the matching prices, in the same
    order. Only those first two rows are used.

    Args:
        b_soup (BeautifulSoup): Parsed HTML content of the page.

    Returns:
        dict[str, str]: Mapping of outcome name to price, both as the text
            found on the page.

    Raises:
        IndexError: If the container, or the names and prices rows, cannot
            be found on the page.
    """

    # --- Get to the relevant part of the page
    # <div class="background_fja218n" data-automation-id="content-background">
    div_name = "content-background"  # ===> fragile <===
    odds_class = "outcomeCardItems_f4kk892"  # ===> fragile <===

    containers = b_soup.find_all("div", {"data-automation-id": div_name})
    if not containers:
        raise IndexError(
            f"No <div data-automation-id='{div_name}'> found - "
            "the page layout may have changed."
        )

    # --- Locate odds: a names row followed by a prices row
    odds = containers[0].find_all(  # type: ignore[union-attr]
        "div", {"class": odds_class}
    )
    if len(odds) < 2:
        raise IndexError(
            f"Expected a names row and a prices row with class '{odds_class}', "
            f"found {len(odds)} - the page layout may have changed."
        )
    names_row, prices_row = odds[0], odds[1]

    # --- Pair each name with its price (as text)
    return {
        name.text: price.text
        for name, price in zip(
            names_row.contents, prices_row.contents  # type: ignore[union-attr]
        )
    }


# --- map each outcome name to its quoted price (both kept as text)
names_prices = extract_data(soup)

In [6]:
# names_prices

## Append this data to a CSV file

In [7]:
def append_to_file(
    nap: dict[str, str],
    file_dir: str = "../betting-data",
    file_name: str = "sportsbet-2028-election-winner.csv",
) -> pd.DataFrame:
    """Append the names and prices to the CSV file.

    The data is written in long format, one row per outcome, with columns
    datetime, variable (the outcome name) and value (the price). Every row
    from a single call carries the same capture timestamp. The file is opened
    in append mode and no header row is written.

    Args:
        nap (dict[str, str]): Dictionary containing names and prices.
        file_dir (str): Directory holding the CSV file; created if it does
            not exist. A relative path is resolved against the current
            working directory.
        file_name (str): Name of the CSV file within file_dir.

    Returns:
        pd.DataFrame: DataFrame containing the appended data.
    """

    # --- convert to a DataFrame in long format
    df = pd.DataFrame(list(nap.items()), columns=["variable", "value"])
    captured_at = pd.Timestamp.now()  # one timestamp shared by every row
    df.index = pd.DatetimeIndex([captured_at] * len(df), name="datetime")

    # --- save to file
    out_dir = Path(file_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / file_name, mode="a", index=True, header=False)

    return df


# --- append this capture to the CSV file; as the last expression in the
# cell, the returned DataFrame is also shown as the cell output
append_to_file(names_prices)

Unnamed: 0_level_0,variable,value
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-05-30 11:18:33.039392,Labor,1.3
2025-05-30 11:18:33.039392,Liberal and/or Nationals,3.5
2025-05-30 11:18:33.039392,Any Other Result,26.0
