# Capture odds - individual seats at sportsbet

Note: this notebook exists for ease of testing. For scheduled runs, convert it to a Python file and move that file to the automated directory.

To do this, run:
```
jupyter nbconvert "_capture-odds-individual-seats-sportsbet.ipynb" --to python 
chmod 700 "_capture-odds-individual-seats-sportsbet.py"
mv "_capture-odds-individual-seats-sportsbet.py" ../automated 
```

Shell file called by cron:
```
#!/bin/zsh
#crontab: 57 7 * * * /Users/bryanpalmer/Australian-Federal-Election-2025/automated/electorate-betting.sh

# set-up parameters
home=/Users/bryanpalmer
working=Australian-Federal-Election-2025/automated
runrun=_capture-odds-individual-seats-sportsbet.py
mmenv=313

# move to the home directory
cd $home

# move to the working directory
cd $working

#initiate the data capture
$home/micromamba/envs/$mmenv/bin/python ./$runrun >>log-eb.log 2>> err-eb.log

# update git
git commit "../betting-data/sportsbet-2025-electorate-betting.csv" -m "data update"
git push
```

## Python setup

In [1]:
# System imports
from time import sleep

# data science imports
import pandas as pd

# web scraping imports
from bs4 import BeautifulSoup
import webdriver_manager
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService

print(f"webdriver_manager version: {webdriver_manager.__version__}")

webdriver_manager version: 4.0.2


## Selenium - get web page

In [2]:
def capture_url(
    url: str = (
        "https://www.sportsbet.com.au/betting/politics/"
        + "australian-federal-politics/electorate-betting-8866861"
    ),
) -> BeautifulSoup:
    """Load a web page in headless Chrome and return it as a BeautifulSoup object.

    After loading the page, every element matching
    ``//div[@data-automation-id='chevron-closed']`` is clicked, in up to three
    passes, so that collapsed sections are expanded before the page source is
    captured.

    Args:
        url: Address of the page to load. Defaults to the Sportsbet
            electorate-betting page.

    Returns:
        The page source, as it stands after the expansion passes, parsed
        with the "lxml" parser.

    Raises:
        Any exception raised by webdriver-manager or Selenium propagates to
        the caller; the browser session is shut down first.
    """

    # Note you might need to delete the webdriver-manager cache if you get an error
    # rm -rf ~/.wdm

    # set up the webdriver
    service = ChromeService(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument("--ignore-certificate-errors")
    options.add_argument("--incognito")
    options.add_argument("--headless")

    # fire up the driver
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.implicitly_wait(15)

        # get the page
        driver.get(url)

        # open up all of the chevrons ...
        # Expanding one level can reveal further collapsed sections, so look
        # again after each pass (three passes at most).
        for _ in range(3):
            unexpanded_divs = driver.find_elements(
                By.XPATH, "//div[@data-automation-id='chevron-closed']"
            )
            if not unexpanded_divs:
                break
            print(f"Found {len(unexpanded_divs)} unexpanded divs")
            for div in unexpanded_divs:
                # A fresh chain per click, so previously queued actions can
                # never be carried over into the next perform().
                webdriver.ActionChains(driver).move_to_element(div).click().perform()
                sleep(1)  # give the page a moment to render the expanded section

        # convert to soup
        return BeautifulSoup(driver.page_source, "lxml")
    finally:
        # quit() (rather than close()) ends the whole browser session, and the
        # finally clause makes sure that happens even when a step above fails.
        driver.quit()


# Scrape the default URL once; SOUP is the parsed page used by the cells below.
SOUP = capture_url()

Found 7 unexpanded divs
Found 104 unexpanded divs


In [3]:
# to see what we are working with
# Flip DUMP_HTML to True to write the scraped page to disk for manual inspection.
DUMP_HTML = False
if DUMP_HTML:
    # Explicit UTF-8 so the dump does not depend on the platform's default encoding.
    with open("scraped-data.html", "w", encoding="utf-8") as f:
        f.write(SOUP.prettify())

## Extract seat odds

In [4]:
def extract_seat_data(soup: BeautifulSoup) -> pd.DataFrame:
    """Extract the seat data from the soup object.

    Each seat container contributes one row per (party, price) pair found
    inside it.

    Args:
        soup: Parsed page containing the seat containers.

    Returns:
        A DataFrame with the columns "seat", "party", "price" and
        "timestamp", sorted by seat and then by the numeric value of the
        price, with a fresh 0..n-1 index. "price" holds the text exactly as
        scraped; "timestamp" is the same capture time on every row. When no
        rows are found the frame is empty but still has all four columns.
    """

    # find all the seat divs
    divs = soup.find_all(
        "div", {"class": "content_f1sk4ot6 divider_ffir01h"}
    ) + soup.find_all("div", {"class": "contentWithRoundedBottomBorders_fm4pkx"})
    print(f"Found {len(divs)} divs")

    # extract the candidates and odds data
    now = pd.Timestamp.now()
    records = []
    for div in divs:
        seat = div.find(
            "span", {"data-automation-id": "event-accordion-title"}
        ).text.strip()

        # two alternative class names are tried for the party label
        parties_soup = div.find_all("div", {"class": "eventMarketTextWrapper_fkhz08q"})
        if not parties_soup:
            parties_soup = div.find_all("div", {"class": "outcomeName_f2fqq0v"})
        parties = [party.text.strip() for party in parties_soup]
        prices = [
            price.text.strip()
            for price in div.find_all("div", {"class": "priceText_f71sibe"})
        ]

        # zip() pairs labels with prices positionally and drops any surplus,
        # so make a mismatch visible in the log.
        if len(parties) != len(prices):
            print(
                f"Warning: {seat}: {len(parties)} parties but {len(prices)} prices"
            )

        records.extend(
            {"seat": seat, "party": party, "price": price, "timestamp": now}
            for party, price in zip(parties, prices)
        )

    # Build the frame once, with explicit columns so that an empty scrape
    # still yields a well-formed (empty) frame instead of a KeyError below.
    extract = pd.DataFrame(records, columns=["seat", "party", "price", "timestamp"])

    # Prices are scraped as text: sort on their numeric value so that, e.g.,
    # "2.50" comes before "10.00". Unparseable prices sort last within a seat.
    extract = (
        extract.assign(_price_num=pd.to_numeric(extract["price"], errors="coerce"))
        .sort_values(by=["seat", "_price_num"])
        .drop(columns="_price_num")
        .reset_index(drop=True)
    )
    return extract


# Parse the scraped page into a table with one row per seat/party price.
ODDS = extract_seat_data(SOUP)

Found 150 divs


## Append this data to a CSV file

In [5]:
# save to file
# mode="a" appends this run's rows to the existing CSV. The DataFrame index is
# written as the first column (index=True) and no header row is emitted
# (header=False).
FILE = "../betting-data/sportsbet-2025-electorate-betting.csv"
ODDS.to_csv(FILE, mode="a", index=True, header=False)