# Betting markets - capture odds - individual seats at sportsbet

Note: this notebook exists for ease of testing. For automated use, convert it to a Python file and move that file to the `automated` directory.

To do this, run the following commands:
```
jupyter nbconvert --to python "Betting markets - capture odds - individual seats at sportsbet.ipynb"
chmod 700 "Betting markets - capture odds - individual seats at sportsbet.py"
mv "Betting markets - capture odds - individual seats at sportsbet.py" ../automated
```

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Python-setup" data-toc-modified-id="Python-setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Python setup</a></span></li><li><span><a href="#Set-up-web-driver-options" data-toc-modified-id="Set-up-web-driver-options-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Set-up web-driver options</a></span></li><li><span><a href="#Extract-website-text-using-Selenium" data-toc-modified-id="Extract-website-text-using-Selenium-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Extract website text using Selenium</a></span></li><li><span><a href="#Extract-data-of-interest" data-toc-modified-id="Extract-data-of-interest-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Extract data of interest</a></span></li><li><span><a href="#Append-this-data-to-a-CSV-file" data-toc-modified-id="Append-this-data-to-a-CSV-file-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Append this data to a CSV file</a></span></li></ul></div>

## Python setup

In [1]:
# data science imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# web scraping imports
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# System imports
import re
import datetime
from pathlib import Path
from time import sleep

## Set-up web-driver options

In [2]:
# Chrome launch flags used for the scrape; each one is handed to
# ChromeOptions.add_argument in the order listed.
options = webdriver.ChromeOptions()
for chrome_flag in (
    '--ignore-certificate-errors',
    '--incognito',
    '--window-size=1920,1080',
    '--headless',
):
    options.add_argument(chrome_flag)

## Extract website text using Selenium

In [3]:
# Every state page lives under the same Sportsbet politics path; only the
# final path segment differs from state to state.
SPORTSBET_FEDERAL_POLITICS = (
    'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/'
)

# state label -> final path segment of that state's seat-markets page
state_pages = {
    'NSW': 'Next-Federal-Election-NSW-Seats-5878289',
    'Vic': 'Next-Federal-Election-VIC-Seats-6054105',
    'Qld': 'Next-Federal-Election-QLD-Seats-6227453',
    'WA':  'Next-Federal-Election-WA-Seats-6240412',
    'SA':  'Next-Federal-Election-SA-Seats-6240454',
    'Tas': 'Next-Federal-Election-TAS-Seats-6225404',
    'ACT': 'next-federal-election-act-seats-5849944',
    'NT':  'Next-Federal-Election-NT-Seats-6225384',
}

# state label -> full page URL, in the same order as state_pages
urls = {
    state_label: SPORTSBET_FEDERAL_POLITICS + page
    for state_label, page in state_pages.items()
}

In [4]:
def capture(driver, state, url, verbose=False):
    """Scrape the seat-by-seat market text from one state page.

    Parameters
    ----------
    driver : Selenium WebDriver used to load and query the page.
    state : label for the page; only used in the verbose progress message.
    url : address of the page to load.
    verbose : when True, print progress messages and the scraped result.

    Returns
    -------
    dict
        Maps the first text line of each matched market element (used as
        the seat name) to a dict built by pairing up consecutive text
        lines of that element (first of each pair is the key, second the
        value).
    """
    SHOW_ALL = 'Show All'

    driver.get(url)
    if verbose:
        print(f'Got {state}')

    electorates = driver.find_elements(
        By.XPATH,
        "//div[contains(@class, 'accordionItemMobileOrSingleMarketGroup_fukjuk5')]",
    )
    if verbose:
        print(f'Electorate count: {len(electorates)}')

    scraped = {}
    for e in electorates:
        blob = e.text
        if SHOW_ALL in blob:
            # The XPath must be anchored to this element. An expression that
            # starts with '//' is evaluated against the whole document even
            # when called on an element, so it could click a different
            # electorate's "Show All" control. descendant-or-self keeps the
            # original "span directly under a div" shape while also allowing
            # that div to be the electorate element itself.
            expandable = e.find_element(
                By.XPATH,
                f"./descendant-or-self::div/span[contains(., '{SHOW_ALL}')]",
            )
            webdriver.ActionChains(driver).click(expandable).perform()
            blob = e.text  # re-read the text now that the market is expanded
        findings = blob.split('\n')
        seat_name = findings[0]
        # Skips the second line and the last line; presumably a header row
        # and a trailing show/hide control -- TODO confirm against the page.
        odds_pairs = findings[2:-1]
        # zip over the same iterator twice pairs consecutive lines:
        # (line0, line1), (line2, line3), ...; an unpaired last line is dropped.
        it = iter(odds_pairs)
        scraped[seat_name] = dict(zip(it, it))

    if verbose:
        print(scraped)
    return scraped

In [5]:
# Start a Chrome session configured with the options built above.
driver = webdriver.Chrome(options=options)
# Implicit wait (in seconds) applied by Selenium to element lookups.
driver.implicitly_wait(time_to_wait=220)

In [6]:
# Visit each state page in turn and pool the results into a single dict;
# a key captured again on a later page overwrites the earlier entry.
all_seats = {}
for state, url in urls.items():
    found = capture(driver, state, url)
    all_seats.update(found)
    sleep(5)  # pause between page loads -- be a good citizen

In [7]:
# quit() ends the whole WebDriver session and shuts down the browser driver
# process. close() only closes the current window, which can leave the
# driver process running after an unattended run.
driver.quit()

## Extract data of interest

In [8]:
# all_seats is keyed by seat, so the raw frame has one column per seat;
# transpose it to get one row per seat instead.
wide_data = pd.DataFrame(all_seats).transpose()
len(wide_data)

151

In [9]:
# Unpivot to one row per (seat, variable) and keep only rows with a value.
melted = wide_data.melt(value_vars=wide_data.columns, ignore_index=False)
long_data = melted.loc[melted['value'].notna()]

# Stamp every row with the same capture time and pair it with the seat name.
# NOTE(review): 'datatime' looks like a typo for 'datetime'; kept unchanged
# here to preserve behaviour -- confirm nothing relies on the label.
capture_time = datetime.datetime.now()
long_data.index = pd.MultiIndex.from_arrays(
    [np.repeat(capture_time, len(long_data)), long_data.index],
    names=['datatime', 'seat'],
)
long_data

Unnamed: 0_level_0,Unnamed: 1_level_0,variable,value
datatime,seat,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-27 15:46:29.482288,Banks (NSW),Coalition,1.40
2022-01-27 15:46:29.482288,Barton (NSW),Coalition,8.00
2022-01-27 15:46:29.482288,Bennelong (NSW),Coalition,1.20
2022-01-27 15:46:29.482288,Berowra (NSW),Coalition,1.01
2022-01-27 15:46:29.482288,Blaxland (NSW),Coalition,12.00
2022-01-27 15:46:29.482288,...,...,...
2022-01-27 15:46:29.482288,Bass (TAS),Jacqui Lambie Network,18.00
2022-01-27 15:46:29.482288,Clark (TAS),Jacqui Lambie Network,34.00
2022-01-27 15:46:29.482288,Lyons (TAS),Jacqui Lambie Network,21.00
2022-01-27 15:46:29.482288,Franklin (TAS),Jacqui Lambie Network,21.00


## Append this data to a CSV file

In [10]:
# Append this capture to the running CSV. No header row is written;
# the two index levels are written as the leading columns.
FILE = '../historical-data/sportsbet-2022-individual-seats.csv'
long_data.to_csv(
    path_or_buf=FILE,
    header=False,
    index=True,
    mode='a',
)