# Betting markets - capture odds - individual seats at sportsbet

Note: this notebook is for ease of testing. Convert to a python file and move to the automated directory.

To do this ...
```
jupyter nbconvert --to python "Betting markets - capture odds - individual seats at sportsbet.ipynb"
chmod 700 "Betting markets - capture odds - individual seats at sportsbet.py"
mv "Betting markets - capture odds - individual seats at sportsbet.py" ../automated ```

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Python-setup" data-toc-modified-id="Python-setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Python setup</a></span></li><li><span><a href="#Set-up-web-driver-options" data-toc-modified-id="Set-up-web-driver-options-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Set-up web-driver options</a></span></li><li><span><a href="#Extract-website-text-using-Selenium" data-toc-modified-id="Extract-website-text-using-Selenium-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Extract website text using Selenium</a></span></li><li><span><a href="#Extract-data-of-interest" data-toc-modified-id="Extract-data-of-interest-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Extract data of interest</a></span></li><li><span><a href="#Append-this-data-to-a-CSV-file" data-toc-modified-id="Append-this-data-to-a-CSV-file-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Append this data to a CSV file</a></span></li></ul></div>

## Python setup

In [1]:
# data science imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# web scraping imports
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup

# System imports
import re
import datetime
from pathlib import Path
from time import sleep

## Set-up web-driver options

In [2]:
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--window-size=1920,1080')
options.add_argument('--headless')

## Extract website text using Selenium

In [3]:
urls = {
    'NSW': 'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-nsw-seats-6484922',
    'Vic': 'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-vic-seats-6495711',
    'Qld': 'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-qld-seats-6496304',
    'WA':  'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-wa-seats-6496079',
    'SA':  'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-sa-seats-6494014',
    'Tas': 'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-tas-seats-6484714',
    'ACT': 'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-act-seats-6484557',
    'NT':  'https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-nt-seats-6484664',
}

In [4]:
def capture(driver, state, url, verbose=False):
    SHOW_ALL = 'Show All'
    SHOW_LESS = 'Show Less'
    
    driver.get(url)
    if verbose: print(f'Got {state}')

    #electorates = driver.find_elements(By.XPATH, "//div[contains(@class, 'accordionItemMobileOrSingleMarketGroup_fukjuk5')]")
    electorates = driver.find_elements(By.XPATH, "//div[contains(@class, 'accordionItemDesktop_f1pa6f05')]")
    if verbose: print(f'Electorate count: {len(electorates)}')

    scraped = {}
    for e in electorates:
        blob = e.text
        if SHOW_ALL in blob:
            expandable = e.find_element(By.XPATH, f"//div/span[contains(., '{SHOW_ALL}')]")
            webdriver.ActionChains(driver).click(expandable).perform()
            blob = e.text
        ignore_last = SHOW_ALL in blob or SHOW_LESS in blob
        findings = blob.split('\n')
        seat_name = findings[0]
        odds_pairs = findings[2:-1] if ignore_last else findings[2:]
        it = iter(odds_pairs)
        scraped[seat_name] = dict(zip(it, it))
    
    if verbose: print(scraped)
    return scraped

In [5]:
# instantiate a web driver
driver = webdriver.Chrome(options=options)
driver.implicitly_wait(220) 

In [6]:
# some test code
TEST = True
if TEST:
    driver.get(urls['NSW'])
    html = driver.page_source
    soup = BeautifulSoup(html, 'lxml')
    print(soup.prettify())
    
    assert(False)

<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Electorate Betting - NSW Seats betting odds - Australian Federal Politics | Sportsbet
  </title>
  <meta content="View the latest odds on Electorate Betting - NSW Seats Matches &amp; Bet with Sportsbet. Join Australia's Favourite Online Betting and Entertainment Website." data-automation-id="page-description" data-react-helmet="true" name="description"/>
  <meta charset="utf-8"/>
  <meta content="#142841" name="theme-color"/>
  <meta content="width=device-width, initial-scale=.999, maximum-scale=1.0, user-scalable=no" name="viewport"/>
  <meta content="no" name="apple-mobile-web-app-capable"/>
  <meta content="on" http-equiv="cleartype"/>
  <meta content="app-id=418221992" name="apple-itunes-app"/>
  <meta content="en" http-equiv="Content-Language"/>
  <link data-automation-id="canonical" data-react-helmet="true" href="https://www.sportsbet.com.au/betting/politics/australian-federal-politics/electorate-betting-nsw-seats-64

AssertionError: 

In [None]:
# capture the data we want
all_seats = {}
for state, url in urls.items():
    found = capture(driver, state, url, verbose=False)
    all_seats = {**all_seats, **found}
    sleep(5) # be a good citizen

In [None]:
len(all_seats) # should be 151 - one per seat in Parliament

In [None]:
driver.close()

## Extract data of interest

In [None]:
wide_data = pd.DataFrame(all_seats).T
len(wide_data)

In [None]:
long_data = wide_data.melt(value_vars=wide_data.columns, ignore_index=False)
long_data = long_data[long_data['value'].notna()]
new_index = pd.MultiIndex.from_arrays([np.repeat(datetime.datetime.now(), len(long_data)),long_data.index ])
long_data.index = new_index
long_data.index.names = ['datatime', 'seat']
long_data

## Append this data to a CSV file

In [None]:
# save to file
FILE = '../historical-data/sportsbet-2022-electorate-betting.csv'
long_data.to_csv(FILE, mode='a', index=True, header=False)