In [None]:
import contextlib
import os
import sqlite3
import time

import bs4
import helium
import selenium

# Firefox launch options shared by every browser session started in this
# notebook: a fixed 1024x768 window, run headless.
options = selenium.webdriver.FirefoxOptions()
for cli_flag in ('--width=1024', '--height=768', '-headless'):
    options.add_argument(cli_flag)

In [None]:
def create_db_sources(year):
    """Create the SQLite database that stores raw page sources for one season.

    The database is located at ``data/sources/sources_{year}.db`` and holds a
    single ``sources`` table keyed by match id, with one TEXT column per
    scraped page (summary, statistics, odds).

    The call is idempotent: the target directory is created when it is
    missing and an already existing ``sources`` table is left untouched, so
    re-running the notebook from a fresh kernel does not raise.

    Args:
        year: Value interpolated into the database file name.
    """
    db_path = f'data/sources/sources_{year}.db'
    # sqlite3 does not create missing parent directories on its own.
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    query = '''CREATE TABLE IF NOT EXISTS sources (
        id TEXT NOT NULL PRIMARY KEY,
        summary_source TEXT,
        stats_source TEXT,
        odds_source TEXT)
        '''
    # A sqlite3 connection used as a context manager only commits or rolls
    # back; closing() is what actually releases the file handle.
    with contextlib.closing(sqlite3.connect(db_path)) as con:
        with con:
            con.execute(query)

In [None]:
def populate_db_sources(year):
    """Collect the match ids for ``year`` and store each match's sources.

    Args:
        year: Passed to ``get_ids_db_sources`` to list the matches and to
            ``populate_db_sources_per_ids`` to select the target database.
    """
    populate_db_sources_per_ids(get_ids_db_sources(year), year)

In [None]:
def get_ids_db_sources(year):
    """Return the match ids listed on the results page of one NBA season.

    Opens the flashscore results page for the ``year``-``year+1`` season in
    a Firefox session configured by the module-level ``options``, expands
    the full list of matches and extracts the ids from the resulting HTML.

    Args:
        year: First year of the season; must support ``year + 1``.

    Returns:
        The list produced by ``get_ids`` for the fully expanded page.
    """
    results_url = f'flashscore.com/basketball/usa/nba-{year}-{year+1}/results/'
    with helium.start_firefox(options=options) as browser:
        helium.go_to(results_url)
        # Fixed pause before interacting with the page.
        time.sleep(2)
        page_html = get_source(browser)
    return get_ids(page_html)


def get_source(driver):
    """Expand the results list as far as possible and return the page HTML.

    Repeatedly scrolls to the bottom of the page and clicks
    'Show more matches', pausing two seconds after each click. The loop
    stops on the first ``LookupError`` (presumably raised by helium once the
    link can no longer be found).

    Args:
        driver: Browser driver exposing ``execute_script`` and
            ``page_source``.

    Returns:
        ``driver.page_source`` after the last successful expansion.
    """
    scroll_to_bottom = 'window.scrollTo(0, document.body.scrollHeight);'
    while True:
        try:
            driver.execute_script(scroll_to_bottom)
            helium.click('Show more matches')
            time.sleep(2)
        except LookupError:
            break
    return driver.page_source


def get_ids(source):
    """Extract match ids from the HTML of a results page.

    Every ``div`` carrying the ``event__match`` class contributes one id:
    the part of its ``id`` attribute after the last underscore.

    Args:
        source: HTML text of the results page.

    Returns:
        List of id strings, in document order.
    """
    parsed = bs4.BeautifulSoup(source, 'html.parser')
    return [
        match_div['id'].split('_')[-1]
        for match_div in parsed.find_all('div', {'class': 'event__match'})
    ]

In [None]:
def populate_db_sources_per_ids(ids, year):
    """Scrape every match in ``ids`` and insert its sources into the database.

    A single Firefox session (configured by the module-level ``options``) is
    reused for all matches. Each match is written in its own connection and
    transaction, so rows already scraped stay stored if a later match fails.

    Args:
        ids: Sequence of match ids (must support ``len``).
        year: Selects the database file ``data/sources/sources_{year}.db``.

    Raises:
        sqlite3.IntegrityError: If an id is already present, given the
            ``id`` primary key declared by ``create_db_sources``.
    """
    # Loop-invariant values are computed once up front.
    db_path = f'data/sources/sources_{year}.db'
    query = '''INSERT INTO sources VALUES (
        :id,
        :summary_source,
        :stats_source,
        :odds_source)
        '''
    total = len(ids)
    with helium.start_firefox(options=options) as driver:
        for position, id_ in enumerate(ids, start=1):
            print(f'{id_} {position}/{total}')
            values = get_values_db_sources(id_, driver)
            values['id'] = id_
            # A sqlite3 connection used as a context manager only commits or
            # rolls back; closing() releases the handle after every row
            # instead of leaving one open connection per match.
            with contextlib.closing(sqlite3.connect(db_path)) as con:
                with con:
                    con.execute(query, values)

In [None]:
def _load_detail_container(driver, url):
    """Open ``url`` and return its ``container__detail`` div, or None."""
    helium.go_to(url)
    # Fixed pause before reading the page source.
    time.sleep(2)
    page = bs4.BeautifulSoup(driver.page_source, 'html.parser')
    return page.find('div', {'class': 'container__detail'})


def get_values_db_sources(id_, driver):
    """Fetch the summary, statistics and odds page sources of one match.

    The summary page is always loaded. The statistics and odds pages are
    loaded only when the summary page contains a link to them. Both link
    checks run against the summary page, so the outcome of one optional page
    cannot influence whether the other one is fetched.

    A page whose ``container__detail`` div cannot be found leaves its value
    as ``None`` instead of storing the literal string ``'None'``; when the
    summary container itself is missing, no optional page is attempted.

    Args:
        id_: Match id used to build the match URL.
        driver: Browser driver whose ``page_source`` is read after each
            navigation.

    Returns:
        Dict with the keys ``summary_source``, ``stats_source`` and
        ``odds_source``; each value is the container HTML as a string, or
        ``None`` when the page was not available.
    """
    values = {
        'summary_source': None,
        'stats_source': None,
        'odds_source': None,
    }
    url = f'flashscore.com/match/{id_}/'

    summary = _load_detail_container(driver, f'{url}#match-summary')
    if summary is None:
        # Without the summary container there are no links to inspect.
        return values
    values['summary_source'] = str(summary)

    optional_pages = (
        ('stats_source', '#match-summary/match-statistics'),
        ('odds_source', '#odds-comparison'),
    )
    for key, href in optional_pages:
        if not summary.find_all('a', {'href': href}):
            continue
        container = _load_detail_container(driver, f'{url}{href}')
        if container is not None:
            values[key] = str(container)

    return values

In [None]:
# Build and fill the source database for the season that starts in 1995.
SEASON_START_YEAR = 1995
create_db_sources(SEASON_START_YEAR)
populate_db_sources(SEASON_START_YEAR)