In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome import options
from selenium.webdriver.support.expected_conditions import staleness_of
from selenium.webdriver.support.wait import WebDriverWait
from datetime import datetime
import selenium
import json
import time

# Competitions to scrape. The top-level key is the label that ends up in the
# output file names written by the export loop.
# Fields of each entry:
#   url      - league page on oddspedia.com (not read by the cells shown here)
#   alias    - sent as the `league` query parameter of the getMatchList call
#   category - sent as the `category` query parameter of the getMatchList call
#   years    - season -> date window (YYYY-MM-DD) sent as startDate / endDate
LEAGUES = {
    # start_date and end_date each include a margin
    "Brasileirao_Serie_A": {
        "url": "https://oddspedia.com/br/futebol/brasil/brasileirao-serie-a",
        "alias": "brasileirao-serie-a",
        "category": "brasil",
        "years": {
            2021: {
                "start_date": "2021-05-20",
                "end_date": "2021-12-20"
            },
            2022: {
                "start_date": "2022-04-01",
                "end_date": "2022-11-15"
            }
        }
    },
    "Brasileirao_Serie_B": {
        "url": "https://oddspedia.com/br/futebol/brasil/brasileirao-serie-b",
        "alias": "brasileirao-serie-b",
        "category": "brasil",
        "years": {
            2021: {
                "start_date": "2021-05-20",
                "end_date": "2021-11-30"
            },
            2022: {
                "start_date": "2022-04-01",
                "end_date": "2022-11-08"
            }
        }
    },
    "Libertadores": {
        "url": "https://oddspedia.com/br/futebol/america-do-sul/conmebol-libertadores",
        "alias": "conmebol-libertadores",
        "category": "america-do-sul",
        "years": {
            2021: {
                "start_date": "2021-02-15",
                "end_date": "2021-11-30"
            },
            2022: {
                "start_date": "2022-02-01",
                "end_date": "2022-11-01"
            }
        }
    },
}

# getMatchOdds endpoint without the matchId / oddGroupId parameters.
# NOTE(review): not referenced by the cells shown here; scrape_ids_odds builds
# its own full URL. Confirm whether this constant is still needed.
API_BASE = "https://oddspedia.com/api/v1/getMatchOdds?wettsteuer=0&geoCode=BR&bookmakerGeoCode=BR&bookmakerGeoState=&language=br"

# oddGroupId -> index used to pick that market out of the `data.prematch` list
# of a getMatchOdds response (see scrape_ids_odds). The ids correspond to the
# markets h2h (1), spread (3), over/under (4) and exact (8).
MARKET_INDEX = {1: 0, 3: 2, 4: 1, 8: 7}

def chunk_list(lst, n):
    """Split ``lst`` into consecutive slices holding at most ``n`` items each.

    The last slice may be shorter. Slices keep the type produced by slicing
    ``lst`` (e.g. lists for a list, strings for a string).
    """
    pieces = []
    for start in range(0, len(lst), n):
        stop = start + n
        pieces.append(lst[start:stop])
    return pieces

In [2]:
# Start a Chrome session with an unmodified Options object. `driver` is the
# shared browser used by scrape_league_ids and scrape_ids_odds to load the
# API URLs.
# NOTE(review): the session is never closed in the cells shown here.
opt = options.Options()
driver = webdriver.Chrome(options=opt)

In [3]:
def scrape_league_ids(league: dict) -> dict:
    """Fetch the match list of every configured season of one league.

    For each entry of ``league["years"]`` the getMatchList endpoint is opened
    in the shared ``driver`` and the JSON found in the page's ``<pre>``
    element is parsed.

    Args:
        league: one entry of ``LEAGUES`` (needs "years", "category", "alias").

    Returns:
        ``{year: {match_id: {"home", "away", "home_score", "away_score",
        "round"}}}``.
    """
    match_list_template = "https://oddspedia.com/api/v1/getMatchList?excludeSpecialStatus=1&sortBy=default&perPageDefault=400&startDate={start_date}T03%3A00%3A00Z&endDate={end_date}T02%3A59%3A59Z&geoCode=BR&status=all&sport=futebol&popularLeaguesOnly=0&category={category}&league={alias}&round=&page=1&perPage=400&language=br"

    matches_by_year = {}
    for year, season in league["years"].items():
        request_url = match_list_template.format(
            start_date=season["start_date"],
            end_date=season["end_date"],
            category=league["category"],
            alias=league["alias"],
        )
        driver.get(request_url)

        # The response body is read from the <pre> element of the loaded page.
        raw_body = driver.find_element(By.TAG_NAME, 'pre').text
        payload = json.loads(raw_body)

        matches_by_year[year] = {
            entry["id"]: {
                "home": entry["ht"],
                "away": entry["at"],
                "home_score": entry["hscore"],
                "away_score": entry["ascore"],
                "round": entry["league_round_name"],
            }
            for entry in payload["data"]["matchList"]
        }

    return matches_by_year


In [4]:
def clean_odds(match_id, clean_data, full_odds, market):
    """Flatten one market's raw odds into ``clean_data[match_id][market]``.

    Only the first period of ``full_odds[market]`` is read. Its "alternative"
    and "main" lines are processed in that order and, for every line, each
    bookmaker's prices are stored under readable outcome names::

        clean_data[match_id][market][line_name][bookie_name] = {outcome: price}

    Args:
        match_id: key of the match inside ``clean_data``.
        clean_data: output mapping, mutated in place;
            ``clean_data[match_id][market]`` must already exist.
        full_odds: raw market payloads keyed by market name.
        market: one of "over/under", "spread" or "exact".

    Returns:
        None. The result is written into ``clean_data``.

    Raises:
        ValueError: if ``market`` is not one of the supported names.
    """
    outcome_names = {
        "over/under": ("over", "under"),
        "spread": ("home", "away"),
        "exact": ("odd",),
    }
    if market not in outcome_names:
        # Fail up front instead of hitting an unbound local deep in the loop.
        raise ValueError(f"unsupported market: {market!r}")
    names = outcome_names[market]

    divisions = full_odds[market]["periods"][0]["odds"]
    market_out = clean_data[match_id][market]
    for odds_list_item in [*divisions["alternative"], *divisions["main"]]:
        per_bookie = market_out[odds_list_item["name"]] = {}
        for odd in odds_list_item["odds"].values():
            # The k-th outcome (1-based) is read from key "o<k>". The full
            # outcome name is the key; indexing into the name would yield a
            # single character such as "o" or "n".
            per_bookie[odd["bookie_name"]] = {
                name: odd[f"o{position}"]
                for position, name in enumerate(names, start=1)
            }

    return

In [5]:
def scrape_ids_odds(game_ids: dict) -> dict:
    """Download and tidy the pre-match odds of every match in ``game_ids``.

    For each match id, one getMatchOdds request is made per odd group listed
    in ``MARKET_INDEX``; the matching entry of the response's
    ``data.prematch`` list is kept. The h2h market is flattened here, the
    other markets are delegated to ``clean_odds``.

    Args:
        game_ids: mapping whose keys are match ids (values are not read).

    Returns:
        ``{match_id: {"h2h": ..., "spread": ..., "over/under": ...,
        "exact": ...}}``.
    """
    market_names = {1: "h2h", 3: "spread", 4: "over/under", 8: "exact"}
    odds_url_template = "https://oddspedia.com/api/v1/getMatchOdds?wettsteuer=0&geoCode=BR&bookmakerGeoCode=BR&bookmakerGeoState=&matchId={match_id}&oddGroupId={odds_market}&inplay=0&language=br"

    clean_data = {}
    for match_id in game_ids:
        raw_markets = {}
        tidy_match = clean_data[match_id] = {}

        for group_id, prematch_position in MARKET_INDEX.items():
            market_name = market_names[group_id]
            tidy_match[market_name] = {}

            driver.get(odds_url_template.format(match_id=match_id, odds_market=group_id))
            response_text = driver.find_element(By.TAG_NAME, 'pre').text
            payload = json.loads(response_text)

            raw_markets[market_name] = payload["data"]["prematch"][prematch_position]

        # h2h: the first period holds a flat list of bookmaker quotes.
        for quote in raw_markets["h2h"]["periods"][0]["odds"]:
            tidy_match["h2h"][quote["bookie_name"]] = dict(
                home=quote["o1"],
                draw=quote["o2"],
                away=quote["o3"],
            )

        # The remaining markets share the line-based layout handled by clean_odds.
        for line_market in ("over/under", "spread", "exact"):
            clean_odds(match_id, clean_data, raw_markets, line_market)

    return clean_data

In [None]:
# Scrape every configured league: one odds file per season, then one info
# file per league holding the match metadata of all its seasons.
for league in LEAGUES:
    print(league)
    league_settings = LEAGUES[league]
    ids_found = scrape_league_ids(league_settings)

    for year in league_settings["years"]:
        print(year)
        odds_found = scrape_ids_odds(ids_found[year])

        odds_path = f"{league}-{year}-odds.json"
        with open(odds_path, "w+") as outfile:
            json.dump(odds_found, outfile)

    info_path = f"{league}-info.json"
    with open(info_path, "w+") as outfile:
        json.dump(ids_found, outfile)

In [None]:
# Brasileirao - Serie B - 2022
# Missing: Sport - PE x Vasco da Gama, Rodada 35

# Libertadores - 2021
# Missing: Bolivar La Paz x Montevideo Wanderers FC, Qualificação - Jogo 2