In [None]:
from webscraping import *

In [None]:
def setup_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')               # Executa em modo headless (sem interface gráfica)
    options.add_argument('--disable-gpu')            # Melhor performance
    options.add_argument('--no-sandbox')             # Necessário para execução em alguns servidores
    options.add_argument('--disable-dev-shm-usage')  # Evita problemas de memória
    service = Service('./chromedriver.exe')          # Caminho para o chromedriver
    driver = webdriver.Chrome(service=service, options=options)
    driver.maximize_window()
    return driver

In [None]:
def process_url(url, colecao_partidas):
    """
    Processa uma URL para extrair dados dos jogos e armazená-los no banco de dados.
    """
    driver = setup_driver()
    try:
        driver.get(url)
        if 'No match found.' in driver.page_source:
            logging.info(f"Nenhuma partida encontrada na URL: {url}")
            return

        # Aceitar cookies (se necessário)
        try:
            WebDriverWait(driver, 8).until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, 'button#onetrust-accept-btn-handler'))
            )
            driver.find_element(By.CSS_SELECTOR, 'button#onetrust-accept-btn-handler').click()
        except TimeoutException:
            logging.info("Cookies já foram aceitos ou não encontrados.")

        # Carregar mais eventos, se disponíveis
        while 'event__more' in driver.page_source:
            try:
                button_more = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable(
                        (By.CSS_SELECTOR, 'a.event__more.event__more--static')
                    )
                )
                driver.execute_script("arguments[0].click();", button_more)
                sleep(2)
            except TimeoutException:
                logging.error("Timeout ao tentar carregar mais eventos.")
                break

        # Extrair IDs dos jogos
        jogos = driver.find_elements(By.CSS_SELECTOR, 'div.event__match--twoLine')
        id_jogos = [jogo.get_attribute("id")[4:] for jogo in jogos if jogo.get_attribute("id")]

        # Extrair temporada
        try:
            season = driver.find_element(By.CSS_SELECTOR, 'div.heading__info').text
        except Exception as e:
            logging.error(f"Erro ao extrair temporada: {e}")
            season = "Unknown"

        logging.info(f"Processando temporada: {season}")

        # Processar cada jogo encontrado
        for id_jogo in tqdm(id_jogos, desc=f"Processando {driver.title}"):
            query = Query()
            if not colecao_partidas.search(query.Id == id_jogo):
                try:
                    jogo = {}
                    Dados_Jogo(id_jogo, jogo, driver) 
                    Temporada(jogo, season)  
                    Odds_Math_Odds_HT(id_jogo, jogo, driver)
                    Odds_Over_Under_HT(id_jogo, jogo, driver)
                    Odds_Math_Odds_FT(id_jogo, jogo, driver)
                    Odds_Over_Under_FT(id_jogo, jogo, driver)
                    Odds_BTTS(id_jogo, jogo, driver)
                    Odds_Dupla_Chance(id_jogo, jogo, driver)
                    Goals_HT_FT(id_jogo, jogo, driver)
                    Minutos_dos_Gols(id_jogo, jogo, driver)

                    colecao_partidas.insert(jogo.copy())
                    logging.info(f"Jogo {id_jogo} inserido com sucesso.")
                except Exception as e:
                    logging.error(f"Erro ao processar jogo {id_jogo}: {e}")
    except Exception as e:
        logging.error(f"Erro ao processar URL {url}: {e}")
    finally:
        driver.quit()

In [None]:
links_2025 = [
    'https://www.flashscore.com/football/argentina/torneo-betano-2025/results/'
    ]

In [None]:
db = TinyDB('./base_flashscore_2025.json')
for url in links_2025:
    logging.info(f"Iniciando processamento para: {url}")
    process_url(url, db)
logging.info("Processamento concluído.")

In [3]:
base_2025 = TinyDB('base_flashscore_2025.json')

df1 = pd.DataFrame(base_2025.all())

df = pd.concat([df1])
df.drop_duplicates(subset=['Id'], inplace=True)

df[['Dia','Mes','Ano']] = df['Date'].str.split('/',expand=True)
df = df.drop(['Date'], axis=1)
df['Date'] = df['Ano']+'-'+df['Mes']+'-'+ df['Dia']
df = df.drop(columns=['Dia','Mes','Ano'], axis=1)

df = df[['Id','League','Season','Round','Date','Time','Home','Away',
         'Odd_H_HT','Odd_D_HT','Odd_A_HT',
         'Odd_Over05_HT','Odd_Under05_HT','Odd_Over15_HT','Odd_Under15_HT','Odd_Over25_HT','Odd_Under25_HT',
         'Odd_H_FT','Odd_D_FT','Odd_A_FT',
         'Odd_Over05_FT','Odd_Under05_FT','Odd_Over15_FT','Odd_Under15_FT','Odd_Over25_FT','Odd_Under25_FT',
         'Odd_Over35_FT','Odd_Under35_FT','Odd_Over45_FT','Odd_Under45_FT',
         'Odd_BTTS_Yes','Odd_BTTS_No',
         'Odd_1X','Odd_12','Odd_X2',
         'Goals_H_HT','Goals_A_HT','Goals_H_FT','Goals_A_FT','Goals_Minutes_Home','Goals_Minutes_Away']]
df.fillna(0, inplace=True)

df = df.sort_values(by='Date')
df = drop_reset_index(df)

display(df)

Unnamed: 0,Id,League,Season,Round,Date,Time,Home,Away,Odd_H_HT,Odd_D_HT,Odd_A_HT,Odd_Over05_HT,Odd_Under05_HT,Odd_Over15_HT,Odd_Under15_HT,Odd_Over25_HT,Odd_Under25_HT,Odd_H_FT,Odd_D_FT,Odd_A_FT,Odd_Over05_FT,Odd_Under05_FT,Odd_Over15_FT,Odd_Under15_FT,Odd_Over25_FT,Odd_Under25_FT,Odd_Over35_FT,Odd_Under35_FT,Odd_Over45_FT,Odd_Under45_FT,Odd_BTTS_Yes,Odd_BTTS_No,Odd_1X,Odd_12,Odd_X2,Goals_H_HT,Goals_A_HT,Goals_H_FT,Goals_A_FT,Goals_Minutes_Home,Goals_Minutes_Away
1,fLYbyZmS,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-01,21:30,Talleres Cordoba,Tigre,2.63,2.05,4.75,1.5,2.5,3.4,1.3,10.0,1.06,1.91,3.3,4.2,1.08,8.0,1.4,2.75,2.25,1.62,4.0,1.22,9.0,1.07,2.0,1.75,1.22,1.33,1.83,1,1,1,2,[45],"[36, 67]"
2,6Po8Sw5c,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-02,19:15,Velez Sarsfield,Huracan,3.4,1.8,4.33,1.67,2.1,4.33,1.2,15.0,1.03,2.5,2.75,3.4,1.14,5.5,1.67,2.1,3.1,1.33,6.5,1.11,17.0,1.03,2.25,1.57,1.27,1.37,1.47,0,1,0,2,[],"[22, 80]"
3,QkpdHDdd,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-02,17:00,Argentinos Jrs,Instituto,2.38,2.0,6.5,1.57,2.25,3.75,1.25,11.0,1.05,1.65,3.6,6.0,1.1,7.0,1.5,2.5,2.5,1.5,5.0,1.17,13.0,1.04,2.5,1.5,1.13,1.29,2.2,0,0,2,0,"[55, 74]",[]
4,z5wPOe4M,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-02,17:00,Ind. Rivadavia,Lanus,3.75,1.91,3.5,1.57,2.25,4.0,1.22,13.0,1.04,2.88,3.0,2.63,1.1,7.0,1.5,2.5,2.5,1.5,5.0,1.17,11.0,1.05,2.1,1.67,1.44,1.36,1.4,0,0,1,1,[46],[71]
5,IPtlJZRq,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-03,21:00,Atl. Tucuman,Newells Old Boys,3.0,1.91,4.5,1.57,2.25,3.75,1.25,13.0,1.04,2.1,3.1,3.8,1.11,6.5,1.5,2.5,2.6,1.48,5.5,1.14,13.0,1.04,2.1,1.67,1.22,1.33,1.67,1,2,1,2,[41],"[4, 45]"
6,nJDwUjt2,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-03,19:00,San Lorenzo,Racing Club,3.5,1.8,4.33,1.73,2.08,4.5,1.18,17.0,1.03,2.5,2.8,3.4,1.14,5.5,1.67,2.1,3.4,1.33,7.0,1.1,17.0,1.03,2.5,1.5,1.3,1.4,1.5,1,1,3,2,"[2, 86, 90]","[30, 50]"
7,2iuHQHZ9,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-03,17:00,Barracas Central,Godoy Cruz,3.4,1.8,4.5,1.73,2.08,5.0,1.17,17.0,1.03,2.35,2.9,3.4,1.14,5.5,1.67,2.1,3.4,1.33,7.0,1.1,17.0,1.03,2.5,1.5,1.3,1.4,1.57,0,1,1,2,[51],"[26, 90]"
8,6oNruHl3,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-04,20:00,Platense,Defensa y Justicia,3.2,1.83,4.5,1.67,2.1,4.33,1.2,15.0,1.03,2.25,2.9,3.5,1.14,5.5,1.67,2.1,3.1,1.36,7.0,1.1,17.0,1.03,2.38,1.53,1.29,1.4,1.62,0,1,0,1,[],[11]
9,KUtlNrTH,ARGENTINA - TORNEO BETANO,2025,APERTURA,2025-03-04,19:30,Banfield,Independiente,4.0,1.83,3.6,1.67,2.1,4.33,1.2,15.0,1.03,3.0,3.0,2.6,1.14,5.5,1.67,2.1,3.1,1.36,7.0,1.1,17.0,1.03,2.25,1.57,1.5,1.36,1.36,0,0,0,0,[],[]
