In [3]:
# %% [markdown]
# # Coletor de Odds - Versão Notebook
# 
# **Funcionalidades:**
# 1. Processa apenas eventos não coletados
# 2. Para após 3 erros 429 consecutivos
# 3. Salva progresso automaticamente
# 4. Exibe progresso em tempo real

# %%
import logging
import os
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone

import pandas as pd
import requests
from tqdm.notebook import tqdm

# %% [markdown]
# ## Configurações Iniciais

# %%
# API settings.
# The token is read from the B365_API_TOKEN environment variable so that the
# credential is never stored (or committed) together with the notebook.
TOKEN = os.environ.get("B365_API_TOKEN", "")
URL_ODDS = "https://api.b365api.com/v2/event/odds"

# HTTP 429 (rate-limit) bookkeeping shared by the request workers.
MAX_429_ERRORS = 3       # how many 429 responses are tolerated before aborting
current_429_errors = 0   # running count of 429 responses
stop_execution = False   # set to True once the limit above is reached

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Warn early instead of letting every request go out without a token.
if not TOKEN:
    logging.warning("Variável de ambiente B365_API_TOKEN não definida - defina-a antes de executar a coleta")

# %% [markdown]
# ## Funções Principais

# %%
# make_request runs concurrently on several worker threads; this lock guards
# the shared 429 counter and the stop flag against lost updates.
_RATE_LIMIT_LOCK = threading.Lock()


def make_request(event_id, max_attempts=5):
    """Fetch the odds payload for one event, retrying with exponential backoff.

    Args:
        event_id: Identifier sent to the API as the ``event_id`` parameter.
        max_attempts: Maximum number of HTTP attempts for this event.

    Returns:
        The decoded JSON body of the first successful response, or ``None``
        when every attempt failed, when the global ``stop_execution`` flag is
        already set, or when this call reached the 429 limit.

    Side effects:
        Increments the global ``current_429_errors`` on every HTTP 429 and
        resets it to 0 after a successful response, so that only *consecutive*
        429 responses count towards ``MAX_429_ERRORS``. Sets the global
        ``stop_execution`` to ``True`` once that limit is reached.
    """
    global current_429_errors, stop_execution

    params = {
        'token': TOKEN,
        'event_id': event_id
    }

    for attempt in range(max_attempts):
        if stop_execution:
            return None

        try:
            response = requests.get(URL_ODDS, params=params, timeout=15)

            if response.status_code == 429:
                # Update the counter and the flag atomically; keep a local
                # snapshot so the log line reports the value this call produced.
                with _RATE_LIMIT_LOCK:
                    current_429_errors += 1
                    seen_429 = current_429_errors
                    if seen_429 >= MAX_429_ERRORS:
                        stop_execution = True

                logging.warning(f"Erro 429 detectado ({seen_429}/{MAX_429_ERRORS})")

                if seen_429 >= MAX_429_ERRORS:
                    logging.error("Limite de erros 429 atingido! Interrompendo...")
                    return None

                time.sleep((2 ** attempt) + 2)
                continue

            response.raise_for_status()
            payload = response.json()

        except Exception as e:
            logging.warning(f"Erro na tentativa {attempt+1}: {str(e)}")
            # Back off only when another attempt will actually follow.
            if attempt < max_attempts - 1:
                time.sleep(2 ** attempt)
            continue

        # A successful response breaks the streak of consecutive 429 errors.
        with _RATE_LIMIT_LOCK:
            current_429_errors = 0
        return payload

    return None

# %%
def process_odds(event_id):
    """Download the odds of one event and flatten them into a single record.

    For each of the markets ``'1_1'``, ``'1_2'`` and ``'1_3'`` one entry is
    picked: entries whose ``ss`` field equals ``'0-0'`` are preferred, falling
    back to entries with an empty/missing ``ss`` (presumably pre-match quotes;
    verify against the API docs). Among the candidates, the one with the
    highest ``add_time`` wins.

    Args:
        event_id: Identifier forwarded to ``make_request``.

    Returns:
        A dict holding ``event_id`` plus, per market that had a usable entry,
        the keys ``<market>_home/draw/away`` (or ``<market>_over/under/handicap``
        for ``'1_3'``) and ``<market>_time`` (UTC, ``YYYY-MM-DD HH:MM:SS``).
        ``None`` when execution was stopped, the request failed, the payload's
        ``success`` field is not 1, or any exception occurred while parsing.
    """
    if stop_execution:
        return None

    # (column suffix, API field) pairs, in the order the columns are written.
    totals_fields = (('over', 'over_od'), ('under', 'under_od'), ('handicap', 'handicap'))
    outcome_fields = (('home', 'home_od'), ('draw', 'draw_od'), ('away', 'away_od'))

    try:
        payload = make_request(event_id)
        if not payload or payload.get('success') != 1:
            return None

        odds_by_market = payload.get('results', {}).get('odds', {})
        record = {'event_id': event_id}

        for market in ('1_1', '1_2', '1_3'):
            quotes = odds_by_market.get(market, [])

            # Prefer entries tagged '0-0'; otherwise take those without 'ss'.
            candidates = [q for q in quotes if q.get('ss') == '0-0']
            if not candidates:
                candidates = [q for q in quotes if not q.get('ss')]
            if not candidates:
                continue

            # Most recently added entry among the candidates.
            latest = max(candidates, key=lambda q: int(q.get('add_time', 0)))

            fields = totals_fields if market == '1_3' else outcome_fields
            for suffix, api_field in fields:
                record[f"{market}_{suffix}"] = latest.get(api_field)

            # Render the entry's timestamp as a UTC string.
            added_at = datetime.fromtimestamp(int(latest.get('add_time', 0)), tz=timezone.utc)
            record[f"{market}_time"] = added_at.strftime('%Y-%m-%d %H:%M:%S')

        return record

    except Exception as e:
        logging.error(f"Erro no evento {event_id}: {str(e)}")
        return None

# %% [markdown]
# ## Carregar Dados e Executar

# %%
# Load the datasets.
# Previously collected odds (if any) tell us which events can be skipped.
try:
    df_existing = pd.read_csv('dados_com_odds.csv')
except FileNotFoundError:
    # First run: nothing has been collected yet.
    df_existing = pd.DataFrame()
    existing_ids = set()
    logging.info("Arquivo não encontrado - iniciando do zero")
else:
    existing_ids = set(df_existing['event_id'])
    logging.info(f"Carregados {len(df_existing)} eventos existentes")

# Keep only the events whose id is not in the already-collected set.
df_events = pd.read_csv('dados_eventos.csv')
new_events = df_events.loc[~df_events['event_id'].isin(existing_ids)]

logging.info(
    f"Encontrados {len(new_events)} novos eventos"
    if not new_events.empty
    else "Nenhum novo evento para processar"
)

# %%
# Run the collection with live progress.
results = []
batch_size = 10  # Lower this if 429 errors show up
last_saved_count = 0  # how many entries of `results` are already on disk

# Take the ids straight from the column: iterrows() builds one Series per row
# and may upcast integer ids to float when the frame has float columns.
pending_ids = new_events['event_id'].tolist()

with tqdm(total=len(pending_ids), desc="Processando Odds") as pbar:
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {executor.submit(process_odds, event_id): event_id for event_id in pending_ids}

        for future in as_completed(futures):
            # Every finished event advances the bar, successful or not,
            # so the bar can actually reach its total.
            pbar.update(1)

            result = future.result()
            if result:
                results.append(result)

            # Checkpoint only when a full batch of NEW results has accumulated;
            # this avoids rewriting the same file after every failed event.
            if len(results) - last_saved_count >= batch_size:
                df_final = pd.concat([df_existing, pd.DataFrame(results)], ignore_index=True)
                df_final.to_csv('dados_com_odds.csv', index=False)
                last_saved_count = len(results)
                logging.info(f"Progresso salvo - {len(df_final)} eventos")

            # Checked after handling the finished future so that a result that
            # completed before the stop flag was raised is not thrown away.
            if stop_execution:
                executor.shutdown(wait=False, cancel_futures=True)
                break

# %% [markdown]
# ## Salvamento Final e Resultados

# %%
# Consolidate the final data: previously stored rows followed by the rows
# collected in this run, written out as both CSV and Excel.
if not results:
    logging.info("Nenhum novo dado foi processado")
else:
    df_final = pd.concat(
        [df_existing, pd.DataFrame(results)],
        ignore_index=True,
    )
    df_final.to_csv('dados_com_odds.csv', index=False)
    df_final.to_excel('dados_com_odds.xlsx', index=False)
    logging.info(f"Processo completo! Total de eventos: {len(df_final)}")

    # Show a small sample of the most recent rows
    display(df_final.tail(3))

2025-04-09 16:34:17,328 - INFO - Carregados 8445 eventos existentes
2025-04-09 16:34:17,363 - INFO - Encontrados 18439 novos eventos


Processando Odds:   0%|          | 0/18439 [00:00<?, ?it/s]

2025-04-09 16:34:18,497 - ERROR - Limite de erros 429 atingido! Interrompendo...
2025-04-09 16:34:21,115 - INFO - Nenhum novo dado foi processado
