In [None]:
# Código Antigo
import numpy as np
import time
import requests
import pandas as pd
import logging
from datetime import datetime, timezone
from concurrent.futures import ThreadPoolExecutor, as_completed



# Date range of match days to download (iterated day by day further down)
start_date = datetime(2025, 2, 1)
end_date = datetime(2025, 4, 24)

# API details
url_events = "https://api.b365api.com/v3/events/ended"
# NOTE(review): the API token is hard-coded in source; consider loading it
# from an environment variable or a secrets store instead.
token = "183604-pWN7flhoAsWGu8"

def make_request_with_retry(url, params, retries=5, backoff_factor=2, timeout=10):
    """Send a GET request, retrying failed attempts with exponential backoff.

    Args:
        url: Endpoint to request.
        params: Query-string parameters forwarded to ``requests.get``.
        retries: Maximum number of attempts.
        backoff_factor: Base of the exponential wait; a failed attempt ``n``
            is followed by a pause of ``backoff_factor ** n`` seconds.
        timeout: Timeout in seconds forwarded to ``requests.get``.

    Returns:
        The response of the first attempt that succeeds (``None`` if
        ``retries`` is smaller than 1, because no attempt is made).

    Raises:
        requests.RequestException: The error of the last attempt, once every
            attempt has failed.
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            if attempt == retries:
                # Nothing left to retry: fail right away instead of announcing
                # a retry and sleeping for the longest backoff first.
                logging.error(f"Falha após {retries} tentativas para URL: {url}")
                raise
            wait_time = backoff_factor ** attempt
            logging.warning(f"Tentativa {attempt} falhou: {e}. Retentando em {wait_time} segundos.")
            time.sleep(wait_time)

def fetch_events_for_date(date):
    """Download every page of ended events for one day.

    Args:
        date: Object with ``strftime``; the day is sent to the API as
            ``YYYYMMDD``.

    Returns:
        List with the ``results`` entries of all pages fetched. If a request
        or the handling of a page fails, the error is logged and whatever was
        gathered up to that point is returned.
    """
    formatted_date = date.strftime('%Y%m%d')
    query = dict(
        token=token,
        sport_id="1",
        league_id="22614",
        day=formatted_date,
        page=1,
    )
    collected = []
    more_pages = True
    while more_pages:
        try:
            payload = make_request_with_retry(url_events, query).json()
            collected.extend(payload['results'])
            # Keep paging while the pages requested so far do not cover the total.
            more_pages = query['page'] * payload['pager']['per_page'] < payload['pager']['total']
            if more_pages:
                query['page'] += 1
        except Exception as e:
            logging.error(f"Erro ao buscar eventos para {formatted_date}: {e}")
            more_pages = False
    return collected

# Collect the events of every day in the configured range
all_events = []
for single_date in pd.date_range(start_date, end_date):
    events = fetch_events_for_date(single_date)
    all_events.extend(events)
    logging.info(f"Eventos coletados em {single_date.strftime('%Y-%m-%d')}: {len(events)}")

# Build the DataFrame: one row per event. Names of the form "Team (Player)"
# are split into a team column and a player column.
event_columns = [
    'event_id', 'date',
    'away_team', 'away_player', 'away_score',
    'home_team', 'home_player', 'home_score',
]
df_events = pd.DataFrame(
    [{
        'event_id': event['id'],
        # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp;
        # the formatted UTC string is the same.
        'date': datetime.fromtimestamp(int(event['time']), tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
        'away_team': event['away']['name'].split('(')[0].strip(),
        'away_player': event['away']['name'].split('(')[1].split(')')[0] if '(' in event['away']['name'] else '',
        'away_score': event.get('scores', {}).get('2', {}).get('away', 'N/A'),
        'home_team': event['home']['name'].split('(')[0].strip(),
        'home_player': event['home']['name'].split('(')[1].split(')')[0] if '(' in event['home']['name'] else '',
        'home_score': event.get('scores', {}).get('2', {}).get('home', 'N/A'),
    } for event in all_events],
    # Explicit columns keep the frame usable when no event was collected.
    columns=event_columns,
)

# Convert the score columns to numbers; 'N/A' and any other non-numeric value
# becomes NaN. The columns are the ones created above (the previous code read
# the non-existent 'gols_casa' / 'gols_fora' columns and raised KeyError).
df_events['home_score'] = pd.to_numeric(df_events['home_score'], errors='coerce')
df_events['away_score'] = pd.to_numeric(df_events['away_score'], errors='coerce')
df_events['total_score'] = df_events['home_score'] + df_events['away_score']
df_events.sort_values(by='date', inplace=True)

# Save the raw data
df_events.to_csv('22614_raw.csv', index=False)
logging.info("Dados dos eventos salvos com sucesso.")

In [None]:
# Data Extractor
import numpy as np
import time
import requests
import pandas as pd
import logging
from datetime import datetime, timezone
from concurrent.futures import ThreadPoolExecutor, as_completed
from IPython.display import display

# Logging Configuration
# Records of level INFO and above go both to the file data_extractor.log and
# to a stream handler, formatted as "<time> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('data_extractor.log'),
        logging.StreamHandler()
    ]
)

# Constants
LEAGUE_ID = 22614  # sent as league_id to the events endpoint and stored on every row
START_DATE = datetime(2025, 2, 1)  # first day requested
END_DATE = datetime(2025, 4, 24)  # last day requested
# NOTE(review): the API token is hard-coded in source; consider loading it
# from an environment variable or a secrets store instead.
API_TOKEN = "183604-pWN7flhoAsWGu8"
EVENTS_URL = "https://api.b365api.com/v3/events/ended"
ODDS_URL = "https://api.b365api.com/v2/event/odds"
CHECKPOINT_INTERVAL = 10  # the odds loop saves a checkpoint when the row index is a positive multiple of this
MAX_CONSECUTIVE_ERRORS = 5  # the odds loop aborts after this many consecutive per-event failures

# Column Definitions
# NOTE(review): BASE_COLUMNS is not referenced by the code visible in this
# cell; it lists the event columns built by the main script plus total_score.
BASE_COLUMNS = [
    'event_id', 'league_id', 'date', 
    'away_team', 'away_player', 'away_score',
    'home_team', 'home_player', 'home_score',
    'total_score'
]

# Odds fields added to every row; process_odds fills them and uses '-' as the
# placeholder for anything it cannot provide.
ODDS_COLUMNS = [
    '1_1_home_od', '1_1_draw_od', '1_1_away_od', '1_1_handicap',
    '1_3_over_od', '1_3_under_od', '1_3_handicap',
    'odds_timestamp'
]

def make_request_with_retry(url, params, retries=5, backoff_factor=2, timeout=10):
    """Send a GET request, retrying failed attempts with exponential backoff.

    Args:
        url: Endpoint to request.
        params: Query-string parameters forwarded to ``requests.get``.
        retries: Maximum number of attempts.
        backoff_factor: Base of the exponential wait; a failed attempt ``n``
            is followed by a pause of ``backoff_factor ** n`` seconds.
        timeout: Timeout in seconds forwarded to ``requests.get``.

    Returns:
        The response of the first attempt that succeeds (``None`` if
        ``retries`` is smaller than 1, because no attempt is made).

    Raises:
        requests.RequestException: The error of the last attempt, once every
            attempt has failed.
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            if attempt == retries:
                # Nothing left to retry: fail right away instead of announcing
                # a retry and sleeping for the longest backoff first.
                logging.error(f"All {retries} attempts failed for URL: {url}")
                raise
            wait_time = backoff_factor ** attempt
            logging.warning(f"Attempt {attempt} failed: {e}. Retrying in {wait_time} seconds.")
            time.sleep(wait_time)

def fetch_events_for_date(date):
    """Download every page of ended events for one day and tag them with the league.

    Args:
        date: Object with ``strftime``; the day is sent to the API as
            ``YYYYMMDD``.

    Returns:
        List with the ``results`` entries of all pages fetched, each one with
        a ``league_id`` key set to ``LEAGUE_ID``. If a request or the handling
        of a page fails, the error is logged and whatever was gathered up to
        that point is returned.
    """
    formatted_date = date.strftime('%Y%m%d')
    query = dict(
        token=API_TOKEN,
        sport_id="1",
        league_id=str(LEAGUE_ID),
        day=formatted_date,
        page=1,
    )

    collected = []
    more_pages = True
    while more_pages:
        try:
            payload = make_request_with_retry(EVENTS_URL, query).json()
            page_events = payload.get('results', [])

            # Tag the whole page before adding it to the result.
            for item in page_events:
                item['league_id'] = LEAGUE_ID
            collected.extend(page_events)

            # Keep paging while the pages requested so far do not cover the total.
            pager = payload.get('pager', {})
            more_pages = query['page'] * pager.get('per_page', 0) < pager.get('total', 0)
            if more_pages:
                query['page'] += 1

        except Exception as e:
            logging.error(f"Error fetching events for {formatted_date}: {e}")
            more_pages = False

    return collected

def fetch_odds(event_id):
    """Request the odds recorded for one event.

    Args:
        event_id: Identifier sent as ``event_id`` to the odds endpoint.

    Returns:
        The ``results.odds`` mapping of the response (an empty dict when that
        part is absent), or ``None`` when the response does not report
        ``success == 1`` or when the request fails (the failure is logged).
    """
    query = {'event_id': event_id, 'token': API_TOKEN}
    try:
        payload = make_request_with_retry(ODDS_URL, query).json()

        if payload.get('success', 0) == 1:
            return payload.get('results', {}).get('odds', {})
        return None

    except Exception as e:
        logging.error(f"Error fetching odds for {event_id}: {e}")
        return None

def process_odds(odds_data):
    """Flatten the odds payload of one event into the ``ODDS_COLUMNS`` fields.

    Args:
        odds_data: Mapping of market key to a list of odds entries, or a falsy
            value when there is nothing to process.

    Returns:
        Dict with one entry per name in ``ODDS_COLUMNS``; fields that cannot
        be filled keep the placeholder ``'-'``. Market ``1_1`` uses the entry
        with the highest ``add_time``. Market ``1_3`` uses the entry with the
        lowest ``add_time`` among those with ``ss == '0-0'`` and non-blank
        over/under odds, falling back to the entry with the highest
        ``add_time``; its ``add_time`` becomes ``odds_timestamp`` (UTC).
        Any error is logged and the fields filled so far are returned.
    """
    processed = dict.fromkeys(ODDS_COLUMNS, '-')

    if not odds_data:
        return processed

    def added_at(entry):
        # Entries without add_time sort as timestamp 0.
        return int(entry.get('add_time', 0))

    def quoted_at_nil_nil(entry):
        if entry.get('ss') != '0-0':
            return False
        if entry.get('over_od') in ('-', ''):
            return False
        return entry.get('under_od') not in ('-', '')

    try:
        # Market 1_1 (Match Odds)
        match_odds = odds_data.get('1_1', [])
        if match_odds:
            newest = max(match_odds, key=added_at)
            processed.update({
                f'1_1_{field}': newest.get(field, '-')
                for field in ('home_od', 'draw_od', 'away_od', 'handicap')
            })

        # Market 1_3 (Over/Under)
        totals = odds_data.get('1_3', [])
        if totals:
            candidates = [entry for entry in totals if quoted_at_nil_nil(entry)]
            if candidates:
                chosen = min(candidates, key=added_at)
            else:
                chosen = max(totals, key=added_at)

            # Build all four fields first so a failure leaves this market untouched.
            totals_fields = {
                f'1_3_{field}': chosen.get(field, '-')
                for field in ('over_od', 'under_od', 'handicap')
            }
            quoted_time = datetime.fromtimestamp(added_at(chosen), tz=timezone.utc)
            totals_fields['odds_timestamp'] = quoted_time.strftime('%Y-%m-%d %H:%M:%S')
            processed.update(totals_fields)

    except Exception as e:
        logging.error(f"Error processing odds: {e}")

    return processed

def save_checkpoint(df, emergency=False):
    """Write the frame to a CSV checkpoint in the working directory.

    Args:
        df: Object exposing ``to_csv``; written without the index.
        emergency: When truthy the file is ``emergency_save.csv``, otherwise
            ``soccer_data.csv``.

    Returns:
        None. A failure while saving is logged, not raised.
    """
    if emergency:
        filename = 'emergency_save.csv'
    else:
        filename = 'soccer_data.csv'

    try:
        df.to_csv(filename, index=False)
        logging.info(f"Checkpoint saved: {filename}")
    except Exception as e:
        logging.error(f"Failed to save checkpoint: {e}")

# Main Execution
def _parse_score(raw, event_id, side):
    """Return ``raw`` as an int, or 0 (with a warning) when it is not an integer."""
    try:
        return int(raw)
    except (TypeError, ValueError):
        # One blank or malformed score must not abort the whole extraction.
        logging.warning(f"Event {event_id}: unusable {side} score {raw!r}; storing 0")
        return 0


if __name__ == "__main__":
    # Step 1: Collect Events, one request series per day of the range
    all_events = []
    for date in pd.date_range(START_DATE, END_DATE):
        try:
            daily_events = fetch_events_for_date(date)
            all_events.extend(daily_events)
            logging.info(f"Collected {len(daily_events)} events for {date.strftime('%Y-%m-%d')}")
        except Exception as e:
            logging.error(f"Failed to process {date}: {e}")

    # Step 2: Create DataFrame, one row per event. Names of the form
    # "Team (Player)" are split into a team column and a player column.
    event_columns = [
        'event_id', 'league_id', 'date',
        'away_team', 'away_player', 'away_score',
        'home_team', 'home_player', 'home_score',
    ]
    rows = []
    for event in all_events:
        away_name = event['away']['name']
        home_name = event['home']['name']
        # NOTE(review): key '2' is taken as-is from the original code; its
        # meaning in the API payload should be confirmed. A missing score
        # defaults to 0, as before.
        score_entry = event.get('scores', {}).get('2', {})
        rows.append({
            'event_id': event['id'],
            'league_id': LEAGUE_ID,
            # Timezone-aware replacement for the deprecated
            # datetime.utcfromtimestamp; the formatted UTC string is the same.
            'date': datetime.fromtimestamp(
                int(event['time']), tz=timezone.utc
            ).strftime('%Y-%m-%d %H:%M:%S'),
            'away_team': away_name.split('(')[0].strip(),
            'away_player': away_name.split('(')[1].split(')')[0] if '(' in away_name else '',
            'away_score': _parse_score(score_entry.get('away', 0), event['id'], 'away'),
            'home_team': home_name.split('(')[0].strip(),
            'home_player': home_name.split('(')[1].split(')')[0] if '(' in home_name else '',
            'home_score': _parse_score(score_entry.get('home', 0), event['id'], 'home'),
        })

    # Explicit columns keep the frame usable when no event was collected.
    df = pd.DataFrame(rows, columns=event_columns)

    df['total_score'] = df['home_score'] + df['away_score']

    # Initialize odds columns with the placeholder used by process_odds
    for col in ODDS_COLUMNS:
        df[col] = '-'

    # Step 3: Fetch Odds with progress saving
    consecutive_errors = 0

    for idx, row in df.iterrows():
        # Periodic checkpoint so an interrupted run keeps the rows done so far.
        if idx > 0 and idx % CHECKPOINT_INTERVAL == 0:
            save_checkpoint(df)

        try:
            odds_data = fetch_odds(row['event_id'])
            processed = process_odds(odds_data)

            for col in ODDS_COLUMNS:
                df.at[idx, col] = processed.get(col, '-')

            consecutive_errors = 0

        except Exception as e:
            # NOTE(review): fetch_odds and process_odds log and swallow their
            # own errors, so this handler and the consecutive-error limit only
            # trigger on failures raised elsewhere in this loop body.
            logging.error(f"Failed to process event {row['event_id']}: {e}")
            consecutive_errors += 1

            if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
                logging.critical("Maximum consecutive errors reached!")
                save_checkpoint(df, emergency=True)
                # Non-zero status: the run was aborted, not completed.
                raise SystemExit(1)

    # Final Save
    save_checkpoint(df)
    logging.info("Processing completed successfully!")
    display(df.head())

2025-04-24 17:51:36,307 - INFO - Collected 265 events for 2025-02-01
2025-04-24 17:51:38,516 - INFO - Collected 218 events for 2025-02-02
2025-04-24 17:51:48,135 - INFO - Collected 314 events for 2025-02-03
2025-04-24 17:52:08,095 - INFO - Collected 374 events for 2025-02-04
2025-04-24 17:52:13,492 - INFO - Collected 374 events for 2025-02-05
2025-04-24 17:52:25,372 - INFO - Collected 380 events for 2025-02-06
2025-04-24 17:52:29,695 - INFO - Collected 378 events for 2025-02-07
2025-04-24 17:52:32,646 - INFO - Collected 235 events for 2025-02-08
2025-04-24 17:52:36,447 - INFO - Collected 209 events for 2025-02-09
2025-04-24 17:52:51,948 - INFO - Collected 326 events for 2025-02-10
2025-04-24 17:53:10,090 - INFO - Collected 380 events for 2025-02-11
2025-04-24 17:53:13,061 - INFO - Collected 319 events for 2025-02-12
2025-04-24 17:53:18,001 - INFO - Collected 343 events for 2025-02-13
2025-04-24 17:53:25,618 - INFO - Collected 371 events for 2025-02-14
2025-04-24 17:53:32,993 - INFO - C