# Notebook cell In [9]:
import requests
import sqlite3
import time
import os
from typing import Dict, Any, Optional, List
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

class StatisticsLoader:
    """Load per-team fixture statistics from the football API into SQLite.

    The loader reads fixture IDs from an existing ``fixtures`` table, requests
    ``/fixtures/statistics`` for each of them and upserts one row per team
    into ``fixture_statistics``.

    Write transactions are kept short on purpose: the HTTP request for a
    fixture completes *before* any row is written, and the rows of one fixture
    are committed together. Holding an open write transaction across slow
    network calls keeps the SQLite write lock for a long time and makes
    "database is locked" errors far more likely for every connection that
    touches the same file.

    The instance can be used as a context manager so the connection is closed
    even when an exception interrupts the run.
    """

    # Retry/timeout policy for API requests.
    MAX_ATTEMPTS = 3
    RETRY_DELAY_SECONDS = 5
    REQUEST_TIMEOUT_SECONDS = 30

    _UPSERT_SQL = '''INSERT OR REPLACE INTO fixture_statistics (
        fixture_id, team_id, team_name, shots_on_goal, shots_off_goal, total_shots, blocked_shots,
        shots_insidebox, shots_outsidebox, fouls, corner_kicks, offsides, ball_possession, yellow_cards,
        red_cards, goalkeeper_saves, total_passes, passes_accurate, passes_percentage, expected_goals, goals_prevented,
        last_updated
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)'''

    def __init__(self, database_path: str, base_url: str = "https://v3.football.api-sports.io"):
        """Open the SQLite database and prepare the API request headers.

        Args:
            database_path: Path of the SQLite database file.
            base_url: Root URL that endpoints are appended to.
        """
        self.database_path = database_path
        # The key is read from the "x-rapidapi-key" environment variable and
        # sent in the "x-apisports-key" header.
        self.api_key = os.getenv("x-rapidapi-key")
        self.base_url = base_url
        self.headers = {"x-apisports-key": self.api_key}
        # Wait up to 60 seconds for a competing lock before raising.
        self.conn = sqlite3.connect(self.database_path, timeout=60)
        self.cursor = self.conn.cursor()
        self.cursor.execute("PRAGMA journal_mode=WAL;")  # Enable Write-Ahead Logging

    def __enter__(self) -> "StatisticsLoader":
        """Return the loader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> bool:
        """Close the connection on leaving the ``with`` block; never suppress errors."""
        self.close_connection()
        return False

    def initialize_table(self):
        """Create the ``fixture_statistics`` table if it does not exist yet."""
        self.cursor.execute('''CREATE TABLE IF NOT EXISTS fixture_statistics (
            fixture_id INTEGER, 
            team_id INTEGER, 
            team_name TEXT, 
            shots_on_goal INTEGER, 
            shots_off_goal INTEGER, 
            total_shots INTEGER, 
            blocked_shots INTEGER, 
            shots_insidebox INTEGER, 
            shots_outsidebox INTEGER, 
            fouls INTEGER, 
            corner_kicks INTEGER, 
            offsides INTEGER, 
            ball_possession REAL, 
            yellow_cards INTEGER, 
            red_cards INTEGER, 
            goalkeeper_saves INTEGER, 
            total_passes INTEGER, 
            passes_accurate INTEGER, 
            passes_percentage REAL, 
            expected_goals REAL, 
            goals_prevented REAL, 
            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 
            PRIMARY KEY (fixture_id, team_id))''')
        self.conn.commit()

    def fetch_data(self, endpoint: str, params: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """GET ``base_url + endpoint`` and return the decoded JSON body.

        The request is attempted up to ``MAX_ATTEMPTS`` times with a pause of
        ``RETRY_DELAY_SECONDS`` between attempts.

        Args:
            endpoint: Path appended to ``base_url``.
            params: Query-string parameters.

        Returns:
            The parsed JSON payload, or ``None`` when every attempt failed.
        """
        for attempt in range(1, self.MAX_ATTEMPTS + 1):
            try:
                response = requests.get(
                    self.base_url + endpoint,
                    headers=self.headers,
                    params=params,
                    # Without a timeout a stalled connection would block forever.
                    timeout=self.REQUEST_TIMEOUT_SECONDS,
                )
                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException as e:
                if attempt == self.MAX_ATTEMPTS:
                    # No point in sleeping when there is no further attempt.
                    print(f"API request failed: {e}. Giving up after {attempt} attempts.")
                    break
                print(f"API request failed: {e}. Retrying in {self.RETRY_DELAY_SECONDS} seconds...")
                time.sleep(self.RETRY_DELAY_SECONDS)
        return None

    def get_remaining_fixtures(self, refresh_interval_days: int = 7) -> List[int]:
        """Return IDs of finished fixtures whose statistics are missing or stale.

        A fixture qualifies when its date is before today, its status is
        ``'Match Finished'`` and it either has no ``fixture_statistics`` row or
        has a row last updated more than ``refresh_interval_days`` days ago.

        Args:
            refresh_interval_days: Age in days after which stored statistics
                are refreshed.

        Returns:
            Each qualifying fixture ID exactly once.
        """
        # DISTINCT: the join produces one row per team, which would otherwise
        # list (and re-download) the same fixture once for every stored team.
        query = '''
            SELECT DISTINCT f.fixture_id
            FROM fixtures f
            LEFT JOIN fixture_statistics fs ON f.fixture_id = fs.fixture_id
            WHERE f.date < DATE('now')
              AND f.status = 'Match Finished'
              AND (fs.last_updated IS NULL OR fs.last_updated < DATE('now', ?))
        '''
        # The date modifier is bound as a parameter instead of being
        # interpolated into the SQL text.
        modifier = f"-{int(refresh_interval_days)} days"
        rows = self.cursor.execute(query, (modifier,)).fetchall()
        return [row[0] for row in rows]

    @staticmethod
    def _parse_percentage(value: Any) -> Optional[float]:
        """Convert a value such as ``"55%"`` to ``55.0``; return ``None`` if unusable."""
        if value is None:
            return None
        if isinstance(value, str):
            value = value.replace("%", "").strip()
            if not value:
                return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def _build_row(self, fixture_id: int, team_stats: Dict[str, Any]) -> Optional[tuple]:
        """Turn one team entry of the API response into the upsert parameter tuple.

        Returns ``None`` when the entry carries no team ID, since the row could
        not be keyed in that case.
        """
        team = team_stats.get("team") or {}
        team_id = team.get("id")
        if team_id is None:
            return None
        stats = {
            stat.get("type"): stat.get("value")
            for stat in team_stats.get("statistics") or []
        }
        return (
            fixture_id,
            team_id,
            team.get("name"),
            stats.get("Shots on Goal"),
            stats.get("Shots off Goal"),
            stats.get("Total Shots"),
            stats.get("Blocked Shots"),
            stats.get("Shots insidebox"),
            stats.get("Shots outsidebox"),
            stats.get("Fouls"),
            stats.get("Corner Kicks"),
            stats.get("Offsides"),
            self._parse_percentage(stats.get("Ball Possession")),
            stats.get("Yellow Cards"),
            stats.get("Red Cards"),
            stats.get("Goalkeeper Saves"),
            stats.get("Total passes"),
            stats.get("Passes accurate"),
            self._parse_percentage(stats.get("Passes %")),
            stats.get("expected_goals"),
            stats.get("goals_prevented"),
        )

    def process_fixture(self, fixture_id: int):
        """Download the statistics of one fixture and store them.

        All network I/O happens before the database is touched; the rows of
        the fixture are then written and committed in a single short
        transaction. Database errors are reported and swallowed so one bad
        fixture does not stop a long run.

        Args:
            fixture_id: ID of the fixture to refresh.
        """
        print(f"Processing fixture ID: {fixture_id}")
        stats_response = self.fetch_data("/fixtures/statistics", {"fixture": fixture_id})
        if not stats_response:
            print(f"Failed to fetch data for fixture ID: {fixture_id}")
            return

        rows = []
        for team_stats in stats_response.get("response") or []:
            row = self._build_row(fixture_id, team_stats)
            if row is None:
                print(f"Skipping team entry without team ID for fixture ID: {fixture_id}")
                continue
            rows.append(row)
        if not rows:
            return

        try:
            # ``with self.conn`` commits on success and rolls back on error,
            # so the write lock is released as soon as the rows are stored.
            with self.conn:
                self.cursor.executemany(self._UPSERT_SQL, rows)
        except sqlite3.OperationalError as e:
            print(f"Database error while processing fixture ID {fixture_id}: {e}")

    def update_statistics(self, refresh_interval_days: int = 7):
        """Refresh statistics for every fixture that is missing or stale.

        Prints a progress percentage and a remaining-time estimate after each
        fixture.

        Args:
            refresh_interval_days: Passed to ``get_remaining_fixtures``.
        """
        remaining_fixtures = self.get_remaining_fixtures(refresh_interval_days=refresh_interval_days)
        total_fixtures = len(remaining_fixtures)
        start_time = time.time()
        for index, fixture_id in enumerate(remaining_fixtures, start=1):
            # process_fixture commits its own rows, so no batching is needed
            # here and no transaction stays open between API calls.
            self.process_fixture(fixture_id)

            progress = (index / total_fixtures) * 100
            elapsed_time = time.time() - start_time
            estimated_time_left = (elapsed_time / index) * (total_fixtures - index)
            print(f"\rProgress: {progress:.2f}% | Time left: {estimated_time_left / 60:.2f} minutes", end="")

        self.conn.commit()
        print("\nStatistics update completed.")

    def close_connection(self):
        """Commit any pending work and close the connection.

        A failing commit (including one on an already closed connection) is
        reported instead of raised, so calling this method twice is harmless.
        """
        try:
            self.conn.commit()
        except sqlite3.Error as e:
            print(f"Error during commit: {e}")
        finally:
            self.conn.close()
            print("Database connection closed.")


if __name__ == "__main__":
    # Script entry point: refresh stored fixture statistics in the local database.
    DATABASE_PATH = "./data/football_data.db"
    loader = StatisticsLoader(DATABASE_PATH)
    try:
        loader.initialize_table()
        loader.update_statistics(refresh_interval_days=7)  # Refresh stats older than 7 days
    finally:
        # Always release the SQLite connection, even when the update fails.
        # A connection left open in a long-lived process (e.g. a notebook
        # kernel) can keep the database locked for later runs.
        loader.close_connection()


# Notebook output: OperationalError: database is locked