# In [2]:
import requests
import pandas as pd
import time
from typing import Dict, List, Optional

# In [None]:
class Scraper:
    """Collect Premier League match and shooting data from the FBR API.

    The scraper talks to ``https://fbrapi.com``: it obtains an API key,
    lists the seasons and teams of the league identified by
    ``premier_league_id``, downloads every team's matches and per-match
    stats, and combines them into a single ``pandas.DataFrame``.

    All failures are reported with ``print`` and turned into "empty"
    results (``None``, ``[]`` or an empty DataFrame) rather than raised.

    Args:
        request_delay: Seconds to sleep before every API request and between
            teams, used as client-side rate limiting. Defaults to 6, the
            value that was previously hard-coded.
        timeout: Seconds to wait for an HTTP response before giving up, so
            a stalled connection cannot hang the scrape forever.
    """

    # Shooting columns added to each team's match table.
    _SHOOTING_COLUMNS = ["Sh", "SoT", "Dist", "FK", "PK", "PKatt"]

    def __init__(self, request_delay: float = 6.0, timeout: float = 30.0):
        self.base_url = "https://fbrapi.com"
        self.api_key: Optional[str] = None
        self.premier_league_id = 9
        self.request_delay = request_delay
        self.timeout = timeout

    def generate_api_key(self) -> Optional[str]:
        """Request a new API key, store it on the instance and return it.

        Returns:
            The generated key, or ``None`` if the request failed or the
            response did not contain an ``api_key`` field.
        """
        try:
            response = requests.post(
                f"{self.base_url}/generate_api_key", timeout=self.timeout
            )
            response.raise_for_status()
            api_key = response.json()['api_key']
        except requests.exceptions.RequestException as e:
            print(f"Error generating API key: {e}")
            return None
        except (KeyError, TypeError, ValueError) as e:
            # Body was not JSON, or was JSON without the expected field.
            print(f"Error generating API key: unexpected response ({e!r})")
            return None

        self.api_key = api_key
        print(f"API Key generated successfully: {api_key}")
        return api_key

    def make_request(self, endpoint: str, params: Optional[Dict] = None) -> Optional[Dict]:
        """Perform an authenticated GET against ``endpoint``.

        Sleeps ``request_delay`` seconds before sending, as rate limiting.

        Args:
            endpoint: Path relative to ``base_url`` (no leading slash).
            params: Optional query-string parameters.

        Returns:
            The decoded JSON body, or ``None`` when no API key is set, the
            request fails, or the body is not valid JSON.
        """
        if not self.api_key:
            print("No API key available. Please generate one first")
            return None

        headers = {"X-API-KEY": self.api_key}
        url = f"{self.base_url}/{endpoint}"

        try:
            time.sleep(self.request_delay)
            response = requests.get(
                url, headers=headers, params=params, timeout=self.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error making requests to {endpoint}: {e}")
            return None
        except ValueError as e:
            # Older versions of requests raise a plain ValueError on bad JSON.
            print(f"Error making requests to {endpoint}: {e}")
            return None

    def get_available_seasons(self) -> List[str]:
        """Return the season ids the API lists for the configured league."""
        data = self.make_request("league-seasons", {"league_id": self.premier_league_id})
        if data and 'data' in data:
            return [season['season_id'] for season in data['data']]
        return []

    def get_teams_for_season(self, season_id: str) -> List[Dict]:
        """Return ``{'team_id', 'team_name'}`` dicts for every team in a season.

        Teams are gathered from every ``standings`` list found in the
        league-standings response; an empty list is returned on failure.
        """
        data = self.make_request("league-standings", {
            "league_id": self.premier_league_id,
            "season_id": season_id,
        })
        if not data or 'data' not in data:
            return []

        return [
            {'team_id': team['team_id'], 'team_name': team['team_name']}
            for standings_group in data['data']
            if 'standings' in standings_group
            for team in standings_group['standings']
        ]

    # Backward-compatible alias: the method was originally defined under this
    # (plural) name while its only caller used the singular form.
    get_teams_for_seasons = get_teams_for_season

    def get_team_matches(self, team_id: str, season_id: str) -> List[Dict]:
        """Return the raw match records for one team in one season."""
        data = self.make_request("matches", {
            "team_id": team_id,
            "league_id": self.premier_league_id,
            "season_id": season_id,
        })

        if data and 'data' in data:
            return data['data']
        return []

    def get_team_match_stats(self, team_id: str, season_id: str) -> List[Dict]:
        """Return the raw per-match stat records for one team in one season."""
        data = self.make_request("team-match-stats", {
            "team_id": team_id,
            "league_id": self.premier_league_id,
            "season_id": season_id,
        })

        if data and 'data' in data:
            return data['data']
        return []

    @staticmethod
    def _build_matches_frame(matches: List[Dict]) -> pd.DataFrame:
        """Convert raw match records into the match table."""
        rows = []
        for match in matches:
            rows.append({
                # NOTE(review): the original read match['data'], which looks
                # like a typo for the record's date field; confirm against
                # the API's match schema.
                'Date': match['date'],
                'Time': match['time'],
                'Comp': 'Premier League',
                'Round': match['round'],
                'Venue': 'Home' if match['home_away'] == 'Home' else 'Away',
                'Result': match['result'],
                'GF': match['gf'],
                'GA': match['ga'],
                'Opponent': match['opponent'],
                'Formation': match.get('formation', ''),
                'Referee': match.get('referee', ''),
                'Match Report': '',
                'Notes': '',
            })
        return pd.DataFrame(rows)

    @staticmethod
    def _build_shooting_frame(match_stats: List[Dict]) -> pd.DataFrame:
        """Convert raw stat records into the shooting table, keyed by date.

        Records without a ``stats.shooting`` section are skipped.
        """
        rows = []
        for match_stat in match_stats:
            stats = match_stat.get('stats') or {}
            if 'shooting' not in stats:
                continue
            shooting_stats = stats['shooting']
            # Penalty attempts are looked up in the 'schedule' section, as
            # in the original code; default to 0 when it is absent.
            schedule_stats = stats.get('schedule') or {}
            rows.append({
                'Date': match_stat['meta_data']['date'],
                'Sh': shooting_stats.get('sh', 0),
                'SoT': shooting_stats.get('sot', 0),
                'Dist': shooting_stats.get('avg_sh_dist', 0),
                'FK': shooting_stats.get('fk_sh', 0),
                'PK': shooting_stats.get('pk_made', 0),
                'PKatt': schedule_stats.get('pk_att', 0),
            })
        return pd.DataFrame(rows)

    def process_team_data(self, team_id: str, team_name: str, season_id: str) -> pd.DataFrame:
        """Build one team's match table for a season, with shooting stats.

        Matches are left-joined with shooting stats on ``Date``. If no stat
        record has a shooting section, the shooting columns are filled
        with 0 instead.

        Args:
            team_id: API identifier of the team.
            team_name: Name used for the ``Team`` column and log messages.
            season_id: API season identifier, also stored in ``Season``.

        Returns:
            One row per match, or an empty DataFrame when matches or match
            stats are unavailable or the merge fails.
        """
        print(f"Processing {team_name} for {season_id}...")

        matches = self.get_team_matches(team_id, season_id)
        if not matches:
            print(f"No matches found for {team_name} in {season_id}")
            return pd.DataFrame()

        match_stats = self.get_team_match_stats(team_id, season_id)
        if not match_stats:
            print(f"No match stats found for {team_name} in {season_id}")
            return pd.DataFrame()

        matches_df = self._build_matches_frame(matches)
        shooting_df = self._build_shooting_frame(match_stats)

        try:
            if not shooting_df.empty:
                team_data = matches_df.merge(
                    shooting_df[["Date"] + self._SHOOTING_COLUMNS],
                    on="Date",
                    how="left",
                )
            else:
                team_data = matches_df.copy()
                for col in self._SHOOTING_COLUMNS:
                    team_data[col] = 0
        except ValueError as e:
            print(f"merge error for {team_name}: {e}")
            return pd.DataFrame()

        # Copy the filtered slice so the column assignments below write to an
        # independent frame (avoids pandas' chained-assignment warning).
        team_data = team_data[team_data["Comp"] == "Premier League"].copy()

        team_data["Season"] = season_id
        team_data["Team"] = team_name

        return team_data

    def scrape_premier_league_data(self, years: List[int]) -> pd.DataFrame:
        """Scrape every team's matches for the requested years.

        Generates an API key first if none is set. Years whose season id is
        not offered by the API are skipped, and a failure on one team is
        logged without aborting the remaining teams.

        Args:
            years: Season end years, e.g. ``2024`` for the 2023-2024 season.

        Returns:
            All team tables concatenated, with lower-cased column names, or
            an empty DataFrame if nothing could be collected.
        """
        if not self.api_key:
            print("Generating API key...")
            if not self.generate_api_key():
                print("Failed to generate API key")
                return pd.DataFrame()

        all_matches = []

        available_seasons = self.get_available_seasons()
        print(f"Available seasons: {available_seasons}")

        for year in years:
            # NOTE(review): years before 2020 map to a single-year id; verify
            # that this matches the season ids the API actually returns.
            if year >= 2020:
                season_id = f"{year-1}-{year}"
            else:
                season_id = str(year)

            if season_id not in available_seasons:
                print(f"Season {season_id} not available in API. Available: {available_seasons}")
                continue

            print(f"Processing season {season_id} (year {year})...")

            teams = self.get_teams_for_season(season_id)
            if not teams:
                print(f"No teams found for season {season_id}")
                continue

            print(f"Found {len(teams)} teams for {season_id}")

            for team in teams:
                try:
                    team_name = team['team_name']
                    # Pass the API season id, not the bare year: the
                    # downstream endpoints are queried by season_id.
                    team_data = self.process_team_data(
                        team['team_id'],
                        team_name,
                        season_id,
                    )

                    if not team_data.empty:
                        all_matches.append(team_data)
                        print(f"Added {len(team_data)} matches for {team_name}")

                    # Extra pause between teams, on top of the per-request delay.
                    time.sleep(self.request_delay)

                except Exception as e:
                    # Best effort: one bad team must not abort the whole run.
                    print(f"Error processing {team['team_name']}: {e}")
                    continue

        if not all_matches:
            print("No data collected")
            return pd.DataFrame()

        print(f"\nCombining data from {len(all_matches)} team-season...")
        match_df = pd.concat(all_matches, ignore_index=True)

        match_df.columns = [c.lower() for c in match_df.columns]

        print(f"Final dataset shape: {match_df.shape}")
        return match_df