In [1]:
import requests
import pandas as pd
from datetime import datetime
import os

def get_fpl_data(timeout=30):
    """Fetch the FPL bootstrap payload and derive lookup tables from it.

    Args:
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A 4-tuple ``(bootstrap, current_gw, teams, players_basic)``:

        * ``bootstrap`` - the decoded JSON payload of ``bootstrap-static``.
        * ``current_gw`` - id of the event flagged ``is_current``; if none
          is, the id of the event flagged ``is_next``; ``None`` if neither
          flag is set on any event.
        * ``teams`` - mapping of team id to team name.
        * ``players_basic`` - mapping of player id to a dict with the keys
          ``name``, ``full_name``, ``position``, ``team`` (team name),
          ``value`` (``now_cost`` divided by 10) and ``selected`` (the raw
          ``selected_by_percent`` value).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If the payload lacks an expected field, or a player
            references a team id that is not in ``bootstrap['teams']``.
    """
    print("Fetching data from FPL API...")

    # Bootstrap data carries players ("elements"), teams and events.
    bootstrap_url = "https://fantasy.premierleague.com/api/bootstrap-static/"
    response = requests.get(bootstrap_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to decode an error page.
    response.raise_for_status()
    bootstrap = response.json()

    # Prefer the gameweek in progress; fall back to the upcoming one.
    events = bootstrap['events']
    current_gw = next((event['id'] for event in events if event['is_current']), None)
    if current_gw is None:
        current_gw = next((event['id'] for event in events if event['is_next']), None)

    print(f"Current/Next Gameweek: {current_gw}")

    # Team id -> team name.
    teams = {team['id']: team['name'] for team in bootstrap['teams']}

    # Player id -> basic descriptive fields.
    players_basic = {
        player['id']: {
            'name': player['web_name'],
            'full_name': f"{player['first_name']} {player['second_name']}",
            'position': get_position(player['element_type']),
            'team': teams[player['team']],  # team name, not id
            'value': player['now_cost'] / 10,  # API stores price in tenths
            'selected': player['selected_by_percent'],
        }
        for player in bootstrap['elements']
    }

    return bootstrap, current_gw, teams, players_basic

def get_position(element_type):
    """Translate an FPL ``element_type`` code into a short position label.

    Codes 1 to 4 map to 'GK', 'DEF', 'MID' and 'FWD' respectively; any
    other value yields 'Unknown'.
    """
    labels_by_code = dict(zip((1, 2, 3, 4), ('GK', 'DEF', 'MID', 'FWD')))
    try:
        return labels_by_code[element_type]
    except KeyError:
        # Unrecognised code: report it rather than fail.
        return 'Unknown'

def get_gameweek_data(gameweek, timeout=30):
    """Fetch the live per-player data for one gameweek.

    Args:
        gameweek: Gameweek (event) number to request.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON payload of the ``event/<gameweek>/live/`` endpoint,
        or ``None`` if the request could not be completed or the server did
        not answer with HTTP 200.
    """
    print(f"Fetching GW{gameweek} data...")

    url = f"https://fantasy.premierleague.com/api/event/{gameweek}/live/"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts are reported the same way as a
        # bad status code so callers only have to check for None.
        print(f"Error: Could not fetch GW{gameweek} data ({exc})")
        return None

    if response.status_code != 200:
        print(f"Error: Could not fetch GW{gameweek} data")
        return None

    return response.json()

def get_fixtures_for_gameweek(gameweek, timeout=30):
    """Fetch all fixtures and keep those belonging to one gameweek.

    Args:
        gameweek: Gameweek (event) number to keep; compared for equality
            against each fixture's ``event`` field.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A dict keyed by fixture id. Each value is a dict with the keys
        ``home_team`` and ``away_team`` (taken from ``team_h`` / ``team_a``),
        ``team_h_score``, ``team_a_score`` and ``kickoff_time``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    print(f"Fetching fixtures for GW{gameweek}...")

    fixtures_url = "https://fantasy.premierleague.com/api/fixtures/"
    response = requests.get(fixtures_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of iterating over an error payload.
    response.raise_for_status()
    fixtures = response.json()

    # Index the requested gameweek's fixtures by fixture id.
    return {
        fixture['id']: {
            'home_team': fixture['team_h'],
            'away_team': fixture['team_a'],
            'team_h_score': fixture['team_h_score'],
            'team_a_score': fixture['team_a_score'],
            'kickoff_time': fixture['kickoff_time'],
        }
        for fixture in fixtures
        if fixture['event'] == gameweek
    }

# Column order of the generated CSV.
_GW_COLUMN_ORDER = [
    'name', 'position', 'team', 'xP', 'assists', 'bonus', 'bps', 'clean_sheets',
    'clearances_blocks_interceptions', 'creativity', 'defensive_contribution', 'element',
    'expected_assist', 'expected_goal_involvements', 'expected_goals', 'expected_goals_conceded',
    'fixture', 'goals_conceded', 'goals_scored', 'ict_index', 'influence', 'kickoff_time',
    'minutes', 'modified', 'opponent_team', 'own_goals', 'penalties_missed', 'penalties_saved',
    'recoveries', 'red_cards', 'round', 'saves', 'selected', 'starts', 'tackles',
    'team_a_score', 'team_h_score', 'threat', 'total_points', 'transfers_balance',
    'transfers_in', 'transfers_out', 'value', 'was_home', 'yellow_cards',
]

# Live ``stats`` keys copied into a CSV column of the same name.
_GW_PASSTHROUGH_STATS = (
    'assists', 'bonus', 'bps', 'clean_sheets', 'clearances_blocks_interceptions',
    'creativity', 'defensive_contribution', 'expected_goal_involvements',
    'expected_goals', 'expected_goals_conceded', 'goals_conceded', 'goals_scored',
    'ict_index', 'influence', 'minutes', 'own_goals', 'penalties_missed',
    'penalties_saved', 'recoveries', 'red_cards', 'saves', 'starts', 'tackles',
    'threat', 'total_points', 'yellow_cards',
)

# CSV column -> live ``stats`` key, for columns whose name differs.
# NOTE(review): a sample run's stats keys did not include 'expected_points',
# so 'xP' appears to always fall back to 0 - confirm the intended source.
_GW_RENAMED_STATS = {
    'xP': 'expected_points',
    'expected_assist': 'expected_assists',
}


def _format_kickoff(kickoff_time):
    """Reformat an ISO-8601 kickoff string as 'YYYY-MM-DD HH:MM:SS'; return it unchanged if empty or unparseable."""
    if not kickoff_time:
        return kickoff_time
    try:
        parsed = datetime.fromisoformat(kickoff_time.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError):
        # Best effort: keep whatever the API gave us.
        return kickoff_time
    return parsed.strftime('%Y-%m-%d %H:%M:%S')


def _first_fixture_id(explain):
    """Return the first truthy ``fixture`` id found in a player's ``explain`` entries, or None."""
    return next(
        (entry.get('fixture') for entry in explain if entry.get('fixture')),
        None,
    )


def _fixture_columns(fixture_id, fixtures, player_team_id):
    """Build the fixture-derived CSV columns for one player (all None when the fixture is unknown)."""
    columns = {
        'kickoff_time': None,
        'team_h_score': None,
        'team_a_score': None,
        'was_home': None,
        'opponent_team': None,
    }
    if not fixture_id or fixture_id not in fixtures:
        return columns

    info = fixtures[fixture_id]
    columns['kickoff_time'] = _format_kickoff(info['kickoff_time'])
    columns['team_h_score'] = info['team_h_score']
    columns['team_a_score'] = info['team_a_score']

    # Opponent is reported as a team id. If the player's team matches
    # neither side (or is unknown), leave home/away undetermined instead
    # of guessing "away".
    if player_team_id == info['home_team']:
        columns['was_home'] = True
        columns['opponent_team'] = info['away_team']
    elif player_team_id == info['away_team']:
        columns['was_home'] = False
        columns['opponent_team'] = info['home_team']
    return columns


def create_gameweek_csv(gameweek, output_dir='data'):
    """Build ``gw<gameweek>.csv`` with one row per player in the live data.

    Combines the bootstrap data, the gameweek's live stats and the
    gameweek's fixtures, then writes the result with a fixed column order.
    Only the first fixture listed in a player's ``explain`` data is used for
    the fixture-related columns.

    NOTE(review): ``selected``, ``value`` and the ``transfers_*`` columns come
    from the bootstrap payload; presumably they describe the current state
    rather than the requested ``gameweek`` - confirm before using them for
    past gameweeks.

    Args:
        gameweek: Gameweek number to export.
        output_dir: Directory for the CSV; created if it does not exist.

    Returns:
        Path of the written CSV file, or ``None`` if the live gameweek data
        could not be fetched.
    """
    bootstrap, _, _, players_basic = get_fpl_data()
    gw_live_data = get_gameweek_data(gameweek)
    if not gw_live_data:
        return None
    # Only fetch fixtures once we know there is live data to pair them with.
    fixtures = get_fixtures_for_gameweek(gameweek)

    # Index bootstrap players once instead of scanning the list per row.
    # setdefault keeps the first entry should an id ever repeat.
    bootstrap_by_id = {}
    for player in bootstrap['elements']:
        bootstrap_by_id.setdefault(player['id'], player)

    # One timestamp for the whole export.
    modified = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    rows = []
    for element_data in gw_live_data['elements']:
        element_id = element_data['id']
        stats = element_data['stats']
        explain = element_data.get('explain', [])

        # Debug: print available stats fields for the player with id 1.
        if element_id == 1:
            print("\n" + "="*60)
            print("DEBUG: Available stats fields:")
            print("="*60)
            print(stats.keys())
            print("="*60 + "\n")

        player_info = players_basic.get(element_id, {})
        player_bootstrap = bootstrap_by_id.get(element_id)
        player_team_id = player_bootstrap['team'] if player_bootstrap else None

        fixture_id = _first_fixture_id(explain)

        if player_bootstrap:
            transfers_in = player_bootstrap.get('transfers_in_event', 0)
            transfers_out = player_bootstrap.get('transfers_out_event', 0)
        else:
            transfers_in = 0
            transfers_out = 0

        row = {column: stats.get(column, 0) for column in _GW_PASSTHROUGH_STATS}
        row.update(
            (column, stats.get(stat_key, 0))
            for column, stat_key in _GW_RENAMED_STATS.items()
        )
        row.update(_fixture_columns(fixture_id, fixtures, player_team_id))
        row.update({
            'name': player_info.get('full_name', ''),
            'position': player_info.get('position', ''),
            'team': player_info.get('team', ''),
            'element': element_id,
            'fixture': fixture_id,
            'modified': modified,
            'round': gameweek,
            'selected': player_info.get('selected', 0),
            'transfers_balance': transfers_in - transfers_out,
            'transfers_in': transfers_in,
            'transfers_out': transfers_out,
            'value': player_info.get('value', 0),
        })
        rows.append(row)

    # Passing the columns here fixes the order and also yields a valid,
    # correctly-headed empty frame when there are no rows.
    df = pd.DataFrame(rows, columns=_GW_COLUMN_ORDER)

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Save to CSV
    output_file = os.path.join(output_dir, f'gw{gameweek}.csv')
    df.to_csv(output_file, index=False)

    print(f"\n✅ Success! Created {output_file}")
    print(f"📊 Total players: {len(df)}")
    print(f"📅 Gameweek: {gameweek}")
    print(f"\nFirst few rows:")
    print(df[['name', 'position', 'team', 'total_points', 'minutes']].head(10))

    return output_file

def main():
    """Run the scraper interactively.

    Looks up the current (or next) gameweek, lets the user override it on
    stdin, then writes that gameweek's CSV via ``create_gameweek_csv`` and
    reports whether a file was produced.
    """
    print("=" * 60)
    print("FPL GAMEWEEK DATA SCRAPER")
    print("=" * 60)

    # Only the gameweek id is needed here; the rest of the tuple is unused.
    _, current_gw, _, _ = get_fpl_data()

    print(f"\nFetching data for Gameweek {current_gw}")
    print("-" * 60)

    # Allow user to specify different gameweek
    user_input = input(f"\nPress Enter to use GW{current_gw}, or enter a different gameweek number: ").strip()

    gameweek = current_gw
    if user_input:
        try:
            gameweek = int(user_input)
        except ValueError:
            # Non-numeric input: keep the detected gameweek. Catching only
            # ValueError lets Ctrl-C and real errors propagate.
            print("Invalid input. Using current gameweek.")

    # Create the CSV
    output_file = create_gameweek_csv(gameweek)

    if output_file:
        print(f"\n🎉 Done! File saved at: {output_file}")
        print("\nYou can now import this CSV into Power BI!")
    else:
        print("\n❌ Failed to create CSV file")

# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

FPL GAMEWEEK DATA SCRAPER
Fetching data from FPL API...
Current/Next Gameweek: 11

Fetching data for Gameweek 11
------------------------------------------------------------



Press Enter to use GW11, or enter a different gameweek number:  


Fetching data from FPL API...
Current/Next Gameweek: 11
Fetching GW11 data...
Fetching fixtures for GW11...

DEBUG: Available stats fields:
dict_keys(['minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat', 'ict_index', 'clearances_blocks_interceptions', 'recoveries', 'tackles', 'defensive_contribution', 'starts', 'expected_goals', 'expected_assists', 'expected_goal_involvements', 'expected_goals_conceded', 'total_points', 'in_dreamteam'])


✅ Success! Created data\gw11.csv
📊 Total players: 752
📅 Gameweek: 11

First few rows:
                           name position     team  total_points  minutes
0             David Raya Martín       GK  Arsenal             1       90
1    Kepa Arrizabalaga Revuelta       GK  Arsenal             0        0
2                     Karl Hein       GK  Arsenal             0        0
3       