In [217]:
import requests
import json
import os
import pandas as pd
from datetime import datetime
import concurrent.futures
import random


In [218]:
# Haaland index: 532

In [219]:
def fetch_url(url, timeout=10):
    """
    Fetches JSON data from a given URL using the requests library.

    Parameters:
    ----------
    url : str
        The URL to fetch data from.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up; passed straight to
        ``requests.get``. Defaults to 10 so a stalled connection cannot hang
        the notebook indefinitely.

    Returns:
    ----------
    data : dict, list or None
        The decoded JSON body if the request is successful, otherwise None
        (network error or timeout, an error status code, or a body that is
        not valid JSON).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts count as "not successful".
        return None

    if not response.ok:
        return None

    try:
        return response.json()
    except ValueError:
        # The JSON decode errors raised by response.json() derive from ValueError.
        return None

In [220]:
def fetch_urls_concurrently(urls, max_workers=None):
    """
    Fetches multiple URLs concurrently using ThreadPoolExecutor.

    Parameters:
    ----------
    urls : list
        A list of URLs to fetch.
    max_workers : int, optional
        Maximum number of worker threads. None (the default) lets
        ThreadPoolExecutor choose.

    Returns:
    ----------
    results : list
        The fetched results in the same order as ``urls``. URLs whose fetch
        returned an empty/None result are left out, so the list may be
        shorter than ``urls``.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order (unlike as_completed),
        # which keeps the output deterministic from run to run.
        fetched = list(executor.map(fetch_url, urls))

    # Drop failed fetches (fetch_url returns None) and empty payloads.
    return [result for result in fetched if result]

In [221]:
### Bootstrap data
# The bootstrap-static response supplies the 'teams', 'element_types' and
# 'elements' entries that the following cells read.
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
bootstrap_data = fetch_url(url)

# fetch_url returns None on failure; stop here with a clear message instead of
# failing later with "'NoneType' object is not subscriptable".
if bootstrap_data is None:
    raise RuntimeError(f'Could not fetch bootstrap data from {url}')

In [222]:
### Get player summary data
# Lookup tables keyed by the id rendered as a string
team_id_name = {str(team['id']): team['name'] for team in bootstrap_data['teams']}
position_id_name = {
    str(position['id']): position['plural_name_short']
    for position in bootstrap_data['element_types']
}

# Append team name and position name to player data.
# Note: this adds the two keys to the dicts inside bootstrap_data['elements'].
for player in bootstrap_data['elements']:
    team_id = str(player['team'])
    player['team_name'] = team_id_name[team_id]

    position_id = str(player['element_type'])
    player['position'] = position_id_name[position_id]

# Columns to keep in the summary table, in output order (each listed once)
required_keys = ['first_name',
                 'second_name',
                 'id',
                 'dreamteam_count',
                 'now_cost',
                 'now_cost_rank',
                 'team_name',
                 'position',
                 'total_points',
                 'transfers_in',
                 'transfers_out',
                 'value_season',
                 'minutes',
                 'goals_scored',
                 'assists',
                 'clean_sheets',
                 'goals_conceded',
                 'own_goals',
                 'penalties_saved',
                 'penalties_missed',
                 'yellow_cards',
                 'red_cards',
                 'saves',
                 'bonus',
                 'bps',
                 'starts',
                 'points_per_game',
                 'points_per_game_rank',
                 'points_per_game_rank_type',
                 'selected_by_percent',
                 'selected_rank',
                 'selected_rank_type',
                 'starts_per_90',
                 'clean_sheets_per_90',
                 ]

# Keys that a player record does not have are skipped rather than raising.
player_data_summary = [
    {key: player[key] for key in required_keys if key in player}
    for player in bootstrap_data['elements']
]

player_data_summary_df = pd.DataFrame(player_data_summary)

In [223]:
### Get player stats

In [224]:
# Get list of all player ids (built in one pass instead of re-creating the
# list with `+` on every iteration)
player_id_list = [element['id'] for element in bootstrap_data['elements']]


In [225]:
# Get a reproducible random sample of players
SAMPLE_SIZE = 10   # number of players to keep
SAMPLE_SEED = 42   # fixed seed so a fresh run picks the same players

# A dedicated Random instance leaves the global random state untouched, and
# capping at the list length avoids a ValueError when fewer players exist.
player_id_list = random.Random(SAMPLE_SEED).sample(
    player_id_list, min(SAMPLE_SIZE, len(player_id_list))
)

In [226]:
# Build the element-summary URL for each player id in player_id_list
player_id_urls = [
    f'https://fantasy.premierleague.com/api/element-summary/{player_id}/'
    for player_id in player_id_list
]


In [227]:
# Get player data
players_id_data = fetch_urls_concurrently(player_id_urls)

# fetch_urls_concurrently leaves out failed/empty responses without raising,
# so report any shortfall rather than silently analysing fewer players.
missing_count = len(player_id_urls) - len(players_id_data)
if missing_count:
    print(f'Warning: {missing_count} of {len(player_id_urls)} player requests returned no data')

In [228]:
# Keep only the "history" entry of each fetched player response
player_weekly_data = [player_response["history"] for player_response in players_id_data]


In [229]:
# Reshape to pandas dataframe
# Flatten the per-player lists of weeks into one list of week records
data_dict_list = [
    week_record
    for weeks_for_player in player_weekly_data
    for week_record in weeks_for_player
]

# Pull out the player id and gameweek number of every record
player_list = [week_record["element"] for week_record in data_dict_list]
week_list = [week_record["round"] for week_record in data_dict_list]

# One row per week record
player_weekly_data_df = pd.DataFrame(data_dict_list)

# Put the identifying columns first
player_weekly_data_df.insert(0, 'player_id', player_list)
player_weekly_data_df.insert(1, 'gameweek', week_list)

In [230]:
# Join to player summary table to see who players are
columns_to_join = ["id", "first_name", "second_name", "team_name", "position"]

# Drop any descriptive columns left over from a previous run of this cell so
# the merge does not create *_x / *_y suffixed copies (keeps the cell
# re-runnable). On a first run none of them should be present and nothing is dropped.
descriptive_columns = [column for column in columns_to_join if column != "id"]

player_weekly_data_df = pd.merge(
    player_weekly_data_df.drop(columns=descriptive_columns, errors="ignore"),
    player_data_summary_df[columns_to_join],
    left_on='player_id',
    right_on='id',
    how='left',
)

# Reduce columns (each name listed once, so the result has no duplicate columns)
columns_to_select = ["player_id", "gameweek", "first_name", "second_name", "team_name", "position",
                     "total_points",
                     "minutes",
                     "goals_scored",
                     "assists",
                     "clean_sheets",
                     "own_goals",
                     "penalties_saved",
                     "penalties_missed",
                     "yellow_cards",
                     "red_cards",
                     "saves",
                     "bonus",
                     "starts",
                     "value",
                     "transfers_balance",
                     "selected",
                     "transfers_in",
                     "transfers_out"
                     ]

player_weekly_data_df = player_weekly_data_df[columns_to_select]

In [None]:
# TODO: Handle double gameweeks (DGW): group rows by player and gameweek, aggregating the other columns
# TODO: Include a flag marking which rows are DGWs

In [231]:
# Save to json/csv in data