In [None]:
web_scraping_and_visualisation.ipynb

In [None]:
/Users/evgenigeorgiev/Documents/Jupyter\ Projects/FPL/web_scraping_and_visualisation.ipynb

In [2]:
import requests
import pandas as pd
from tqdm import tqdm
import os

In [3]:
# Base URL for the Fantasy Premier League API
base_url = 'https://fantasy.premierleague.com/api/'

# Function to get JSON data from a given URL
def get_json(url):
    try:
        response = requests.get(url)  # Sends a GET request to the URL
        response.raise_for_status()  # Will raise an HTTPError if the request is unsuccessful
        return response.json()  # Returns the JSON content from the response
    except requests.exceptions.RequestException:
        return None  # Returns None if there is any exception (e.g., network error, invalid URL)

# Function to create a DataFrame for a specific player (ID) and gameweek
def create_gameweek_specific_player_df(player_id, gameweek):
    player_data = get_json(base_url + f'element-summary/{player_id}/')  # Fetches player data
    if player_data:
        history_df = pd.DataFrame(player_data['history'])  # Converts the 'history' part of player data to a DataFrame
        return history_df[history_df['round'] == gameweek]  # Filters and returns data for the specified gameweek
    else:
        return pd.DataFrame()  # Returns an empty DataFrame if no player data is found

# Part of the code responsible for saving the data to file directory so we do not need to re-run scraping.
player_save_path = "/Users/evgenigeorgiev/Documents/Jupyter Projects/FPL/player_dataframes"
if not os.path.exists(player_save_path):
    os.makedirs(player_save_path)  # Creates the directory if it does not exist

player_ids = range(800)  # Adjust as necessary - this defines the range of player IDs to fetch

for gw in range(1, 39):  # Iterates through gameweeks 1 to 38
    gw_data = []  # Initializes an empty list to store gameweek data
    with tqdm(total=len(player_ids), desc=f"Processing GW{gw}") as pbar:  # Initializes a progress bar
        for player_id in player_ids:  # Iterates through each player ID
            gw_specific_df = create_gameweek_specific_player_df(player_id, gw)  # Gets the gameweek-specific DataFrame
            if not gw_specific_df.empty:
                gw_specific_df['player_id'] = player_id  # Adds the player ID to the DataFrame
                gw_data.append(gw_specific_df)  # Appends the DataFrame to the list
            pbar.update(1)  # Updates the progress bar

    if gw_data:
        combined_gw_df = pd.concat(gw_data, ignore_index=True)  # Combines all DataFrame into one
        file_path = os.path.join(player_save_path, f'gw{gw}_players.csv')  # Defines the file path
        combined_gw_df.to_csv(file_path, index=False)  # Saves the DataFrame to a CSV file
    else:
        print(f"No data available for Gameweek {gw}.")  # Prints a message if there is no data for the gameweek


Processing GW1:   0%|                                   | 0/800 [00:00<?, ?it/s]Fatal Python error: config_get_locale_encoding: failed to get the locale encoding: nl_langinfo(CODESET) failed
Python runtime state: preinitialized

Processing GW1: 100%|█████████████████████████| 800/800 [01:05<00:00, 12.28it/s]
Processing GW2: 100%|█████████████████████████| 800/800 [01:03<00:00, 12.54it/s]
Processing GW3: 100%|█████████████████████████| 800/800 [01:05<00:00, 12.14it/s]
Processing GW4: 100%|█████████████████████████| 800/800 [01:04<00:00, 12.35it/s]
Processing GW5: 100%|█████████████████████████| 800/800 [01:07<00:00, 11.86it/s]
Processing GW6: 100%|█████████████████████████| 800/800 [01:04<00:00, 12.41it/s]
Processing GW7: 100%|█████████████████████████| 800/800 [01:06<00:00, 12.02it/s]
Processing GW8: 100%|█████████████████████████| 800/800 [01:06<00:00, 12.02it/s]
Processing GW9: 100%|█████████████████████████| 800/800 [01:07<00:00, 11.86it/s]
Processing GW10: 100%|████████████████████

No data available for Gameweek 21.


Processing GW22: 100%|████████████████████████| 800/800 [01:05<00:00, 12.20it/s]


No data available for Gameweek 22.


Processing GW23: 100%|████████████████████████| 800/800 [01:05<00:00, 12.21it/s]


No data available for Gameweek 23.


Processing GW24: 100%|████████████████████████| 800/800 [01:05<00:00, 12.20it/s]


No data available for Gameweek 24.


Processing GW25: 100%|████████████████████████| 800/800 [01:06<00:00, 12.09it/s]


No data available for Gameweek 25.


Processing GW26: 100%|████████████████████████| 800/800 [01:04<00:00, 12.38it/s]


No data available for Gameweek 26.


Processing GW27: 100%|████████████████████████| 800/800 [01:05<00:00, 12.16it/s]


No data available for Gameweek 27.


Processing GW28: 100%|████████████████████████| 800/800 [01:05<00:00, 12.19it/s]


No data available for Gameweek 28.


Processing GW29: 100%|████████████████████████| 800/800 [01:05<00:00, 12.22it/s]


No data available for Gameweek 29.


Processing GW30: 100%|████████████████████████| 800/800 [01:04<00:00, 12.34it/s]


No data available for Gameweek 30.


Processing GW31: 100%|████████████████████████| 800/800 [01:05<00:00, 12.23it/s]


No data available for Gameweek 31.


Processing GW32: 100%|████████████████████████| 800/800 [01:05<00:00, 12.28it/s]


No data available for Gameweek 32.


Processing GW33: 100%|████████████████████████| 800/800 [01:05<00:00, 12.20it/s]


No data available for Gameweek 33.


Processing GW34: 100%|████████████████████████| 800/800 [01:06<00:00, 12.01it/s]


No data available for Gameweek 34.


Processing GW35: 100%|████████████████████████| 800/800 [01:05<00:00, 12.22it/s]


No data available for Gameweek 35.


Processing GW36: 100%|████████████████████████| 800/800 [01:04<00:00, 12.42it/s]


No data available for Gameweek 36.


Processing GW37: 100%|████████████████████████| 800/800 [01:03<00:00, 12.59it/s]


No data available for Gameweek 37.


Processing GW38: 100%|████████████████████████| 800/800 [01:05<00:00, 12.29it/s]

No data available for Gameweek 38.



