In [59]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import os

## Load the merged GW dataset

Data retrieved from [https://github.com/vaastav/Fantasy-Premier-League](https://github.com/vaastav/Fantasy-Premier-League/tree/master/data/2023-24/gws)

Each row is the stats for a player in a given GW.  

In [60]:
prev_szn = '2023-24'
df = pd.read_csv(f'data/{prev_szn}_gws/merged_gw.csv')
df.columns

Index(['name', 'position', 'team', 'xP', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'element', 'expected_assists',
       'expected_goal_involvements', 'expected_goals',
       'expected_goals_conceded', 'fixture', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'kickoff_time', 'minutes', 'opponent_team',
       'own_goals', 'penalties_missed', 'penalties_saved', 'red_cards',
       'round', 'saves', 'selected', 'starts', 'team_a_score', 'team_h_score',
       'threat', 'total_points', 'transfers_balance', 'transfers_in',
       'transfers_out', 'value', 'was_home', 'yellow_cards', 'GW'],
      dtype='object')

Let's keep only relevant columns

In [61]:
cols = ['name', 'position','team',
        'xP', 'expected_assists', 'expected_goals',
        'assists', 'goals_scored',
        'bonus', 'bps',
        'minutes', 'total_points', 'value', 'GW']
df = df[cols]
df = df.rename(columns={'position': 'prev_position', 'expected_assists': 'xA', 'expected_goals': 'xG', 'value': 'prev_price'})
df.head()

Unnamed: 0,name,prev_position,team,xP,xA,xG,assists,goals_scored,bonus,bps,minutes,total_points,prev_price,GW
0,Femi Seriki,DEF,Sheffield Utd,0.5,0.0,0.0,0,0,0,0,0,0,40,1
1,Jack Hinshelwood,MID,Brighton,1.5,0.0,0.0,0,0,0,0,0,0,45,1
2,Jadon Sancho,MID,Man Utd,3.0,0.05,0.0,0,0,0,4,22,1,70,1
3,Rhys Norrington-Davies,DEF,Sheffield Utd,0.1,0.0,0.0,0,0,0,0,0,0,40,1
4,Vitaly Janelt,MID,Brentford,2.1,0.01,0.02,0,0,0,6,90,2,55,1


Let's remove the data from the relegated teams

In [62]:
# remove players from relegated teams
relegated = ['Sheffield Utd', 'Luton', 'Burnley']
df = df[~df['team'].isin(relegated)]
df.head()

Unnamed: 0,name,prev_position,team,xP,xA,xG,assists,goals_scored,bonus,bps,minutes,total_points,prev_price,GW
1,Jack Hinshelwood,MID,Brighton,1.5,0.0,0.0,0,0,0,0,0,0,45,1
2,Jadon Sancho,MID,Man Utd,3.0,0.05,0.0,0,0,0,4,22,1,70,1
4,Vitaly Janelt,MID,Brentford,2.1,0.01,0.02,0,0,0,6,90,2,55,1
5,Ionuț Radu,GK,Bournemouth,2.4,0.0,0.0,0,0,0,0,0,0,45,1
7,Jack Grealish,MID,Man City,4.1,0.0,0.0,0,0,0,0,0,0,75,1


## Add the current season data to the df

Get data from the FPL API

In [63]:
# Get names and prices for the current szn
position_mapping = {
        1: "GK",
        2: "DEF",
        3: "MID",
        4: "FWD"
    }
# Send a GET request to the current FPL API
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
response = requests.get(url)
data = response.json()

# Extract player information from the response
all_players = data['elements']

Get current season names, positions, and prices for all players

In [64]:
# Iterate over each player and print their name and price
names_current = []
positions_current = []
prices_current = []

for player in all_players:

    # get player name
    fname = player['first_name']
    lname = player['second_name']
    name = fname+' '+lname

    # get price at the start of the current szn
    price = player['now_cost'] - player['cost_change_start']

    # get current szn position - accounts for position changes
    element_type = player['element_type']
    position = position_mapping[element_type]

    #append to lists
    names_current.append(name)
    positions_current.append(position)
    prices_current.append(price)

# make sure each player has a name, position, and price
assert len(names_current) == len(positions_current) == len(prices_current)

## Add current price and position to df

Get players from last season that are playing this season. 

Note: this does not account for name changes

In [65]:
# get names in both previous and current szn
names_prev = df['name'].unique()

ns = []
poss = []
ps = []

# get names, positions, and prices for players in both previous and current szn
for i in range(len(names_current)):
    if names_current[i] in names_prev:
        ns.append(names_current[i])
        poss.append(positions_current[i])
        ps.append(prices_current[i])
        
# name and price, and name and position mappings
name_price_mapping = dict(zip(ns, ps))
name_pos_mapping = dict(zip(ns, poss))

# make sure mappings same size
assert len(name_price_mapping) == len(name_pos_mapping), f"Mappings not same size: {len(name_price)} != {len(name_pos)}"

Add price and position from current szn to df

In [66]:
df['current_price'] = df['name'].map(name_price_mapping)
df['current_position'] = df['name'].map(name_pos_mapping).reset_index(drop=True)
df.head()

Unnamed: 0,name,prev_position,team,xP,xA,xG,assists,goals_scored,bonus,bps,minutes,total_points,prev_price,GW,current_price,current_position
1,Jack Hinshelwood,MID,Brighton,1.5,0.0,0.0,0,0,0,0,0,0,45,1,50.0,MID
2,Jadon Sancho,MID,Man Utd,3.0,0.05,0.0,0,0,0,4,22,1,70,1,65.0,MID
4,Vitaly Janelt,MID,Brentford,2.1,0.01,0.02,0,0,0,6,90,2,55,1,50.0,MID
5,Ionuț Radu,GK,Bournemouth,2.4,0.0,0.0,0,0,0,0,0,0,45,1,,DEF
7,Jack Grealish,MID,Man City,4.1,0.0,0.0,0,0,0,0,0,0,75,1,65.0,MID


In [67]:
save_dir = f'data/scouting'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

df.to_csv(f'{save_dir}/scouting_2024-25.csv', index=False)