# Import the necessary libraries

In [5]:
from pathlib import Path

import pandas as pd
from tqdm import tqdm

from utils import check_win, get_last_season_pos

# tqdm is used to display a progress bar while the gameweek files are being loaded

# Initializing the seasons to process and the season preceding each of them

In [2]:
# Seasons to build training rows for, and the season that precedes each one.
# The two lists are paired by position: previous_years[k] is the season
# before years[k].
years = ["2021-22", "2022-23"]
previous_years = ["2020-21", "2021-22"]

# Gameweek labels "gw1" through "gw38".
gws = ["gw" + str(week) for week in range(1, 39)]

# Defining the Required Functions

In [None]:
def calculate_ratio_team_value(name):
    """Return a player's value as a percentage of their team's total value.

    The lookup is done against the module-level ``df`` (not a parameter), so
    ``df`` must already hold the frame being processed when this is called.

    Args:
        name: Player name to look up in ``df["name"]``. When several rows
            share the name, the first matching row supplies team and value.

    Returns:
        ``value * 100 / total``, where ``total`` is the sum of ``value`` over
        every row of ``df`` whose ``team`` equals the player's team.

    Raises:
        IndexError: If no row of ``df`` has the given name.
    """
    player_rows = df[df["name"] == name]
    player_team = player_rows["team"].iloc[0]
    team_total = df.loc[df["team"] == player_team, "value"].sum()
    player_value = player_rows["value"].iloc[0]
    return player_value * 100 / team_total


def calculate_position_rank(name):
    """Count teammates in the same position whose value is strictly higher.

    The lookup is done against the module-level ``df`` (not a parameter), so
    ``df`` must already hold the frame being processed when this is called.
    A result of 0 means no row with the same team and position has a higher
    value than the player.

    Args:
        name: Player name to look up in ``df["name"]``. When several rows
            share the name, the first matching row is used.

    Returns:
        Number of rows in ``df`` with the player's team and position and a
        ``value`` greater than the player's.

    Raises:
        IndexError: If no row of ``df`` has the given name.
    """
    player_rows = df[df["name"] == name]
    own_value = player_rows["value"].iloc[0]
    own_position = player_rows["position"].iloc[0]
    own_team = player_rows["team"].iloc[0]

    worth_more = df["value"] > own_value
    same_position = df["position"] == own_position
    same_team = df["team"] == own_team
    return int((worth_more & same_position & same_team).sum())

# Merging stats from previous seasons to build the training dataset

In [8]:


# Build one frame per (season, gameweek), enriched with:
#   * each team's and each opponent's last-season position,
#   * the player's share of team value and rank within team/position,
#   * the match result,
#   * the player's previous-season stats (columns suffixed "_ex"),
# then stack all of them into `all_data`.

# Root of the raw-data repository; every CSV read below lives under it.
DATA_URL = "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data"

# The two season lists are paired by position, so they must be the same length.
if len(years) != len(previous_years):
    raise ValueError("years and previous_years must have the same length")

list_dfs = []

for year, previous_year in zip(years, previous_years):
    print(year)

    # Previous-season player stats: build a full "name" column to join on,
    # drop the name parts, and suffix every column with "_ex" so nothing
    # collides with the gameweek columns after the merge.
    player_prev_stats = pd.read_csv(f"{DATA_URL}/{previous_year}/cleaned_players.csv")
    player_prev_stats = (
        player_prev_stats.assign(
            name=player_prev_stats["first_name"] + " " + player_prev_stats["second_name"]
        )
        .drop(columns=["first_name", "second_name"])
        .add_suffix("_ex")
    )

    # Team id -> team name lookup for this season, renamed so it can be merged
    # on the gameweek frame's "opponent_team" column.
    teams = pd.read_csv(f"{DATA_URL}/{year}/teams.csv", encoding="latin-1")[
        ["id", "name"]
    ].rename(columns={"id": "opponent_team", "name": "opponent"})

    # Opponent's position last season. get_last_season_pos(year) is called
    # once here and its return value is what .apply maps over the team names.
    teams["opponent_last_season_position"] = teams["opponent"].apply(
        get_last_season_pos(year)
    )

    for gameweek in tqdm(range(1, 39)):
        # NOTE: this variable must be called `df`. calculate_ratio_team_value
        # and calculate_position_rank read the module-level `df`, not an argument.
        df = pd.read_csv(
            f"{DATA_URL}/{year}/gws/gw{gameweek}.csv",
            encoding="latin-1",
        )

        # Team's position last season
        df["last_season_position"] = df["team"].apply(get_last_season_pos(year))

        # Player value as a percentage of team value, and rank within team/position
        df["percent_value"] = df["name"].apply(calculate_ratio_team_value)
        df["position rank"] = df["name"].apply(calculate_position_rank)

        # Check if the result was a win or not
        df["match_result"] = check_win(df)

        # Attach previous-season stats (left join keeps players with no
        # previous-season row), then tag the rows with season and gameweek.
        df = (
            pd.merge(
                df, player_prev_stats, left_on="name", right_on="name_ex", how="left"
            )
            .drop(columns="name_ex")
            .assign(season=year, GW=gameweek)
        )

        # Attach opponent name and opponent's last-season position
        df = pd.merge(df, teams, on="opponent_team", how="left")
        list_dfs.append(df)


# ignore_index is not passed, so each gameweek frame keeps its own row labels
# and the index of all_data is not unique.
all_data = pd.concat(list_dfs)

2021-22


100%|██████████████████████████████████████████████████████████████████████████████████| 38/38 [01:19<00:00,  2.08s/it]


2022-23


100%|██████████████████████████████████████████████████████████████████████████████████| 38/38 [01:25<00:00,  2.24s/it]


In [11]:
# Preview the first five merged rows as a quick sanity check.
all_data.head(n=5)

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,...,element_type_ex,season,GW,opponent,opponent_last_season_position,expected_assists,expected_goal_involvements,expected_goals,expected_goals_conceded,starts
0,Eric Bailly,DEF,Man Utd,0.0,0,0,0,0,0.0,286,...,DEF,2021-22,1,Leeds,9,,,,,
1,Keinan Davis,FWD,Aston Villa,0.4,0,0,0,0,0.0,49,...,FWD,2021-22,1,Watford,20,,,,,
2,Ayotomiwa Dele-Bashiru,MID,Watford,0.0,0,0,0,0,0.0,394,...,,2021-22,1,Aston Villa,11,,,,,
3,James Ward-Prowse,MID,Southampton,2.3,0,0,20,0,30.5,341,...,MID,2021-22,1,Everton,10,,,,,
4,Bruno Miguel Borges Fernandes,MID,Man Utd,4.4,0,3,61,0,35.9,277,...,MID,2021-22,1,Leeds,9,,,,,


In [16]:
# Create the output folder before writing: to_csv does not create missing
# directories, and failing here would discard the long download/merge above.
output_path = Path("datasets/previous_seasons.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
all_data.to_csv(output_path)