## Which opponent is the most difficult to score points against? (FPL points analysis)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from scipy.stats import boxcox, zscore, zscore, norm, gaussian_kde

In [3]:
# Gameweek window for the analysis: `week` is the starting gameweek and
# `gameweek` is the current one.
week, gameweek = 6, 16

## Collect available player data

In [4]:
# Read one CSV per gameweek, from `week` up to and including `gameweek`,
# keeping the individual frames in a list.
all_player_sep = [
    pd.read_csv(rf'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\Players\Seperate_GW\GW_{i}.csv')
    for i in range(week, gameweek + 1)
]

# Build the working table in one pass:
#   1. stack the per-gameweek frames under a fresh 0..n-1 index,
#   2. drop the 'Unnamed: 0' column (presumably the index column written
#      when the CSVs were saved),
#   3. keep only rows with more than 60 minutes played, i.e. appearances
#      that earned the 2-point minimum for playing time,
#   4. copy so later column assignments do not touch a view.
# NOTE(review): a player with exactly 60 minutes is excluded by `> 60`;
# confirm that this threshold is the intended one.
player_data = (
    pd.concat(all_player_sep, axis=0, ignore_index=True)
    .drop(columns=['Unnamed: 0'])
    .loc[lambda frame: frame['Minutes'] > 60]
    .copy()
)

## Frequency Table

In [5]:
def _points_frequency(frame, column_label):
    """Count how often each 'GW Points' value occurs in `frame`.

    Returns a two-column table: 'GW Points' (ascending) and the counts,
    stored under `column_label`.
    """
    counts = (
        frame['GW Points']
        .value_counts()
        .sort_index()
        .reset_index(name=f'{column_label}')
    )
    # Depending on the pandas version the former index may come back as a
    # column called 'index'; normalise it so every table shares the merge key.
    return counts.rename(columns={'index': 'GW Points'})


# Frequency of every GW Points value across all opponents
overall_frequency = _points_frequency(player_data, 'Total')

# Unique opponents in alphabetical order
opponents = sorted(player_data['Opponent'].unique())

# One frequency table per opponent, each with the opponent as its count column
opponent_frequencies = [
    _points_frequency(player_data[player_data['Opponent'] == opponent], opponent)
    for opponent in opponents
]

# Left-join every opponent table onto the full list of GW Points values so
# that point totals an opponent never conceded still get a row
counted_data = overall_frequency[['GW Points']]
for opponent_table in opponent_frequencies:
    counted_data = counted_data.merge(opponent_table, on='GW Points', how='left')

# Missing combinations are zero occurrences; cast back to whole numbers
counted_data = counted_data.fillna(0).astype(int)

# Put the overall 'Total' column next to the per-opponent columns
# (which are already in alphabetical order)
final_data = overall_frequency.merge(counted_data, on='GW Points')

# Show middle section of frequency table
final_data.iloc[10:15]

Unnamed: 0,GW Points,Total,ARS (A),ARS (H),AVL (A),AVL (H),BHA (A),BHA (H),BOU (A),BOU (H),...,NFO (A),NFO (H),SOU (A),SOU (H),TOT (A),TOT (H),WHU (A),WHU (H),WOL (A),WOL (H)
10,6,164,2,5,2,7,2,1,0,3,...,3,9,4,13,0,6,5,6,0,3
11,7,82,3,2,1,4,1,0,2,3,...,1,2,3,4,0,2,0,1,4,7
12,8,66,1,2,1,3,0,0,0,1,...,1,3,2,3,1,5,2,1,1,1
13,9,65,0,3,2,2,2,3,1,0,...,1,1,2,2,3,0,0,3,1,2
14,10,29,1,0,0,0,1,0,1,1,...,1,1,1,2,0,0,1,2,2,2


## Standardized GW Points vs clubs

In [6]:
# Select the rows for the requested positions, highest scorers first
def filter_and_sort(data, positions, points_column='GW Points'):
    """Return the rows of `data` whose 'Position' is in `positions`.

    The result is ordered by `points_column` from highest to lowest and
    keeps the original index labels. `data` itself is not modified.
    """
    in_requested_positions = data['Position'].isin(positions)
    selected = data[in_requested_positions]
    return selected.sort_values(by=points_column, ascending=False)

# Assign difficulty ratings based on z-scores using quintiles
def assign_difficulty(data, zscore_column='z_score', position_name=None):
    """Add a 'Difficulty' column to `data` from the quintile of its z-score.

    The values in `zscore_column` are split into five equal-sized buckets.
    The lowest-scoring bucket is rated 5 and the following ones 4, 3 and 2;
    the highest-scoring bucket would be rated 1 but is folded into 2, so the
    ratings actually used are 5, 4, 3 and 2.

    Parameters
    ----------
    data : DataFrame containing `zscore_column`. It is modified in place.
    zscore_column : name of the column to bucket.
    position_name : unused; accepted so existing callers keep working.

    Returns
    -------
    The same `data` object, with 'Difficulty' stored as an ordered
    categorical whose categories are [5, 4, 3, 2].

    Raises
    ------
    ValueError (from `pd.qcut`) when the quintile edges are not unique.
    """
    quintile = pd.qcut(data[zscore_column], q=5, labels=[5, 4, 3, 2, 1])

    # Fold rating 1 into rating 2 explicitly. Calling Series.replace on a
    # categorical column to merge categories is deprecated in pandas and the
    # resulting category set depends on the pandas version, so the final
    # categorical is built here with a fixed set of categories instead.
    folded = quintile.astype(object).where(quintile != 1, 2)
    data['Difficulty'] = pd.Categorical(folded, categories=[5, 4, 3, 2], ordered=True)
    return data

def process_players(data, positions, position_name):
    """Build a per-opponent difficulty table for a group of positions.

    Steps:
      1. keep the rows of `data` whose position is in `positions`;
      2. standardise 'GW Points' across all of those rows (one z-score per
         player appearance, not per opponent);
      3. average the z-scores and the raw points per 'Opponent';
      4. label every row with `position_name` and attach a 'Difficulty'
         rating derived from the averaged z-score.

    Returns a DataFrame with the columns 'Opponent', 'z_score',
    'Av_GW_Points', 'Position' and 'Difficulty'.
    """
    pool = filter_and_sort(data, positions)

    # z-score of the raw GW Points over the whole position pool
    pool = pool.assign(z_score=zscore(pool['GW Points']))

    # Mean z-score and mean raw points conceded by each opponent
    per_opponent = pool.groupby('Opponent', as_index=False).agg(
        z_score=('z_score', 'mean'),
        Av_GW_Points=('GW Points', 'mean'),
    )

    # Two decimals are enough for reading the table
    rounded_columns = ['z_score', 'Av_GW_Points']
    per_opponent[rounded_columns] = per_opponent[rounded_columns].round(2)

    # Tag the table with the position it will be applied to
    per_opponent['Position'] = position_name

    # Rate each opponent from the distribution of the averaged z-scores
    return assign_difficulty(
        per_opponent,
        zscore_column='z_score',
        position_name=position_name,
    )


# Difficulty is rated per pool of positions: goalkeepers and defenders share
# the defensive pool, midfielders and forwards share the attacking pool.
# Within a pool the two tables therefore differ only in their 'Position' label.
defensive_positions = ['GK', 'DEF']
attacking_positions = ['MID', 'FWD']

goalkeepers = process_players(player_data, defensive_positions, 'GK')
defenders = process_players(player_data, defensive_positions, 'DEF')
midfielders = process_players(player_data, attacking_positions, 'MID')
forwards = process_players(player_data, attacking_positions, 'FWD')

# Stack the four tables into one lookup of difficulty by opponent and position
FD_points = pd.concat([goalkeepers, defenders, midfielders, forwards])