In [1]:
##################################################################################################
## -- Libs
##################################################################################################

import pandas as pd
import numpy as np
import neat
import gc

import warnings

# Suppress FutureWarning messages
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
##################################################################################################
## -- Read in Data
##################################################################################################

# Meta Data
meta = pd.read_csv('../2. Data/meta_data.csv')
feature_types_dict = dict(zip(meta['feature'], meta['feature_type']))

apply_stats_features = meta[meta['apply_stats'] == 1]['feature'].tolist()
modelling_valid_features = meta[meta['modelling_valid'] == 1]['feature'].tolist()

# Current Season
data_22_23 = pd.read_csv('../2. Data/22-23 FFL.csv', dtype=feature_types_dict, usecols=modelling_valid_features).drop_duplicates()
data_22_23['name'] = data_22_23['name'].str.replace(' ', '_')
data_22_23['kickoff_time'] = pd.to_datetime(data_22_23['kickoff_time'], format='%Y-%m-%dT%H:%M:%SZ')

print(f'{data_22_23.shape = }')
ALL_PLAYERS_22_23 = data_22_23.name.unique()

# Last Season
data_21_22 = pd.read_csv('../2. Data/21-22 FFL.csv', dtype=feature_types_dict).drop_duplicates()
data_21_22['name'] = data_21_22['name'].str.replace(' ', '_')

print(f'{data_21_22.shape = }')

# Clear 
del modelling_valid_features
gc.collect()

data_22_23.shape = (26505, 16)
data_21_22.shape = (25447, 36)


17

In [3]:
##################################################################################################
## -- Apply stats to Current Season
##################################################################################################

def create_player_dataframes(data, apply_stats_features):
    """
    Creates individual player DataFrames with missing rows for earlier game weeks,
    calculates rolling averages, and concatenates them into a single DataFrame.

    Args:
        data (pd.DataFrame): Original dataset containing player data.
        apply_stats_features (list): List of features for which rolling averages are calculated.

    Returns:
        pd.DataFrame: Updated DataFrame with player data.
    """

    # Create a list to store individual player DataFrames
    player_dfs = []

    # Loop through each player in the dataset
    for player in data['name'].unique():
        player_data = data[data['name'] == player]
        
        # Sort by game week to ensure the data is in order
        player_data = player_data.sort_values(by=['GW'])

        # Find the earliest game week for the player
        earliest_gw = player_data['GW'].min()
        
        # Create missing rows for earlier game weeks
        if earliest_gw > 1:
            missing_data = pd.DataFrame({
                'name': [player] * (earliest_gw - 1),
                'GW': range(1, earliest_gw),
                'player_available': [False] * (earliest_gw - 1)
            })
            
            # Add in the other columns from data and set them to NaN
            for column in data.columns:
                if column not in missing_data.columns:
                    missing_data[column] = np.nan

            # Set the features specified in apply_stats_features to NaN
            for feature in apply_stats_features:
                missing_data[feature] = np.nan
                    
            updated_player_data = pd.concat([missing_data, player_data]).reset_index(drop=True)
        
            # player_available: True for rows in player_data
            updated_player_data['player_available'] = updated_player_data['player_available'].fillna(True)
        else:
            updated_player_data = player_data
            updated_player_data['player_available'] = True
        
        # Calculate the mean and 3/5 GW rolling average for features where apply_stats=True,
        # but only for the weeks when the player is available
        for feature in apply_stats_features:
            updated_player_data[f'{feature}_mean_upto_GW'] = updated_player_data[feature].where(updated_player_data['player_available']).expanding().mean().round(2)
            updated_player_data[f'{feature}_rolling_3GW'] = updated_player_data[feature].where(updated_player_data['player_available']).rolling(window=3).mean().round(2)
            updated_player_data[f'{feature}_rolling_5GW'] = updated_player_data[feature].where(updated_player_data['player_available']).rolling(window=5).mean().round(2)

        player_dfs.append(updated_player_data)

    # Concatenate all player DataFrames into a single DataFrame
    data_updated = pd.concat(player_dfs, ignore_index=True)

    return data_updated


data_22_23_updated = create_player_dataframes(data_22_23, apply_stats_features)

print(f'{data_22_23_updated.shape = }')

data_22_23_updated.shape = (29732, 47)


In [4]:
##################################################################################################
## -- Create Game Week 0 Data
##################################################################################################

def create_player_gw0_summary(data, ALL_PLAYERS_22_23):
    """
    Creates a summary of player values for the 2021-2022 season and prepares the dataset for Game Week 0.

    Args:
        data (pd.DataFrame): DataFrame containing updated player data for the 2022-2023 season.
        ALL_PLAYERS_22_23 (list): List of player names for the 2022-2023 season.

    Returns:
        pd.DataFrame: Updated DataFrame with player data.
    """
    # Create summary of players' values for the 2021-2022 season
    data_21_22_summary = data.groupby('name').agg(
        last_season_value_mean=('value', 'mean'),
        last_season_value_max=('value', 'max'),
        last_season_value_min=('value', 'min')
    ).reset_index().round(3)

    # Create dataset for Game Week 0
    gw0_player_data = pd.DataFrame({
        'name': ALL_PLAYERS_22_23,
        'GW': 0
    })

    # Join Position, Team, and Player Available from GW1
    gw0_player_data = gw0_player_data.merge(
        data[data['GW'] == 1][['name', 'position', 'team', 'player_available']],
        how='left', on='name'
    )

    # Join Last Season Value Summary Stats
    gw0_player_data = gw0_player_data.merge(
        data_21_22_summary, how='left', on='name'
    )

    # Add in the other columns from data and set them to NaN
    for column in data.columns:
        if column not in gw0_player_data.columns:
            gw0_player_data[column] = np.nan

    # Join GW0 to the main dataset
    data_gw0 = pd.concat([gw0_player_data, data])

    # Create a binary column indicating whether the player is new (last season value is missing)
    data_gw0['new_player'] = data_gw0['last_season_value_mean'].isnull().astype(int)

    return data_gw0

data_22_23_updated_GW0 = create_player_gw0_summary(data_22_23_updated, ALL_PLAYERS_22_23)
print(f'{data_22_23_updated_GW0.shape = }')


data_22_23_updated_GW0.shape = (30510, 51)


In [5]:
##################################################################################################
## -- One Hot Encode
##################################################################################################

def create_one_hot_encoded_table(data):
    """
    Creates a one-hot encoded DataFrame by converting categorical columns to dummy variables.

    Args:
        data_22_23_updated (pd.DataFrame): DataFrame containing updated player data.
        GW_column_name (str): Name of the game week column (default is 'GW').

    Returns:
        pd.DataFrame: Final DataFrame with one-hot encoded features.
    """

    # Create one-hot encoding for char columns
    one_hot_encoding_list = [data]

    char_cols = [col for col in data.columns if data[col].dtype == object]
    for col in char_cols:
        one_hot = pd.get_dummies(data[col], prefix=col, prefix_sep=':').astype(int)
        one_hot_encoding_list.append(one_hot)

    # Manually create one-hot encoding for GW
    one_hot_gw = pd.get_dummies(data['GW'], prefix='GW', prefix_sep=':').astype(int)
    one_hot_encoding_list.append(one_hot_gw)

    # Create Final  Table
    data_final = pd.concat(one_hot_encoding_list, axis=1)

    return data_final


data_22_23_final = create_one_hot_encoded_table(data_22_23_updated_GW0)
print(f'{data_22_23_final.shape = }')


data_22_23_final.shape = (30510, 891)


In [9]:
data_22_23_final.name.iloc[0]

'Nathan_Redmond'

In [56]:
import sys
# Create a dictionary to store player data
def create_player_dict(data, ALL_PLAYERS_22_23):
    player_dict = {}
    for i, player in enumerate(ALL_PLAYERS_22_23):
        sys.stdout.write(f'\r{player}: {i/len(ALL_PLAYERS_22_23):.2f}%')
        sys.stdout.write('                        ')
        player_data = data[data['name'] == player]
        player_data = player_data.sort_values(by=['GW','kickoff_time'])
        player_data = player_data.groupby('GW').last().reset_index()
        player_dict[player] = {GW: player_data[player_data['GW'] == GW].to_dict(orient='records') for GW in range(39)}

    return player_dict

player_dict = create_player_dict(data_22_23_final, ALL_PLAYERS_22_23)

Ross_Barkley: 0.25%                                               

KeyboardInterrupt: 

In [43]:
len(ALL_PLAYERS_22_23)

777

In [None]:
##################################################################################################
## -- Create Player GW Dict
##################################################################################################

# Create a dictionary to store player data
def create_player_dict(data, ALL_PLAYERS_22_23):
    player_dict = {}
    for player in ALL_PLAYERS_22_23:
        player_data = data[data['name'] == player]
        player_data = player_data.sort_values(by=['GW','kickoff_time'])
        player_data = player_data.groupby('GW').last().reset_index()
        player_dict[player] = {GW: {col: player_data[player_data['GW'] == GW][col].iloc[0] if not player_data[player_data['GW'] == GW].empty else None for col in player_data.columns} for GW in range(39)}
    return player_dict

player_dict2 = create_player_dict(data_22_23_final, ALL_PLAYERS_22_23)


IndexError: single positional indexer is out-of-bounds

In [7]:
def create_player_dict_optimized(data, ALL_PLAYERS_22_23):
    # Sort and group data upfront
    sorted_data = data.sort_values(by=['GW', 'kickoff_time'])
    grouped_data = sorted_data.groupby(['name', 'GW']).last().reset_index()

    # Use a dictionary comprehension to create player_dict
    player_dict = {
        player: {GW: grouped_data[(grouped_data['name'] == player) & (grouped_data['GW'] == GW)] for GW in range(39)} for player in ALL_PLAYERS_22_23
        }

    return player_dict

player_dict2 = create_player_dict_optimized(data_22_23_final, ALL_PLAYERS_22_23)


In [16]:
player_dict['Weston_McKennie'][22][['kickoff_time'] + [col for col in data_22_23_final.columns if 'influence' in col]]

Unnamed: 0,kickoff_time,influence,influence_mean_upto_GW,influence_rolling_3GW,influence_rolling_5GW
22,2023-02-08 20:00:00,17.2,9.8,,


In [32]:
player_dict['Nathan_Redmond'][0].to_dict(orient='records')

[{'GW': 0,
  'name': 'Nathan_Redmond',
  'position': 'MID',
  'team': 'Southampton',
  'player_available': True,
  'last_season_value_mean': 53.158,
  'last_season_value_max': 55.0,
  'last_season_value_min': 53.0,
  'xP': nan,
  'assists': nan,
  'clean_sheets': nan,
  'creativity': nan,
  'fixture': nan,
  'goals_conceded': nan,
  'goals_scored': nan,
  'influence': nan,
  'kickoff_time': NaT,
  'red_cards': nan,
  'value': nan,
  'yellow_cards': nan,
  'xP_mean_upto_GW': nan,
  'xP_rolling_3GW': nan,
  'xP_rolling_5GW': nan,
  'assists_mean_upto_GW': nan,
  'assists_rolling_3GW': nan,
  'assists_rolling_5GW': nan,
  'clean_sheets_mean_upto_GW': nan,
  'clean_sheets_rolling_3GW': nan,
  'clean_sheets_rolling_5GW': nan,
  'creativity_mean_upto_GW': nan,
  'creativity_rolling_3GW': nan,
  'creativity_rolling_5GW': nan,
  'goals_conceded_mean_upto_GW': nan,
  'goals_conceded_rolling_3GW': nan,
  'goals_conceded_rolling_5GW': nan,
  'goals_scored_mean_upto_GW': nan,
  'goals_scored_rolli

In [17]:
##################################################################################################
## -- create Player and Team class Definitions
##################################################################################################

# Player
class Player:
    def __init__(self, **kwargs):
        self.points = 0
        for key, value in kwargs.items():
            setattr(self, key, value)

# Create list of Player Objects
all_available_players = []

for player in player_dict:
    player_to_add = Player(**player_dict[player][0])
    all_available_players.append(player_to_add)
            
# Team
class Team:
    def __init__(self):
        self.original_budget = 100_000_000
        self.dynamic_budget = self.original_budget # Budget that gets updated each game week as players value changes
        self.players = []
        self.team_points = 0
        self.MAX_POSITIONS = {'GK': 1,'DEF': 4,'MID': 4,'FWD': 2}

    def add_player(self, player):
        if player.value <= self.dynamic_budget:
            self.players.append(player)
            self.dynamic_budget -= player.value

    def remove_player(self, player):
        self.players.remove(player)
        self.dynamic_budget += player.value

    def update_team_points(self):
        self.team_points += sum(player.gw_points for player in self.players)

    def can_player_be_added_based_on_position(self, player):
        # Check the number of players of this position in the current team and compare with maximum allowed
        players_of_same_position = [p for p in self.players if p.position == player.position]
        return len(players_of_same_position) < self.MAX_POSITIONS[player.position]

In [26]:
x =1
x

1

In [25]:
# Assuming you have an existing Player object
existing_player = all_available_players[0]  # Replace with the actual index of your player

# List all attributes
for attribute in dir(existing_player):
    if not attribute.startswith("__"):  # Exclude built-in attributes
        print(f"{attribute}: {getattr(existing_player, attribute)}")



0    0
Name: GW, dtype: int64
0    1
Name: GW:0, dtype: int32
0    0
Name: GW:1, dtype: int32
0    0
Name: GW:10, dtype: int32
0    0
Name: GW:11, dtype: int32
0    0
Name: GW:12, dtype: int32
0    0
Name: GW:13, dtype: int32
0    0
Name: GW:14, dtype: int32
0    0
Name: GW:15, dtype: int32
0    0
Name: GW:16, dtype: int32
0    0
Name: GW:17, dtype: int32
0    0
Name: GW:18, dtype: int32
0    0
Name: GW:19, dtype: int32
0    0
Name: GW:2, dtype: int32
0    0
Name: GW:20, dtype: int32
0    0
Name: GW:21, dtype: int32
0    0
Name: GW:22, dtype: int32
0    0
Name: GW:23, dtype: int32
0    0
Name: GW:24, dtype: int32
0    0
Name: GW:25, dtype: int32
0    0
Name: GW:26, dtype: int32
0    0
Name: GW:27, dtype: int32
0    0
Name: GW:28, dtype: int32
0    0
Name: GW:29, dtype: int32
0    0
Name: GW:3, dtype: int32
0    0
Name: GW:30, dtype: int32
0    0
Name: GW:31, dtype: int32
0    0
Name: GW:32, dtype: int32
0    0
Name: GW:33, dtype: int32
0    0
Name: GW:34, dtype: int32
0    0
Name: GW:3

In [None]:

# Create Dict of Player attributes for start
# Convert the DataFrame to a list of dictionaries, one for each row
gw0_player_data_dict = gw0_player_data.to_dict(orient='records')

# Create a dictionary using the 'name' field as the key for each record
gw0_player_data_dict = {record['name']: record for record in gw0_player_data_dict}
gw0_player_data_dict


# %% -- Create Player Object
class Player:
    def __init__(self, **kwargs):
        self.points = 0
        for key, value in kwargs.items():
            setattr(self, key, value)

# Create list of Player Objects
all_available_players = []

for player in gw0_player_data_dict:
    player_to_add = Player(**gw0_player_data_dict[player])
    all_available_players.append(player_to_add)


class Team:
    def __init__(self):
        self.original_budget = 100_000_000
        self.dynamic_budget = self.original_budget # Budget that gets updated each game week as players value changes
        self.players = []
        self.team_points = 0

    def add_player(self, player):
        if player.value <= self.dynamic_budget:
            self.players.append(player)
            self.dynamic_budget -= player.value

    def remove_player(self, player):
        self.players.remove(player)
        self.dynamic_budget += player.value

    def update_team_points(self):
        self.team_points += sum(player.gw_points for player in self.players)

    def can_player_be_added_based_on_position(self, player):
        # Check the number of players of this position in the current team and compare with maximum allowed
        players_of_same_position = [p for p in self.players if p.position == player.position]
        return len(players_of_same_position) < MAX_POSITIONS[player.position]

# %% -- Helper Functions

# Function to prepare input for a single player
def prepare_input_for_player(player,gw):
    player_input = []
    
    # Normalise numeric features
    for feature in last_season_features + current_season_features:
        feature_to_add = (getattr(player, feature) - means_gw_player_data[gw]) / stds_gw_player_data[gw]
        player_input.append(feature_to_add)

    # Non-normalise numeric features
    for feature in ['new_player']:
        feature_to_add = getattr(player, feature)
        player_input.append(feature_to_add)

    # One-hot encoded character features
    for feature in ['position', 'team', 'gw']:
        one_hot_vector = one_hot_encoding_dicts[feature][getattr(player, feature)]
        player_input.extend(one_hot_vector)

    return player_input


# %% -- Setup NEAT structure

# Load NEAT config
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'config.txt')

# Create the population
p = neat.Population(config)

# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter(True))
p.add_reporter(neat.StatisticsReporter())


def eval_genomes(genomes, config):
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)

        # Initialize fitness
        genome.fitness = 0
        
        # Create Team Object
        team = Team()

        # GAME WEEK 0 | INITIAL SELECTION
        output_dict = {}

        # Pass each Player Inputs to genome | Save Output in Dict
        for player in all_available_players:
            player_input = prepare_input_for_player(player,0)
            output = net.activate(player_input)
            output_dict[player.name] = output

        # Create a list of tuples (player, output) from the network output
        indexed_output = [(player, output_dict[player.name]) for player in all_available_players]

        # Sort the list by value in descending order
        indexed_output.sort(key=lambda x: x[1], reverse=True)

        # Keep track of the number of players added
        num_players_added = 0

        # Iterate over the sorted list, adding players to the team
        for player, _ in indexed_output:
            # Select player
            player = all_available_players[player]
            
            # Check if the player can be added based on position and budget
            if team.can_add_player_based_on_position(player) and team.dynamic_budget >= player.value:
                team.add_player(player)
                
                # Increment the number of players added
                num_players_added += 1
                
                # Stop if 11 players have been added
                if num_players_added == 11:
                    break

        # GAME WEEK 1+ | PLAY GAME
        # Loop through each Game Week
        for game_week in gw_values[1:]:
            # Update Player stats for Game Week
            for player in all_available_players:
                # Fetch the data for this player and this game week
                gw_data = data_22_23_gw_dict[game_week]
                player_data_for_gw = gw_data[gw_data['name'] == player.name]
                
                # If there is no data for this player for this game week, continue to the next player
                if player_data_for_gw.empty:
                    continue
                
                # Update player stats for this game week
                for stat in current_season_features:
                    setattr(player, stat, player_data_for_gw[stat].values[0])

                # Update Game Week 
                setattr(player, stat, game_week)



            # Update budget based on new player values
            team.dynamic_budget = sum(player.value for player in team.players) - team.original_budget


            output_dict = {}

            # Pass each Player Inputs to genome | Save Output in Dict
            for player in all_available_players:
                player_input = prepare_input_for_player(player,0)
                output = net.activate(player_input)
                output_dict[player.name] = output

            genome.fitness = team.team_points

# Run until a solution is found.
winner = p.run(eval_genomes, 50) 


