In [1]:
##################################################################################################
## -- Libs
##################################################################################################

import pandas as pd
import numpy as np
import neat
import gc
import sys
import pickle

import warnings

# Suppress FutureWarning messages
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
##################################################################################################
## -- Read in Data
##################################################################################################

# Meta Data
meta = pd.read_csv('../2. Data/meta_data.csv')
feature_types_dict = dict(zip(meta['feature'], meta['feature_type']))

apply_stats_features = meta[meta['apply_stats'] == 1]['feature'].tolist()
modelling_valid_features = meta[meta['modelling_valid'] == 1]['feature'].tolist()

# Current Season
data_22_23 = pd.read_csv('../2. Data/22-23 FFL.csv', dtype=feature_types_dict, usecols=modelling_valid_features).drop_duplicates()
data_22_23['name'] = data_22_23['name'].str.replace(' ', '_')
data_22_23['kickoff_time'] = pd.to_datetime(data_22_23['kickoff_time'], format='%Y-%m-%dT%H:%M:%SZ')

print(f'{data_22_23.shape = }')
ALL_PLAYERS_22_23 = data_22_23.name.unique()

# Last Season
data_21_22 = pd.read_csv('../2. Data/21-22 FFL.csv', dtype=feature_types_dict).drop_duplicates()
data_21_22['name'] = data_21_22['name'].str.replace(' ', '_')

print(f'{data_21_22.shape = }')

# Clear 
del modelling_valid_features
gc.collect()

data_22_23.shape = (26505, 17)
data_21_22.shape = (25447, 36)


17

In [3]:
##################################################################################################
## -- Apply stats to Current Season
##################################################################################################

def create_player_dataframes(data, apply_stats_features):
    """
    Creates individual player DataFrames with missing rows for earlier game weeks,
    calculates rolling averages, and concatenates them into a single DataFrame.

    Args:
        data (pd.DataFrame): Original dataset containing player data.
        apply_stats_features (list): List of features for which rolling averages are calculated.

    Returns:
        pd.DataFrame: Updated DataFrame with player data.
    """

    # Create a list to store individual player DataFrames
    player_dfs = []

    # Loop through each player in the dataset
    for player in data['name'].unique():
        player_data = data[data['name'] == player]
        
        # Sort by game week to ensure the data is in order
        player_data = player_data.sort_values(by=['GW'])

        # Find the missing game weeks for the player
        gw_values = set(player_data['GW'].unique())
        missing_values = set(range(1, 39)) - gw_values
        minimum_gw = player_data['GW'].min()
        less_than_min_gw = [gw for gw in missing_values if gw < minimum_gw]
        greater_than_min_gw = [gw for gw in missing_values if gw > minimum_gw]

        # Create missing rows for earlier game weeks
        if len(missing_values) > 0:
            missing_data = pd.DataFrame({
                'name': [player] * len(missing_values),
                'GW': list(missing_values),
                'player_available': [False] * len(less_than_min_gw) + [True] * len(greater_than_min_gw),
                'team_played': [False] * len(missing_values)
            })

            
            # Add in the other columns from data and set them to NaN
            for column in data.columns:
                if column not in missing_data.columns:
                    missing_data[column] = np.nan

            # Set the features specified in apply_stats_features to NaN
            for feature in apply_stats_features:
                missing_data[feature] = np.nan
                    
            updated_player_data = pd.concat([missing_data, player_data]).reset_index(drop=True)
        
            # player_available: True for rows in player_data
            updated_player_data['player_available'] = updated_player_data['player_available'].fillna(True)
            updated_player_data['team_played'] = updated_player_data['team_played'].fillna(True)
        else:
            updated_player_data = player_data
            updated_player_data['player_available'] = True
            updated_player_data['team_played'] = True
        
        # Calculate the mean and 3/5 GW rolling average for features where apply_stats=True,
        # but only for the weeks when the player is available / the team had a match
        for feature in apply_stats_features:
            updated_player_data[f'{feature}_mean_upto_GW'] = updated_player_data[feature].where(updated_player_data['team_played']).expanding().mean().round(2)
            updated_player_data[f'{feature}_rolling_3GW'] = updated_player_data[feature].where(updated_player_data['team_played']).rolling(window=3).mean().round(2)
            updated_player_data[f'{feature}_rolling_5GW'] = updated_player_data[feature].where(updated_player_data['team_played']).rolling(window=5).mean().round(2)

        player_dfs.append(updated_player_data)

    # Convert updated_player_data to int
    updated_player_data['player_available'] = updated_player_data['player_available'].astype(int)
    updated_player_data['team_played'] = updated_player_data['team_played'].astype(int)


    # Concatenate all player DataFrames into a single DataFrame
    data_updated = pd.concat(player_dfs, ignore_index=True)

    # Sort values
    data_updated = data_updated.sort_values(['name', 'GW'])

    # Fill NaN values with the value from the row above
    # Due to some GWs not having games - use data from last GW
    data_updated = data_updated.fillna(method='ffill')

    # Set total_points to zero on GWs where team did not play
    data_updated.loc[data_updated['team_played'] == 0, 'total_points'] = 0

    return data_updated

data_22_23_updated = create_player_dataframes(data_22_23, apply_stats_features)

print(f'{data_22_23_updated.shape = }')

data_22_23_updated.shape = (31109, 52)


In [4]:
##################################################################################################
## -- Create Game Week 0 Data
##################################################################################################

def create_player_gw0_summary(data, ALL_PLAYERS_22_23):
    """
    Creates a summary of player values for the 2021-2022 season and prepares the dataset for Game Week 0.

    Args:
        data (pd.DataFrame): DataFrame containing updated player data for the 2022-2023 season.
        ALL_PLAYERS_22_23 (list): List of player names for the 2022-2023 season.

    Returns:
        pd.DataFrame: Updated DataFrame with player data.
    """
    # Create summary of players' values for the 2021-2022 season
    data_21_22_summary = data.groupby('name').agg(
        last_season_value_mean=('value', 'mean'),
        last_season_value_max=('value', 'max'),
        last_season_value_min=('value', 'min')
    ).reset_index().round(3)

    # Create dataset for Game Week 0
    gw0_player_data = pd.DataFrame({
        'name': ALL_PLAYERS_22_23,
        'GW': 0
    })

    # Join Position, Team, and Player Available from GW1
    gw0_player_data = gw0_player_data.merge(
        data[data['GW'] == 1][['name', 'position', 'team', 'player_available','team_played']],
        how='left', on='name'
    )

    # Add in the other columns from data and set them to NaN
    for column in data.columns:
        if column not in gw0_player_data.columns:
            gw0_player_data[column] = np.nan

    # Join GW0 to the main dataset
    data_gw0 = pd.concat([gw0_player_data, data])

    # Join Last Season Value Summary Stats
    data_gw0 = data_gw0.merge(
        data_21_22_summary, how='left', on='name'
    )

    # Create a binary column indicating whether the player is new (last season value is missing)
    data_gw0['new_player_this_season'] = data_gw0['last_season_value_mean'].isnull().astype(int)

    return data_gw0

data_22_23_updated_GW0 = create_player_gw0_summary(data_22_23_updated, ALL_PLAYERS_22_23)
print(f'{data_22_23_updated_GW0.shape = }')


data_22_23_updated_GW0.shape = (31887, 56)


In [5]:
##################################################################################################
## -- One Hot Encode
##################################################################################################

def create_one_hot_encoded_table(data):
    """
    Creates a one-hot encoded DataFrame by converting categorical columns to dummy variables.

    Args:
        data_22_23_updated (pd.DataFrame): DataFrame containing updated player data.
        GW_column_name (str): Name of the game week column (default is 'GW').

    Returns:
        pd.DataFrame: Final DataFrame with one-hot encoded features.
    """

    # Create one-hot encoding for char columns
    one_hot_encoding_list = [data]

    # char_cols = [col for col in data.columns if data[col].dtype == object]
    char_cols = ['position']

    for col in char_cols:
        one_hot = pd.get_dummies(data[col], prefix=col, prefix_sep='_').astype(int)
        one_hot_encoding_list.append(one_hot)

    # Manually create one-hot encoding for GW
    one_hot_gw = pd.get_dummies(data['GW'], prefix='GW', prefix_sep='_').astype(int)
    one_hot_encoding_list.append(one_hot_gw)

    # Create Final  Table
    data_final = pd.concat(one_hot_encoding_list, axis=1)

    return data_final


data_22_23_final = create_one_hot_encoded_table(data_22_23_updated_GW0)
print(f'{data_22_23_final.shape = }')

data_22_23_final.shape = (31887, 99)


In [6]:
# Create a dictionary to store player data
def create_player_dict(data, ALL_PLAYERS_22_23):
    # Define the player attributes you want to include in the Player Objects
    # player_attributes = [col for col in data.columns if col not in ['GW', 'name', 'position', 'team', 'kickoff_time']]
    player_attributes = data.columns 

    player_dict = {}
    for i, player in enumerate(ALL_PLAYERS_22_23):
        sys.stdout.write(f'\r{player}: {(i+1)/len(ALL_PLAYERS_22_23)*100:.2f}% - {i+1} of {len(ALL_PLAYERS_22_23)}'+' '*100)
        sys.stdout.flush()
        player_data = data[data['name'] == player]
        player_data = player_data.sort_values(by=['GW','kickoff_time'])
        player_data = player_data.groupby('GW').last().reset_index()
        
        # Select only the desired columns before converting to a dictionary
        player_dict[player] = {GW: player_data[player_data['GW'] == GW][player_attributes].to_dict(orient='records') for GW in range(39)}

    return player_dict

# Only run if changing player attributes
condition = True
if condition:
    player_dict = create_player_dict(data_22_23_final, ALL_PLAYERS_22_23)

    # save
    with open('../2. Data/player_dict.pkl', 'wb') as file:
        pickle.dump(player_dict, file)

# load
with open('../2. Data/player_dict.pkl', 'rb') as file:
    player_dict = pickle.load(file)


Yago_de_Santiago_Alonso: 100.00% - 777 of 777                                                                                                                  

In [8]:
##################################################################################################
## -- Create Player and Team class Definitions
##################################################################################################

# Player
class Player:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def update_attributes(self, player_dict, GW):
        attributes = player_dict[GW][0]
        for key, value in attributes.items():
            setattr(self, key, value)
    
    def get_attributes(self):
        attributes = vars(self)
        exclude_list = ['GW', 'name', 'position', 'team', 'kickoff_time']
        return [value for key, value in attributes.items() if key not in exclude_list]

# Create dict of Player Objects
all_available_players = {}

for player in player_dict:
    GW=0
    player_to_add = Player(**player_dict[player][GW][0])
    all_available_players[player] = player_to_add
           
# Team
class Team:
    def __init__(self):
        self.original_budget = 100_000_000
        self.dynamic_budget = self.original_budget # Budget that gets updated each game week as players value changes
        self.players = []
        self.team_points = 0
        self.MAX_POSITIONS = {'GK': 1,'DEF': 4,'MID': 4,'FWD': 2}

    def add_player(self, player):
        if self.can_player_be_added_based_on_position(player) and player.value <= self.dynamic_budget:
            self.players.append(player)
            self.dynamic_budget -= player.value

    def remove_player(self, player):
        self.players.remove(player)
        self.dynamic_budget += player.value

    def update_team_points(self):
        self.team_points += sum(player.total_points for player in self.players)

    def can_player_be_added_based_on_position(self, new_player):
        # Determine the player's position
        player_position = ''
        for position in ['GK', 'DEF', 'MID', 'FWD']:
            if getattr(new_player, f'position_{position}') == 1:
                player_position = position
                break  
        
        # Get the number of players in the team with the same position
        players_of_same_position = [p for p in self.players if getattr(p, f'position_{player_position}') == 1]

        # Check if adding the new player would exceed the maximum allowed players for this position
        return len(players_of_same_position) < self.MAX_POSITIONS[player_position]

In [None]:
# ## Testing

# player_attributes = [col for col in data_22_23_final.columns if col not in ['GW', 'name', 'position', 'team', 'kickoff_time']]

# existing_player = all_available_players[list(all_available_players.keys())[0]]  

# # List all attributes
# attributelist = existing_player.get_attributes()

# print(len(attributelist))
# print(len(player_attributes))
# attributelist

# # for col in dir(existing_player):
# #     if col in 
# #     print(f'{col}: {getattr(existing_player, col)}')

In [None]:
##################################################################################################
## -- Setup NEAT structure
##################################################################################################

# Specify the path to your configuration file
config_path = "../config.txt"

# Load configuration
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     config_path)

# Create the population
p = neat.Population(config)

# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter(True))
p.add_reporter(neat.StatisticsReporter())

def eval_genomes(genomes, config):
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)

        # Initialize fitness
        genome.fitness = 0
        
        # Create Team Object
        team = Team()

        # GAME WEEK 0 | INITIAL SELECTION
        for name, player in all_available_players.items():
            player.update_attributes(player_dict[name], 0)

        output_dict = {}

        # Pass each Player Inputs to genome | Save Output in Dict
        for name, player in all_available_players.items():
            # Get all attributes of 'player' as a list
            output = net.activate(player.get_attributes())
            output_dict[name] = output

        # Rank players by output, highest values first
        ranked_players = sorted(output_dict.items(), key=lambda item: item[1], reverse=True)

        # Iterate over the sorted list, adding players to the team
        for player_name, _ in ranked_players:
            player = all_available_players[player_name]
            # Check if the player can be added based on position and budget
            team.add_player(player)
                    
            # Stop if 11 players have been added
            if len(team.players) == 11:
                break

        for GW in range(1, 39):
            # Update the players to get game week
            for name, player in all_available_players.items():
                player.update_attributes(player_dict[name], GW)

            # Update Points in team
            team.update_team_points()

        # Set Fitness to the number of Points the team has
        genome.fitness = team.team_points

# Run for up to 300 generations.
winner = p.run(eval_genomes, 50)


In [None]:
for GW in range(7, 39):
    print(GW)
    for name, player in all_available_players.items():
        player.update_attributes(player_dict[name], GW)

In [None]:
# Create a network from the winner genome
winner_net = neat.nn.FeedForwardNetwork.create(winner, config)

# Get the outputs of the winner network for all players
output_dict = {}
for name, player in all_available_players.items():
    output = winner_net.activate(player.get_attributes())
    output_dict[name] = output

# Rank players by output, highest values first
ranked_players = sorted(output_dict.items(), key=lambda item: item[1], reverse=True)

# Now you can use the ranked_players list
ranked_players

In [None]:
# Create a network from the winner genome
winner_net = neat.nn.FeedForwardNetwork.create(winner, config)

# Create a new Team object
team = Team()

# Pass each Player Inputs to genome | Save Output in Dict
output_dict = {}
for name, player in all_available_players.items():
    output = winner_net.activate(player.get_attributes())
    output_dict[name] = output

# Rank players by output, highest values first
ranked_players = sorted(output_dict.items(), key=lambda item: item[1], reverse=True)

# Iterate over the sorted list, adding players to the team
for player_name, _ in ranked_players:
    player = all_available_players[player_name]
    # Check if the player can be added based on position and budget
    if team.can_player_be_added_based_on_position(player):
        team.add_player(player)
                
        # Stop if 11 players have been added
        if len(team.players) == 11:
            break

# Now you can view the team object
for player in team.players:
    print(f'{player.name}: {player.position}')


In [None]:
##################################################################################################
## -- Setup NEAT structure
##################################################################################################

# Specify the path to your configuration file
config_path = "../config.txt"

# Load configuration
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     config_path)

# Create the population
p = neat.Population(config)

# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter(True))
p.add_reporter(neat.StatisticsReporter())


def eval_genomes(genomes, config):
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)

        # Initialize fitness
        genome.fitness = 0
        
        # Create Team Object
        team = Team()

        # GAME WEEK 0 | INITIAL SELECTION
        output_dict = {}

        # Pass each Player Inputs to genome | Save Output in Dict
        for name, player in all_available_players.items():
            output = net.activate(player)
            output_dict[name] = output

        # Create a list of tuples (player, output) from the network output
        indexed_output = [(player, output_dict[name]) for name, player in all_available_players.items()]


        # Sort the list by value in descending order
        indexed_output.sort(key=lambda x: x[1], reverse=True)

        # Iterate over the sorted list, adding players to the team
        for player, _ in indexed_output:
            
            # Check if the player can be added based on position and budget
            if team.can_add_player_based_on_position(player) and team.dynamic_budget >= player.value:
                team.add_player(player)
                
                # Stop if 11 players have been added
                if len(player.players) == 11:
                    break

        # GAME WEEK 1+ | PLAY GAME
        # Loop through each Game Week
        for game_week in range(1,39):
            # Update Player stats for Game Week

            # Update player attributes
            for name, player in all_available_players.items():
                player.update_attributes(player_dict[name], game_week)


            # Update budget based on new player values
            team.dynamic_budget = sum(player.value for player in team.players) - team.original_budget


            output_dict = {}

            # Pass each Player Inputs to genome | Save Output in Dict
            for player in all_available_players:
                player_input = prepare_input_for_player(player,0)
                output = net.activate(player_input)
                output_dict[player.name] = output

            genome.fitness = team.team_points

# Run until a solution is found.
winner = p.run(eval_genomes, 50) 


