In [495]:
# Dependencies
import pandas as pd
import json
import numpy as np
import jenkspy as jpy

# TRL - Ranking Algorithm

### This project uses WAR to determine the rankings of specific TRL players. Configuration options for the algorithm can be found in the cell below.

In [496]:
# Configuration Options

# Minimum threshold of games needed for a player in order to receive a rating:
games_threshold = 15

# WAR Calculation multipliers. Must add to 1
offense_multiplier = 0.45
defense_multiplier = 0.45
support_multiplier = 0.1

# Set as False to use classical WAR instead of cWAR
use_cwar = True
# cWAR Coefficient scalar
cwar_scalar = 1

# Part 1: Data Wrangling

### In this cell, we can determine which seasons we want to use for WAR computation

In [497]:
# Load all CSV files
Fall2022   = pd.read_csv('csv/per_season/Fall2022.csv', delimiter=';')
Fall2023   = pd.read_csv('csv/per_season/Fall2023.csv', delimiter=';')
Spring2023 = pd.read_csv('csv/per_season/Spring2023.csv', delimiter=';')
Spring2024 = pd.read_csv('csv/per_season/Spring2024.csv', delimiter=';')
Summer2023 = pd.read_csv('csv/per_season/Summer2023.csv', delimiter=';')
Winter2023 = pd.read_csv('csv/per_season/Winter2023.csv', delimiter=';')
Winter2024 = pd.read_csv('csv/per_season/Winter2024.csv', delimiter=';')

all_seasons = [Fall2022, Winter2023, Spring2023, Summer2023, Fall2023, Winter2024, Spring2024]

# Using all season data:
# seasons = all_seasons

# Using only last 4 seasons:
seasons = [Summer2023, Fall2023, Winter2024, Spring2024]

# Index of first season we want to use (do not modify)
start_season = None
for i, season in enumerate(all_seasons):
    if id(season) == id(seasons[0]):
        start_season = i + 1
        break

### Core merging / data wrangling algorithm. 

#### If additional usernames need to be added for a player, modify the `json/username_mapping.json` file and re-run these cells.

In [498]:
def merge_by_username_per_season(seasons_list):
    """
    Merges and aggregates statistics by player name for each DataFrame in the list separately.
    
    Parameters:
    seasons (list of pd.DataFrame): List of DataFrames to process.
    
    Returns:
    list of pd.DataFrame: List of DataFrames with statistics aggregated by player name for each season.
    """
    
    # Load JSON data from file into a Python dictionary
    with open('json/username_mapping.json', 'r') as file:
        username_mapping = json.load(file)
    
    def merge_and_aggregate(df):
        # Make all player names lowercase
        df['player name'] = df['player name'].str.lower()
        
        # Replace mapped names with their dictionary value
        df['player name'] = df['player name'].map(username_mapping).fillna(df['player name'])
        
        # Choose the statistics we actually want to use. These are total statistics across a season 
        # (i.e. none of these can be averages of any sort)
        columns_to_aggregate = [
            # Core statistics
            'games', 'wins', 'goals', 'assists', 'saves', 'shots', 
            # Other helpful, but not core statistics
            'shots conceded', 'goals conceded', 'amount stolen', 'amount used while supersonic', 
            # Time statistics
            'time supersonic speed', 'time on ground', 'time low in air', 'time high in air',
            'time in front of ball', 'time behind ball', 'time defensive third', 'time neutral third', 'time offensive third', 
            # Demolition statistics
            'demos inflicted', 'demos taken'
        ]
        
        # Merge all the statistics by adding them all up with respect to the player name
        grouped_by_name = df.groupby('player name')[columns_to_aggregate].sum().reset_index()
        
        return grouped_by_name
    
    # Process each season DataFrame
    merged_seasons = [merge_and_aggregate(season_df) for season_df in seasons_list]
    
    return merged_seasons


def calculate_per_game_per_season(seasons_list, threshold):
    """
    Calculate player statistics per game for each DataFrame in the list separately.
    
    Parameters:
    seasons (list of pd.DataFrame): List of DataFrames to process.
    games_threshold (int): Minimum number of games required to be included in the calculations.
    
    Returns:
    list of pd.DataFrame: List of DataFrames with per-game statistics for each season.
    """
    
    def calculate_per_game(season):
        # Calculate player statistics per game
        per_game_stats = season.copy()
        
        # Filter anyone with less than minimum required games
        per_game_stats = per_game_stats[per_game_stats['games'] >= threshold]
        
        # Calculate stats per game
        for col in ['wins', 'goals', 'assists', 'saves', 'shots', 'shots conceded', 'goals conceded', 'amount stolen',
                    'amount used while supersonic', 'time supersonic speed', 'time on ground', 'time low in air',
                    'time high in air', 'time in front of ball', 'time behind ball', 'time defensive third',
                    'time neutral third', 'time offensive third', 'demos inflicted', 'demos taken']:
            per_game_stats[col] = per_game_stats[col] / per_game_stats['games']
        
        # Rename wins to winrate now that it is a percentage:
        per_game_stats = per_game_stats.rename(columns={'wins': 'winrate'})
        
        # Drop unneeded columns
        per_game_stats.drop(columns=['games'], inplace=True)
        per_game_stats.reindex()
        
        return per_game_stats
    
    # Process each season DataFrame
    per_game_seasons = [calculate_per_game(season_df) for season_df in seasons_list]
    
    return per_game_seasons

### Data wrangling complete. 

#### All statistics necessary can be found in the `per_game_stats_by_season` variable, which is a list of dataframes sorted by oldest to most recent season.
#### Each dataframe in the list contains statistics-per-game values for that season, eg. Shots per Game for Spring 2024 

In [499]:
merged_seasons_by_username = merge_by_username_per_season(seasons)
per_game_stats_by_season = calculate_per_game_per_season(merged_seasons_by_username, games_threshold)

# Data wrangling complete. The per_game_stats_by_season is our final array which has every useful statistic in it.
# Display the results:
for i, season_df_per_game in enumerate(per_game_stats_by_season, start=start_season):
    print(f"Season {i} merged statistics:")
    display(season_df_per_game.head())

Season 4 merged statistics:


Unnamed: 0,player name,winrate,goals,assists,saves,shots,shots conceded,goals conceded,amount stolen,amount used while supersonic,...,time on ground,time low in air,time high in air,time in front of ball,time behind ball,time defensive third,time neutral third,time offensive third,demos inflicted,demos taken
0,alex,0.454545,1.5,0.590909,1.522727,4.272727,7.477273,2.886364,450.636364,292.409091,...,194.995455,152.216364,21.047727,84.672273,283.5875,176.189318,114.3375,77.733409,0.795455,1.045455
1,arby,0.604651,0.534884,0.55814,1.023256,1.953488,6.186047,1.976744,379.837209,112.790698,...,223.315116,124.659535,10.866047,96.40814,262.432558,169.613256,116.733256,72.493721,0.372093,0.930233
2,argon,0.711111,0.933333,0.688889,1.088889,2.755556,6.6,1.777778,574.555556,551.066667,...,187.756222,153.229111,15.797556,111.549556,245.235778,177.010222,103.333111,76.441111,1.4,0.933333
3,bamboo,0.604651,1.139535,0.581395,1.093023,3.465116,6.186047,1.976744,566.488372,287.27907,...,208.473488,133.974186,17.173488,98.682093,260.939535,163.095814,115.557442,80.968372,0.976744,0.860465
4,definitelyanna,0.604651,0.813953,0.627907,1.023256,3.093023,6.186047,1.976744,551.069767,339.976744,...,204.453023,141.557442,13.969767,99.66814,260.311628,173.39907,109.059302,77.52186,0.465116,0.697674


Season 5 merged statistics:


Unnamed: 0,player name,winrate,goals,assists,saves,shots,shots conceded,goals conceded,amount stolen,amount used while supersonic,...,time on ground,time low in air,time high in air,time in front of ball,time behind ball,time defensive third,time neutral third,time offensive third,demos inflicted,demos taken
0,aarav,0.28,0.72,0.64,2.48,2.8,8.96,3.04,556.48,428.48,...,195.2248,155.7932,14.7652,101.0036,264.78,189.182,106.3712,70.2308,1.4,0.84
1,argon,0.28,0.8,0.44,0.92,2.84,8.96,3.04,605.0,472.04,...,191.698,157.3444,17.1932,118.2196,248.0164,183.8364,112.6716,69.728,1.44,0.68
2,beeholder,0.28,0.44,0.32,0.96,1.2,8.96,3.04,220.2,79.08,...,222.3944,133.5748,9.6968,85.8668,279.8,215.13,107.6404,42.896,0.32,0.56
3,bern1s,0.409091,0.772727,0.545455,1.954545,2.454545,8.636364,2.909091,481.454545,429.954545,...,213.728182,128.695909,14.447727,108.182727,248.689091,179.211818,107.664091,69.995,0.727273,0.727273
4,deso,0.47619,0.809524,0.52381,0.642857,2.97619,6.880952,2.285714,547.428571,292.928571,...,194.870238,135.376667,17.905476,108.870238,239.282619,153.415476,109.353333,85.384286,1.02381,0.714286


Season 6 merged statistics:


Unnamed: 0,player name,winrate,goals,assists,saves,shots,shots conceded,goals conceded,amount stolen,amount used while supersonic,...,time on ground,time low in air,time high in air,time in front of ball,time behind ball,time defensive third,time neutral third,time offensive third,demos inflicted,demos taken
0,aarav,0.642857,1.321429,0.696429,1.803571,3.732143,6.089286,1.732143,589.839286,424.875,...,185.935357,157.55625,18.485179,97.931607,264.046071,167.900714,113.1725,80.904643,1.392857,0.642857
1,arby,0.753846,0.876923,0.569231,1.092308,2.4,6.123077,1.492308,449.2,174.6,...,235.813385,118.539077,13.353538,88.698923,279.007385,175.120154,121.892154,70.693538,0.676923,0.969231
2,argon,0.4,0.92,0.44,1.34,2.82,7.64,2.4,553.96,440.68,...,187.2382,153.6972,18.1972,112.2046,246.9272,174.6674,110.6966,73.7676,1.16,0.84
3,azapatos,0.681818,2.287879,0.363636,2.742424,4.924242,8.015152,2.106061,488.30303,296.969697,...,183.965455,149.313939,26.987576,79.636364,280.630909,189.015152,106.145303,65.106515,1.409091,0.787879
4,beeholder,0.428571,0.464286,0.321429,1.071429,1.357143,8.035714,2.607143,269.142857,108.392857,...,211.172857,135.405357,6.971071,86.721429,266.826786,193.223214,107.625357,52.701429,0.607143,0.535714


Season 7 merged statistics:


Unnamed: 0,player name,winrate,goals,assists,saves,shots,shots conceded,goals conceded,amount stolen,amount used while supersonic,...,time on ground,time low in air,time high in air,time in front of ball,time behind ball,time defensive third,time neutral third,time offensive third,demos inflicted,demos taken
0,arby,0.315789,0.631579,0.447368,0.894737,2.710526,7.421053,3.026316,589.105263,212.526316,...,213.256579,123.878158,15.714211,109.635526,243.211579,158.866579,112.590263,81.391053,0.763158,1.052632
1,argon,0.604651,0.976744,0.674419,0.883721,3.27907,7.0,2.139535,633.627907,432.651163,...,187.476744,155.380233,18.589302,119.036744,242.409767,162.207674,110.913023,88.325581,1.767442,0.72093
2,awe,0.555556,0.422222,0.488889,0.844444,1.777778,7.044444,1.866667,465.311111,237.711111,...,224.020222,118.697111,13.935778,94.590667,262.063111,180.058667,113.194,63.4,0.977778,0.644444
3,beeholder,0.434783,0.413043,0.456522,1.130435,1.630435,9.326087,2.847826,337.913043,153.76087,...,212.578043,149.674348,12.439783,104.196304,270.496304,199.381087,112.086739,63.224348,0.956522,0.5
4,bylebukong,0.630435,0.934783,0.565217,1.26087,3.304348,6.413043,1.891304,630.065217,266.5,...,218.622391,132.89587,15.701739,101.417391,265.801522,159.021087,120.973043,87.225435,1.043478,0.891304


# Part 2: Load team data

### Teammates and their data is sometimes used during WAR computation, so it is loaded here.

In [500]:
# Helpful methods for loading and locating teammate data

def get_statistic(name, statistics_df, statistic_name):
    # Filter the DataFrame for the given player name
    player_stats = statistics_df[statistics_df['player name'] == name]

    # Check if the player exists in the DataFrame
    if not player_stats.empty:
        # Return the desired statistic value
        return player_stats.iloc[0][statistic_name]
    
    # Return 0 if the player does not exist
    return 0

def get_teammate_stats(teams_one_season, statistics_df, statistic_name):
    # 1. create two new columns with the teammate names
    df_with_teammates = statistics_df.copy()
    df_with_teammates['teammate_1'] = ''
    df_with_teammates['teammate_2'] = ''
    
    for index, row in df_with_teammates.iterrows():
        name = row['player name']
        
        for team_list in teams_one_season:
            if name in team_list:
                teammates_list = team_list.copy()
                teammates_list.remove(name)
                
                if len(teammates_list) >= 2:
                    df_with_teammates.at[index, 'teammate_1'] = teammates_list[0]
                    df_with_teammates.at[index, 'teammate_2'] = teammates_list[1]
                elif len(teammates_list) == 1:
                    df_with_teammates.at[index, 'teammate_1'] = teammates_list[0]
        
    # 2. Load the necessary statistics and add to the dataframe
    df_with_teammates[f'teammate_{statistic_name}'] = 0.0
    for index, row in df_with_teammates.iterrows():
        
        df_with_teammates.at[index, f'teammate_{statistic_name}'] = (
                get_statistic(row['teammate_1'], statistics_df, statistic_name) + get_statistic(row['teammate_2'], statistics_df, statistic_name)
        )
        
        # Add any other necessary teammate statistics here...
    
    # Drop unneeded columns
    df_with_teammates.drop(columns=['teammate_1', 'teammate_2'], inplace=True)
    df_with_teammates.reindex()    
    
    return df_with_teammates

# Load the JSON file containing all the teams
with open('json/teams_per_season.json', 'r') as json_file:
    teams_per_season = json.load(json_file)

# Part 3: WAR Computation

### This is the core algorithm that makes the wheels turn. 
#### Note that the offense, defense, and support multipliers can be modified in the configuration cell, near the top of this notebook.

In [501]:
def calculate_war_per_season(seasons_list, offense_mult=offense_multiplier, defense_mult=defense_multiplier, support_mult=support_multiplier):
    """
    Calculate WAR for each player in each season DataFrame separately.
    
    Parameters:
    seasons (list of pd.DataFrame): List of DataFrames to process. This should be averages per game, sorted by season.
    offense_multiplier (float): Multiplier for offensive statistics.
    defense_multiplier (float): Multiplier for defensive statistics.
    support_multiplier (float): Multiplier for support statistics.
    
    Returns:
    list of pd.DataFrame: List of DataFrames with WAR calculated for each player in each season.
    """
    
    def calculate_war(player_stats_one_season, season_index):
        # Calculate averages for each statistic
        averages_one_season = player_stats_one_season[player_stats_one_season.select_dtypes(include='number').columns].mean()
        
        # Make a new dataframe to store the WAR computations
        rankings_one_season = player_stats_one_season.copy()
        
        # Pull teammate statistics
        teammate_stats_one_season = get_teammate_stats(teams_per_season.get(f"{season_index}"), player_stats_one_season, 'goals')
        
        # Calculate WAR
        rankings_one_season['WAR'] = (
            offense_mult * (
                + (player_stats_one_season['goals'] - averages_one_season['goals'])
                + (player_stats_one_season['assists'] - averages_one_season['assists']) * 0.75
                + (
                        (player_stats_one_season['shots'] - player_stats_one_season['goals'])
                      - (averages_one_season['shots'] - averages_one_season['goals'])
                ) * 0.33          
            ) +
            defense_mult * (
                + (player_stats_one_season['saves'] - averages_one_season['saves']) * 0.6
                - (player_stats_one_season['shots conceded'] - averages_one_season['shots conceded']) * 0.15
                - (player_stats_one_season['goals conceded'] - averages_one_season['goals conceded']) * 0.33
            ) + 
            support_mult * (
                + (player_stats_one_season['demos inflicted'] - averages_one_season['demos inflicted']) * 0.1
                - (player_stats_one_season['demos taken'] - averages_one_season['demos taken']) * 0.1
                + (player_stats_one_season['amount stolen'] - averages_one_season['amount stolen']) * 0.005
            )
        ) + 2
        
        # Store intermediate results
        rankings_one_season.to_csv(f'results/WAR/season_{season_index}.csv', index=False)
        
        # Drop every column except the ones we want to view
        rankings_one_season = rankings_one_season[['player name', 'WAR']]
        
        return rankings_one_season
    
    # Process each season DataFrame
    war_seasons = [calculate_war(season_df, i) for i, season_df in enumerate(seasons_list, start=start_season)]
    
    return war_seasons

In [502]:
# Run the calculate_war_per_season function
war_by_season = calculate_war_per_season(per_game_stats_by_season)

# Display the results
for i, season_df in enumerate(war_by_season, start=start_season):
    print(f"Season {i} WAR statistics:")
    display(season_df.head())

Season 4 WAR statistics:


Unnamed: 0,player name,WAR
0,alex,2.538258
1,arby,1.940707
2,argon,2.351007
3,bamboo,2.474241
4,definitelyanna,2.306486


Season 5 WAR statistics:


Unnamed: 0,player name,WAR
0,aarav,2.246019
1,argon,1.813639
2,beeholder,1.229459
3,bern1s,2.035007
4,deso,2.009248


Season 6 WAR statistics:


Unnamed: 0,player name,WAR
0,aarav,2.846832
1,arby,2.232611
2,argon,2.152549
3,azapatos,3.218837
4,beeholder,1.483019


Season 7 WAR statistics:


Unnamed: 0,player name,WAR
0,arby,1.804423
1,argon,2.262304
2,awe,1.745183
3,beeholder,1.424649
4,bylebukong,2.384138


# Part 3b: Experimental Clustering

### This is a test cell which uses the Fisher-Jenks algorithm to assign predicted tiers to players.

In [513]:
for i, season_df in enumerate(war_by_season, start=start_season):
    breaks = jpy.jenks_breaks(season_df['WAR'], n_classes=5)
    season_df['cluster'] = pd.cut(season_df['WAR'], bins=breaks, labels=['5', '4', '3', '2', '1'], include_lowest=True)
    
    print(f"Season {i} clusters:")
    display(season_df.head())
    
    cluster_averages = season_df.groupby('cluster', observed=True)['WAR'].mean().reset_index()
    cluster_averages.columns = ['cluster', 'average_WAR']
    
    print(f"season {i} cluster averages:")
    display(cluster_averages)

Season 4 clusters:


Unnamed: 0,player name,WAR,cluster
0,alex,2.538258,1
1,arby,1.940707,3
2,argon,2.351007,2
3,bamboo,2.474241,1
4,definitelyanna,2.306486,2


season 4 cluster averages:


Unnamed: 0,cluster,average_WAR
0,5,1.332787
1,4,1.600135
2,3,2.042665
3,2,2.290969
4,1,2.586448


Season 5 clusters:


Unnamed: 0,player name,WAR,cluster
0,aarav,2.246019,2
1,argon,1.813639,4
2,beeholder,1.229459,5
3,bern1s,2.035007,3
4,deso,2.009248,3


season 5 cluster averages:


Unnamed: 0,cluster,average_WAR
0,5,1.21445
1,4,1.659396
2,3,2.009313
3,2,2.228182
4,1,2.554204


Season 6 clusters:


Unnamed: 0,player name,WAR,cluster
0,aarav,2.846832,1
1,arby,2.232611,2
2,argon,2.152549,2
3,azapatos,3.218837,1
4,beeholder,1.483019,4


season 6 cluster averages:


Unnamed: 0,cluster,average_WAR
0,5,1.274902
1,4,1.584501
2,3,1.93071
3,2,2.235861
4,1,2.875388


Season 7 clusters:


Unnamed: 0,player name,WAR,cluster
0,arby,1.804423,4
1,argon,2.262304,3
2,awe,1.745183,4
3,beeholder,1.424649,5
4,bylebukong,2.384138,2


season 7 cluster averages:


Unnamed: 0,cluster,average_WAR
0,5,1.420745
1,4,1.794784
2,3,2.178331
3,2,2.449127
4,1,2.779418


# Part 4: Corrected WAR (cWAR)

### This sub-algorithm factors in performance of teammates to award bonuses for "carry" potential.

In [504]:
def calculate_cwar_per_season(seasons_list):
    """
    Calculate cWAR for each player in each season DataFrame separately.
    
    Parameters:
    seasons (list of pd.DataFrame): List of DataFrames to process. This should be WAR per player, sorted by season.
    scaling_factor (float): Multiplier for cWAR computation
    
    Returns:
    list of pd.DataFrame: List of DataFrames with cWAR calculated for each player in each season.
    """
    def calculate_cwar(player_war_one_season, season_index, scaling_factor=cwar_scalar):
        
        # Make a new dataframe which has the sum of WAR of both teammates:
        cwar_one_season = get_teammate_stats(teams_per_season.get(f"{season_index}"), player_war_one_season, 'WAR')
        
        # Drop rows where teammate's WAR sum is zero. This usually happens when a player was a sub i.e. the player
        # had no teammates.
        cwar_one_season = cwar_one_season[cwar_one_season['teammate_WAR'] != 0]
        
        # Compute I_p~t for each player, and put it in a column called 'performance'
        cwar_one_season['performance'] = cwar_one_season['WAR'] / cwar_one_season['teammate_WAR']
        
        # Calculate the average across the 'performance' column
        averages_one_season = cwar_one_season[cwar_one_season.select_dtypes(include='number').columns].mean()
        
        # Add statistic to dataframe, for viewing later - line can be commented out to de-clutter final dataframe
        cwar_one_season['avg_performance'] = averages_one_season['performance']
        
        # Finally, calculate the cWAR of each player
        cwar_one_season['cWAR'] = (
                cwar_one_season['WAR'] 
                + scaling_factor * (
                        (cwar_one_season['performance'] - averages_one_season['performance']) / averages_one_season['performance']
                )
        )
        
        # Store intermediate results
        cwar_one_season.to_csv(f'results/cWAR/season_{season_index}.csv', index=False)
        
        return cwar_one_season
        
    
    # Process each season DataFrame
    cwar_seasons = [calculate_cwar(season_df, i) for i, season_df in enumerate(seasons_list, start=start_season)]
    
    return cwar_seasons

In [505]:
# Run the calculate_cwar_per_season function
cwar_by_season = calculate_cwar_per_season(war_by_season)

# Display the results
for i, season_df in enumerate(cwar_by_season, start=start_season):    
    print(f"Season {i} cWAR:")
    display(season_df.head())

Season 4 cWAR:


Unnamed: 0,player name,WAR,cluster,teammate_WAR,performance,avg_performance,cWAR
0,alex,2.538258,1,3.286675,0.772287,0.515281,3.037029
1,arby,1.940707,3,2.898795,0.669487,0.515281,2.239974
2,argon,2.351007,2,4.496447,0.522859,0.515281,2.365714
3,bamboo,2.474241,1,3.509198,0.705073,0.515281,2.84257
4,definitelyanna,2.306486,2,3.676954,0.627282,0.515281,2.523845


Season 5 cWAR:


Unnamed: 0,player name,WAR,cluster,teammate_WAR,performance,avg_performance,cWAR
0,aarav,2.246019,2,3.043097,0.73807,0.516385,2.67532
1,argon,1.813639,4,3.475477,0.521839,0.516385,1.824199
2,beeholder,1.229459,5,4.059657,0.302848,0.516385,0.815935
3,bern1s,2.035007,3,3.443854,0.59091,0.516385,2.179326
4,deso,2.009248,3,4.064341,0.49436,0.516385,1.966595


Season 6 cWAR:


Unnamed: 0,player name,WAR,cluster,teammate_WAR,performance,avg_performance,cWAR
0,aarav,2.846832,1,4.337601,0.656315,0.515395,3.120253
1,arby,2.232611,2,5.095561,0.438148,0.515395,2.082733
2,argon,2.152549,2,3.854368,0.55847,0.515395,2.236126
3,azapatos,3.218837,1,3.117371,1.032549,0.515395,4.222249
4,beeholder,1.483019,4,3.916266,0.378682,0.515395,1.21776


Season 7 cWAR:


Unnamed: 0,player name,WAR,cluster,teammate_WAR,performance,avg_performance,cWAR
0,arby,1.804423,4,3.50915,0.514205,0.515953,1.801036
1,argon,2.262304,3,4.304759,0.525536,0.515953,2.280877
2,awe,1.745183,4,4.433533,0.393633,0.515953,1.508107
3,beeholder,1.424649,5,4.078158,0.349336,0.515953,1.101719
4,bylebukong,2.384138,2,4.162537,0.572761,0.515953,2.494241


# Part 5: WAR Weighting

### More recent seasons will receive a preferential weighting compared to older seasons.
### TODO: This cell currently runs an average across all seasons. Should be modified... at some point..

In [506]:
# Add weights for more recent seasons

# Construct a dictionary to store weighted WAR values for each player
# Dictionary format is - 'player name' : [list, of, WARs]
player_war_dict = {}

all_wars = pd.concat(cwar_by_season)
for _, row in all_wars.iterrows():
    if not row['player name'] in player_war_dict:
        player_war_dict[row['player name']] = []
    if use_cwar:
        player_war_dict[row['player name']].append(row['cWAR'])
    else:
        player_war_dict[row['player name']].append(row['WAR'])

for player_name in player_war_dict: print(f"{player_name} : {player_war_dict[player_name]}")

# Now, weight the WARs such that most recent ratings will be favored more
weighted_war_dict = {}
for player_name in player_war_dict:
    weighted_war_dict[player_name] = np.mean(player_war_dict[player_name])

alex : [3.037028691966605]
arby : [2.2399740650154034, 2.082732670034886, 1.801036291369186]
argon : [2.365713810813606, 1.824199454579041, 2.2361257513673842, 2.280876717884398]
bamboo : [2.8425701557078105]
definitelyanna : [2.5238450361674767]
deso : [2.5952567435354723, 1.9665954825036103, 2.2825689839431367, 2.500915148840343]
dies : [2.076916541880677, 1.4102743864032263, 1.6774138276417878]
domo : [1.3070582041168601]
front flip freddy : [1.5055806211849858, 1.2737088164975843, 1.7001044606365328, 1.3543815340076155]
g_llama : [1.4435104302811623, 1.3325475447146757, 0.9379789490036651, 1.0884835271706663]
hotshot : [3.177114170804425, 2.704612824709584, 2.588531500524332, 2.7447919499185907]
kameron : [1.2883802742484136]
king : [1.211557655142681, 1.634897442360931, 1.3489381153131939, 1.7882715646140035]
klosty : [1.9258992009573421]
leagueson : [2.39305576699906, 2.202177715238421, 2.1790519902986656]
leon : [2.825097448402752, 2.8262440620877842, 2.8747360597152314]
luma : 

In [507]:
# Convert dictionary to DataFrame
final_weighted_war = pd.DataFrame.from_dict(weighted_war_dict, orient='index', columns=['WAR'])

# Reset index to make player names a column
final_weighted_war.reset_index(inplace=True)

# Rename the columns
final_weighted_war.rename(columns={'index': 'player name'}, inplace=True)

# Sort by WAR and print final results to a csv
final_weighted_war = final_weighted_war.sort_values(by='WAR', ascending=False)
final_weighted_war.to_csv('results/final_war.csv', index=False)

# Part 6: Team Creation

#### This is using a greedy algorithm to create teams. Essentially, each team tries to make the highest total WAR team that they possibly can. Each team picks the highest rated player out of the remaining players. Then, whichever team has the lowest total WAR gets to pick next.

#### Note that this is not a perfect, be-all-end-all solution as it does not exhaustively test all combinations of teams. However, it does get pretty close.

In [508]:
# Signups for Spring 2024. Mostly used as a test.
# playerlist = ['kade', 'mini', 'peak', 'leon', 'snipey', 'greensleeves', 'terminator', 'bylebukong', 'hotshot', 'rubber ducky', 'deso',
#                      'leagueson', 'vpr.vnm', 'tipsy', 'ral days', 'argon', 'fernado', 'pops', 'capi', 'senor brightside', 'arby', 'toucan', 
#                      'tophatbear', 'wika', 'testie', 'waycey', 'king', 'awe', 'phrez', 'front flip freddy', 'lukethighwalkr4', 'dies', 'g_llama',
#                      'renshirokamazaki', 'elatedthug', 'beeholder', 'mistermirz', 'uday', 'gangster.goose']

# Signups for Summer 2024 (so far)
playerlist = ['leagueson', 'deso', 'tortle', 'testie', 'argon', 'snipey', 'lukethighwalkr4', 'fernado', 'elatedthug', 'penquan', 'vpr.vnm', 'idoit', 'king',
              'tipsy', 'g_llama', 'sales', 'toucan', 'front flip freddy', 'rubber ducky', 'luma', 'domo', 'kail', 'renshirokamazaki', 'roo', 'jmc', 'greensleeves', 
              'hotshot', 'pastamafia']

playerlist = final_weighted_war[final_weighted_war['player name'].isin(playerlist)]

# Sort by WAR
playerlist = playerlist.sort_values(by='WAR', ascending=False)

breaks = jpy.jenks_breaks(playerlist['WAR'], n_classes=4)
playerlist['predicted_tier'] = pd.cut(playerlist['WAR'], bins=breaks, labels=['tier4', 'tier3', 'tier2', 'tier1'], include_lowest=True)

print(f"clusters:")
print(playerlist)

players = playerlist.to_dict('records')

# Initialize teams
teams = [[] for _ in range(len(playerlist) // 3)]
team_wars = [0] * (len(playerlist) // 3)

# List to store full teams
full_teams = []

# Assign players to teams greedily
for player in players:
    if len(teams) == 0:
        break  # If all teams are already full, break the loop
    
    # Find the team with the lowest WAR
    best_team_index = np.argmin(team_wars)
    teams[best_team_index].append(player)
    team_wars[best_team_index] += player['WAR']
    
    # Check if the team is full (3 players)
    if len(teams[best_team_index]) == 3:
        full_teams.append(teams[best_team_index])
        teams.pop(best_team_index)
        team_wars.pop(best_team_index)

# Display the full teams
print("Full Teams:")
for i, team in enumerate(full_teams):
    print(f"Team {i+1}:")
    for player in team:
        print(f"\t{player['player name']} (WAR: {player['WAR']})")
    print(f"\t\tTeam WAR: {sum(player['WAR'] for player in team)}")

# If there are any incomplete teams left, display them as well
if teams:
    print("\nIncomplete Teams:")
    for i, team in enumerate(teams):
        print(f"Incomplete Team {i+1}:")
        for player in team:
            print(f"\t{player['player name']} (WAR: {player['WAR']})")
        print(f"\t\tTeam WAR: {sum(player['WAR'] for player in team)}")

clusters:
          player name       WAR predicted_tier
39       greensleeves  2.871027          tier1
10            hotshot  2.803763          tier1
25       rubber ducky  2.560689          tier1
66             snipey  2.506007          tier1
5                deso  2.336334          tier2
33            vpr.vnm  2.275322          tier2
31              tipsy  2.263131          tier2
14          leagueson  2.258095          tier2
64            fernado  2.185162          tier2
2               argon  2.176729          tier2
16               luma  2.039852          tier2
32             toucan  1.931992          tier2
67             testie  1.596400          tier3
20            penquan  1.586699          tier3
12               king  1.495916          tier3
8   front flip freddy  1.458444          tier3
7                domo  1.307058          tier4
41    lukethighwalkr4  1.275676          tier4
63         elatedthug  1.243075          tier4
23   renshirokamazaki  1.207086          tier4
9  