# Imports

In [1]:
import pandas as pd
import numpy as np 
import json 
import os
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the playercard statistics that a previous run of this notebook saved
# (a later cell writes them to 'output/playercard_stats.csv').
# Forward slashes keep the path valid on Windows, macOS and Linux; the old
# raw-string '.\output\...' form only resolved on Windows.
playercard_data = 'output/playercard_stats.csv'
playercard_df = pd.read_csv(playercard_data)
playercard_df

Unnamed: 0,name_index,ranking,win rate,Overall,Offense,Defense,Aggression,Speed,discord_username,playercard_name,img_filepath,rank
0,ix mini,0,0.6,99.0,99.3,52.34,72.85,88.97,,,,
1,invincible,1,0.4,98.37,80.18,42.62,100.0,87.23,,,,
2,alex,2,0.56,96.72,100.0,60.39,66.57,74.01,,,,
3,shaunch,3,0.55,95.7,77.58,53.87,78.91,85.05,,,,
4,steve,4,0.51,90.68,83.84,67.25,44.35,72.5,,,,
5,inf3ct3ds0ldi3r,5,0.48,88.56,67.5,87.81,44.93,56.13,,,,
6,pacific_gull,6,0.52,88.1,82.75,66.75,56.32,48.0,,,,
7,muffled,7,0.48,86.68,69.78,42.1,64.49,69.68,,,,
8,luisito,8,0.46,86.5,77.98,73.88,55.6,37.63,Luisito#0412,Luisito,,
9,greensleeves,9,0.5,85.37,81.36,41.66,74.25,41.62,,,,


# Helper Functions 

## Helper Functions for cleaning data from Ballchasing.com

In [3]:
def get_totals(df):
    """Collapse a raw stats export into one row of summed totals per player.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export; must contain every column listed in ``total_columns``.
        Any other columns are ignored. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per lower-cased 'player name'. Columns are 'player name'
        followed by the numeric total columns ('games', 'wins', ...), each
        summed over all rows that share the same player name.

    Raises
    ------
    KeyError
        If any of the expected columns is missing from ``df``.
    """
    # Columns holding the total counts for each statistic over the season.
    total_columns = [
        'team name', 'player name', 'games', 'wins', 'score', 'goals', 'assists', 'saves', 'shots', 'shots conceded', 'goals conceded',
        'goals conceded while last defender', 'amount collected', 'amount collected big pads', 'amount collected small pads',
        'count collected big pads', 'count collected small pads', 'amount stolen', 'amount stolen big pads', 'amount stolen small pads',
        'count stolen big pads', 'count stolen small pads', '0 boost time', '100 boost time', 'amount used while supersonic',
        'amount overfill total', 'amount overfill stolen', 'total distance', 'time slow speed', 'time boost speed', 'time supersonic speed',
        'time on ground', 'time low in air', 'time high in air', 'time powerslide', 'count powerslide', 'time most back', 'time most forward',
        'time in front of ball', 'time behind ball', 'time defensive half', 'time offensive half', 'time defensive third', 'time neutral third',
        'time offensive third', 'demos inflicted', 'demos taken',
    ]
    # Work on an explicit copy: assigning into a bare column subset triggers
    # pandas' SettingWithCopyWarning.
    total = df[total_columns].copy()
    # Lowercase all player names so differently-cased spellings group together.
    total['player name'] = total['player name'].str.lower()
    # Grouping by player merges rows where the player name matches but the team
    # name does not. numeric_only=True drops the text 'team name' column on every
    # pandas version (older releases dropped it implicitly, newer ones would
    # concatenate the strings), so the result always starts with
    # 'player name', 'games'.
    cleaned_data = total.groupby(['player name']).sum(numeric_only=True).reset_index()
    return cleaned_data

In [4]:
def average_player_statistics(cleaned_data):
    """Convert per-player totals into per-game averages.

    The first two columns of ``cleaned_data`` are carried over unchanged;
    every column from the third onward is divided row-wise by that row's
    'games' value and rounded to four decimal places.

    Returns a new DataFrame with the same column order as the input.
    """
    games_played = cleaned_data['games']
    # Leading two columns are kept as-is (not averaged).
    identity_part = cleaned_data.iloc[:, :2]
    # Everything after them is treated as a season total.
    totals_part = cleaned_data.iloc[:, 2:]
    averaged_part = totals_part.div(games_played, axis=0).round(4)
    # Stitch the untouched and averaged halves back together side by side.
    return pd.concat([identity_part, averaged_part], axis=1)

In [5]:
def remove_player_statistics(df, lst):
    """Return the rows of ``df`` whose 'player name' is not listed in ``lst``."""
    is_excluded = df['player name'].isin(lst)
    return df[~is_excluded]

In [6]:
def patch_duplicates(df, dictionary):
    """Merge the totals of players that appear under more than one name.

    Parameters
    ----------
    df : pandas.DataFrame
        Totals with a 'player name' column. The frame is not modified.
    dictionary : dict
        Maps an alternate player name to the name its rows should be
        merged into, e.g. ``{'invincibleblaze': 'invincible'}``.

    Returns
    -------
    pandas.DataFrame
        Rows not mentioned in ``dictionary`` come first, in their original
        order, followed by one summed row per merged player (sorted by
        name). The index is a fresh 0..n-1 range.

    Notes
    -----
    Names in ``dictionary`` that do not occur in ``df`` are ignored rather
    than raising ``KeyError``, and a target name shared by several
    alternates is counted once.
    """
    # Every name taking part in a merge. A set keeps a target name that is
    # shared by several alternates from being selected (and summed) twice.
    involved_names = set(dictionary.keys()) | set(dictionary.values())
    is_involved = df['player name'].isin(involved_names)

    if not is_involved.any():
        # Nothing to merge; still hand back a fresh frame with a clean index.
        return df.reset_index(drop=True)

    # Rename alternates to their target name, then sum rows sharing a name.
    to_merge = df[is_involved].copy()
    to_merge['player name'] = to_merge['player name'].map(
        lambda name: dictionary.get(name, name)
    )
    patched_data = to_merge.groupby(['player name']).sum().reset_index(drop=False)

    # Players untouched by the merge are carried over unchanged.
    untouched = df[~is_involved].reset_index(drop=True)

    # Combine the two
    return pd.concat([untouched, patched_data]).reset_index(drop=True)

## Helper Functions for generating player standings 

Current Season = TRL Winter 2021 Tournament

Win Rate

Offensive Rating
- score per game + shots per game + assists per game + shooting percentage + time most forward per game + time offensive half per game + time neutral third per game + time offensive third per game

Defensive Rating
- score per game + saves per game + goals conceded per game + goals conceded while last defender per game + time most back per game + time defensive half per game + time neutral third per game + time defensive third per game

Aggression
- shots per game + average speed per game + amount stolen per game + amount stolen big pads per game + amount stolen small pads per game + time offensive half per game + demos inflicted per game + demos taken per game + time powerslide per game + avg powerslide time per game + count powerslide per game

Speed
- bpm per game + avg boost amount per game + time supersonic per game + total distance per game + avg speed per game + average amount used while supersonic per game + amount collected per game + amount collected big pads per game + amount collected small pads per game + 0 boost time per game + 100 boost time per game + amount used while supersonic per game + time slow speed per game + time boost speed per game + time supersonic speed per game

In [7]:
def offensive_stats(df):
    """Compute a 0-1 offensive rating for every player.

    Parameters
    ----------
    df : pandas.DataFrame
        Player statistics containing the columns selected below.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ['player name', 'Offensive Rating'], sorted by rating in
        descending order. Ratings are min-max scaled across the players in
        ``df``, so the best player gets 1.0 and the worst 0.0.
    """
    # Select all columns with relevant data. The explicit copy lets us add the
    # rating column without pandas raising SettingWithCopyWarning.
    offense = df[[
        'player name', 'score', 'shots', 'assists',
        'time most forward', 'time offensive half',
        'time neutral third', 'time offensive third'
    ]].copy()
    # Weighted sum of the offensive statistics.
    # NOTE(review): 'time offensive half' is selected above but carries no weight
    # here, and the weights add up to 0.5 - confirm whether a term is missing.
    offense['Offensive Rating'] = .05*offense['score'] + .2*offense['shots'] \
                                + .05*offense['assists'] + .05*offense['time most forward'] + .05*offense['time neutral third'] \
                                + .1*offense['time offensive third']
    # Normalize the raw scores with scikit-learn's MinMaxScaler.
    scaler = MinMaxScaler()
    offense['Offensive Rating'] = scaler.fit_transform(offense[['Offensive Rating']])
    # Best offensive rating first; only the name and the rating are returned.
    return offense.sort_values('Offensive Rating', ascending=False)[['player name', 'Offensive Rating']]

In [8]:
def defensive_stats(df):
    """Compute a 0-1 defensive rating for every player.

    Parameters
    ----------
    df : pandas.DataFrame
        Player statistics containing the columns selected below.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ['player name', 'Defensive Rating'], sorted by rating in
        descending order. Ratings are min-max scaled across the players in
        ``df``.
    """
    # Select all columns with relevant data. The explicit copy lets us add the
    # rating column without pandas raising SettingWithCopyWarning.
    defense = df[[
        'player name', 'score', 'saves', 'goals conceded', 'goals conceded while last defender',
        'time most back', 'time defensive half', 'time neutral third', 'time defensive third'
    ]].copy()
    # Weighted sum of the defensive statistics.
    # NOTE(review): both goals-conceded terms have positive weights, so conceding
    # more goals raises this rating - confirm that is intended.
    defense['Defensive Rating'] = .05* defense['score']  + .2*defense['saves'] + .05*defense['goals conceded'] +\
                                    .1*defense['goals conceded while last defender']+ .1*defense['time most back'] +\
                                    .15*defense['time defensive half'] + .1 * defense['time neutral third'] +\
                                    .05 * defense['time defensive third']
    # Normalize the raw scores with scikit-learn's MinMaxScaler.
    scaler = MinMaxScaler()
    defense['Defensive Rating'] = scaler.fit_transform(defense[['Defensive Rating']])
    # Best defensive rating first; only the name and the rating are returned.
    return defense.sort_values('Defensive Rating', ascending=False)[['player name', 'Defensive Rating']]

In [9]:
def aggression_stats(df):
    """Compute a 0-1 aggression rating for every player.

    Parameters
    ----------
    df : pandas.DataFrame
        Player statistics containing the columns selected below.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ['player name', 'Aggression Rating'], sorted by rating in
        descending order. Ratings are min-max scaled across the players in
        ``df``.
    """
    # Select all columns with relevant data. The explicit copy lets us add the
    # rating column without pandas raising SettingWithCopyWarning.
    aggression = df[[
        'player name', 'shots', 'amount stolen', 'amount stolen big pads', 'amount stolen small pads',
        'time offensive half', 'demos inflicted', 'demos taken', 'time powerslide', 'count powerslide'
    ]].copy()
    # Weighted sum of the aggression statistics.
    aggression['Aggression Rating'] = .2*aggression['shots'] +.025*aggression['amount stolen']+ .05*aggression['amount stolen big pads']+\
                                        .025*aggression['amount stolen small pads'] + .075*aggression['time offensive half'] +\
                                        .2*aggression['demos inflicted'] + .025*aggression['demos taken'] +\
                                        .05*aggression['time powerslide'] + .15*aggression['count powerslide']
    # Normalize the raw scores with scikit-learn's MinMaxScaler.
    scaler = MinMaxScaler()
    aggression['Aggression Rating'] = scaler.fit_transform(aggression[['Aggression Rating']])
    # Highest aggression rating first; only the name and the rating are returned.
    return aggression.sort_values('Aggression Rating', ascending=False)[['player name', 'Aggression Rating']]

In [10]:
def speed_stats(df):
    """Compute a 0-1 speed rating for every player.

    Parameters
    ----------
    df : pandas.DataFrame
        Player statistics containing the columns selected below.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ['player name', 'Speed Rating'], sorted by rating in
        descending order. Ratings are min-max scaled across the players in
        ``df``.
    """
    # Select all columns with relevant data. The explicit copy lets us add the
    # rating column without pandas raising SettingWithCopyWarning.
    speed = df[[
        'player name', 'total distance', 'amount collected', 'amount collected big pads', 'amount collected small pads',
        '0 boost time', '100 boost time', 'amount used while supersonic', 'time slow speed', 'time boost speed', 'time supersonic speed'
    ]].copy()
    # Weighted sum of the speed statistics. Negative weights penalise boost
    # spent while already supersonic, time with an empty tank and time at
    # slow speed.
    speed['Speed Rating'] = 4*speed['total distance']\
                        + (-3*speed['amount used while supersonic']) + 1*speed['amount collected']\
                        + 1*speed['amount collected big pads'] + 2*speed['amount collected small pads'] + (-3*speed['0 boost time'])\
                        + 3*speed['100 boost time'] + -1*speed['time slow speed'] + 1*speed['time boost speed']\
                        + 3*speed['time supersonic speed']
    # Normalize the raw scores with scikit-learn's MinMaxScaler.
    scaler = MinMaxScaler()
    speed['Speed Rating'] = scaler.fit_transform(speed[['Speed Rating']])
    # Highest speed rating first; only the name and the rating are returned.
    return speed.sort_values('Speed Rating', ascending=False)[['player name', 'Speed Rating']]

## Helper Functions to Generate Final Overall Standings DataFrame

In [11]:
def translate(value, leftMin, leftMax, rightMin, rightMax):
    """Linearly map ``value`` from the range [leftMin, leftMax] onto [rightMin, rightMax].

    ``leftMin`` maps to ``rightMin`` and ``leftMax`` maps to ``rightMax``;
    values in between are interpolated proportionally.
    """
    # Widths of the source and target ranges.
    source_width = leftMax - leftMin
    target_width = rightMax - rightMin

    # Position of value inside the source range, as a float fraction.
    distance_from_min = value - leftMin
    fraction = float(distance_from_min) / float(source_width)

    # Same relative position, expressed in the target range.
    return rightMin + (fraction * target_width)

In [12]:
def generate_overall_standings(df, offensive_stats, defensive_stats, aggression_stats, speed_stats):
    """Combine the four category ratings into one overall standings table.

    Parameters
    ----------
    df : pandas.DataFrame
        Player statistics with at least 'player name' and 'wins' columns.
        The 'wins' column is reported as 'win rate' in the result, so it is
        presumably expected to hold per-game averages - confirm with caller.
    offensive_stats, defensive_stats, aggression_stats, speed_stats : pandas.DataFrame
        Rating tables with a 'player name' column plus 'Offensive Rating',
        'Defensive Rating', 'Aggression Rating' and 'Speed Rating'
        respectively (0-1 scale).

    Returns
    -------
    pandas.DataFrame
        One row per player in ``df``, sorted by overall score (best first),
        with columns 'participant', 'win rate', 'Overall', 'Offense',
        'Defense', 'Aggression', 'Speed'. Ratings are on a 0-100 scale and
        all numeric columns are rounded to two decimals.
    """
    rating_columns = ['Offensive Rating', 'Defensive Rating', 'Aggression Rating', 'Speed Rating']
    # Relative weight of each category in the overall score.
    feature_weights = {
        'off': .25,
        'def': .25,
        'agro': .25,
        'spdy': .25
    }

    # Start from name and wins, then attach each rating by player name.
    # The join key is spelled out so that an unexpected shared column can
    # never silently become part of it.
    final_df = df[['player name', 'wins']]
    for ratings in (offensive_stats, defensive_stats, aggression_stats, speed_stats):
        final_df = final_df.merge(ratings, on='player name', how='left')

    # Reorder columns into a readable format; the copy lets us add columns
    # without pandas raising SettingWithCopyWarning.
    overalls = final_df[['player name', 'wins'] + rating_columns].copy()

    # Generate overall standings based on all generated stats
    overalls['Total Overall'] = feature_weights['off']*overalls['Offensive Rating'] +\
                                feature_weights['def']*overalls['Defensive Rating'] +\
                                feature_weights['agro']*overalls['Aggression Rating'] +\
                                feature_weights['spdy']*overalls['Speed Rating']

    # Normalize the overall score to the 0-1 range.
    scaler = MinMaxScaler()
    overalls['Total Overall'] = scaler.fit_transform(overalls[['Total Overall']])

    # Sort values by total overall score (sort_values returns a new frame).
    total_overalls = overalls.sort_values('Total Overall', ascending=False)

    # Express every rating on a 0-100 scale for easier human readability.
    scaled_columns = rating_columns + ['Total Overall']
    total_overalls[scaled_columns] = total_overalls[scaled_columns] * 100

    # Final presentation: column order, rounding, fresh index, friendly names.
    results = total_overalls[[
        'player name', 'wins', 'Total Overall',
        'Offensive Rating', 'Defensive Rating', 'Aggression Rating', 'Speed Rating']]\
            .round(2).reset_index(drop=True)\
                .rename(columns={
                    "player name": "participant",
                    'wins': 'win rate',
                    "Total Overall": "Overall",
                    'Offensive Rating': 'Offense',
                    'Defensive Rating': 'Defense',
                    'Aggression Rating': 'Aggression',
                    'Speed Rating': 'Speed'})

    return results

In [None]:
def add_relevant_metadata(standings, names_fp, playercard_imgs, rank_fp):
    """Attach Discord names, playercard image paths and ranks to the standings.

    Parameters
    ----------
    standings : pandas.DataFrame
        Standings with a 'participant' column and an 'Overall' column.
    names_fp : str
        CSV of player-chosen names. Its first column is read as the index and
        it must contain 'message_author' and 'leaderboard_name' columns.
    playercard_imgs : str
        Directory of playercard images; each file name (minus a trailing
        '.png') is matched against the Discord username.
    rank_fp : str
        JSON file mapping a Discord username to that player's rank.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'name_index' (the lower-cased player name), sorted by
        'Overall' descending, with a 0-based 'ranking' column first. Missing
        values are replaced by the string 'N/A' and image paths use forward
        slashes.

    Notes
    -----
    All merges are outer joins, so names, images or ranks without a matching
    standings row still appear in the result.
    """
    df = standings.set_index('participant')

    # --- Merge the player-specified names (keyed by lower-cased in-game name).
    names_df = (
        pd.read_csv(names_fp, index_col=0)
        .drop_duplicates(subset=['message_author', 'leaderboard_name'], keep='last')
        .rename(columns={'message_author': 'discord_username',
                         'leaderboard_name': 'ballchasing_username'})
    )
    names_df['ballchasing_username'] = names_df['ballchasing_username'].str.lower()
    names_df = names_df.set_index('ballchasing_username', drop=True)
    merged_names = (
        df.merge(names_df, left_index=True, right_index=True, how='outer')
        # Name the index explicitly so the column produced by reset_index does
        # not depend on what label pandas gives the merged index.
        .rename_axis('name_index')
        .reset_index(drop=False)
    )

    # --- Merge the playercard image paths.
    image_files = os.listdir(playercard_imgs)
    image_dir = os.path.abspath(playercard_imgs)
    # Remove only a trailing '.png'; str.strip('.png') would also eat leading
    # or trailing 'p', 'n', 'g' and '.' characters of the username itself.
    avatar_names = [
        fname[:-len('.png')] if fname.endswith('.png') else fname
        for fname in image_files
    ]
    fp_df = pd.DataFrame({
        'avatar_username': avatar_names,
        'img_filepath': [os.path.join(image_dir, fname) for fname in image_files],
    })
    merged_names_paths = merged_names.merge(
        fp_df, left_on='discord_username', right_on='avatar_username', how='outer')

    # --- Merge the players' associated rank.
    with open(rank_fp) as fh:
        ranks = json.load(fh)
    rank_df = (
        pd.DataFrame.from_dict(ranks, orient='index')
        .reset_index(drop=False)
        .rename(columns={'index': 'rank_username', 0: 'rank'})
    )
    merged_names_paths_ranks = merged_names_paths.merge(
        rank_df, left_on='discord_username', right_on='rank_username', how='outer')

    # --- Clean the final DataFrame.
    cleaned_df = (
        merged_names_paths_ranks
        .sort_values('Overall', ascending=False)
        .reset_index(drop=True)      # remove old index
        .rename_axis('ranking')      # position after sorting by Overall ...
        .reset_index(drop=False)     # ... becomes the 'ranking' column
        .fillna('N/A')
    )
    # regex=False: the backslash is a literal character, not a pattern.
    cleaned_df['img_filepath'] = cleaned_df['img_filepath'].str.replace('\\', '/', regex=False)
    cleaned_df = cleaned_df.drop(columns=['avatar_username', 'rank_username']).set_index('name_index')

    return cleaned_df

# Load in Fall 2020 player data

In [14]:
# Forward slash instead of a backslash: '\T' inside a normal string literal is an
# invalid escape sequence (a warning on recent Python versions), and the
# backslash form only resolves on Windows.
fall_2020 = 'data/TRL_F20_playerdata_final.csv'
df = pd.read_csv(fall_2020)
# Season totals per player, then per-game averages.
fall_2020_totals = get_totals(df)
fall_2020_averages = average_player_statistics(fall_2020_totals)

# Load in Data

In [15]:
# Forward slash instead of a backslash: '\T' inside a normal string literal is an
# invalid escape sequence, and the backslash form only resolves on Windows.
# The path gets its own name so `winter_2021` only ever refers to the DataFrame.
winter_2021_fp = 'data/TRL_wk3_data.csv'
winter_2021 = pd.read_csv(winter_2021_fp)

In [23]:
# Collapse the raw Winter 2021 export into one row of summed totals per player.
# NOTE: the first statement of the "Calculate Overalls" section repeats this exact call.
winter_2021_totals = get_totals(winter_2021)
# winter_2021_totals

# Calculate Overalls 

In [24]:
# Rebuild the per-player totals from the raw Winter 2021 export.
winter_2021_totals = get_totals(winter_2021)

# Manual corrections, adjusted as needed.
# Alternate player name -> the name its totals should be merged into.
name_changes = {
    "invincibleblaze": "invincible",
    "nsdlakers4": "shaunch",
    "monkensteinr": "monkenstein",
    "minimy_ugf": "minimug",
}

# Players to leave out of the standings ('goofy' is currently kept in).
players_to_drop = ["squishy", "tag cramification", "yegs"]

In [25]:
# --- Clean the totals ---
# Fold every renamed player's totals into the name it maps to.
patched_totals = patch_duplicates(winter_2021_totals, name_changes)
# Turn the totals into per-game figures.
winter_2021_averages = average_player_statistics(patched_totals)
# Leave out the players listed in players_to_drop.
trl_winter_players = remove_player_statistics(winter_2021_averages, players_to_drop)

# --- Category ratings computed from the replay statistics ---
offensive_df = offensive_stats(trl_winter_players)
defensive_df = defensive_stats(trl_winter_players)
aggression_df = aggression_stats(trl_winter_players)
speed_df = speed_stats(trl_winter_players)

# --- Overall standings ---
standings = generate_overall_standings(
    trl_winter_players, offensive_df, defensive_df, aggression_df, speed_df
)

# Stretch the 0-100 overall score onto the 58-99 interval.
standings['Overall'] = pd.Series(
    [translate(overall, 0, 100, 58, 99) for overall in standings['Overall']]
).round(2)

In [26]:
# Where the extra playercard information lives.
player_names = 'output/playercard_names.csv'
player_imgs_dir = './playercard_imgs'
player_ranks = 'output/player_ranks.json'

# Enrich the standings with names, image paths and ranks, then display them.
playercard_data = add_relevant_metadata(
    standings,
    names_fp=player_names,
    playercard_imgs=player_imgs_dir,
    rank_fp=player_ranks,
)
playercard_data

Unnamed: 0_level_0,ranking,win rate,Overall,Offense,Defense,Aggression,Speed,discord_username,playercard_name,img_filepath,rank
name_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ix mini,0,0.6,99.0,99.3,52.34,72.85,88.97,,,,
invincible,1,0.4,98.37,80.18,42.62,100.0,87.23,,,,
alex,2,0.56,96.72,100.0,60.39,66.57,74.01,ohAlex_#9493,Alex,,
shaunch,3,0.55,95.7,77.58,53.87,78.91,85.05,Shaunch#6025,Shaunch,,Champion I
steve,4,0.51,90.68,83.84,67.25,44.35,72.5,,,,
inf3ct3ds0ldi3r,5,0.48,88.56,67.5,87.81,44.93,56.13,CommanderBoy#6172,INF3CT3DS0LDI3R,c:/Users/dmarc/OneDrive/Documents/Github/Perso...,
pacific_gull,6,0.52,88.1,82.75,66.75,56.32,48.0,,,,
muffled,7,0.48,86.68,69.78,42.1,64.49,69.68,,,,
luisito,8,0.46,86.5,77.98,73.88,55.6,37.63,Luisito#0412,Luisito,c:/Users/dmarc/OneDrive/Documents/Github/Perso...,
greensleeves,9,0.5,85.37,81.36,41.66,74.25,41.62,,,,


In [27]:
# Save the new dataframe as Player Statistics.
# index=True writes the DataFrame index (set to 'name_index' by add_relevant_metadata)
# as the first CSV column. The first code cell of this notebook reads a
# playercard_stats.csv from the same output folder.
playercard_data.to_csv('output/playercard_stats.csv', index=True)

In [36]:
# Processing for playercard_name function

# player_names_fp = r'.\output\playercard_names.csv'
# df = pd.read_csv(player_names_fp, index_col=0)
# df = df.drop_duplicates(subset=['message_author', 'leaderboard_name'], keep='last')
# df = df.set_index('message_author')

# try:
#     msg_author = 'ohAlex_#8725'
#     ballchasing_name = df.loc[msg_author]['leaderboard_name']
#     playercard_name = df.loc[msg_author]['playercard_name']
# except:
#     # await message.channel.send('You must first use the "!playercard_name" command before viewing your playercard.')
#     print('You must first use the "!playercard_name" command before viewing your playercard.')


# output_df = df.reset_index()
# player_names_fp = r'.\output\playercard_names.csv'
# output_df.to_csv(player_names_fp)

# df

You must first use the "!playercard_name" command before viewing your playercard.


Unnamed: 0_level_0,leaderboard_name,playercard_name
message_author,Unnamed: 1_level_1,Unnamed: 2_level_1
test,test,test
Goofy#8725,goofy,GFY


In [19]:
# def add_relevant_metadata(standings, names_fp, playercard_imgs, rank_fp):
#     df = standings.set_index('participant')
#     # # Merge the player specified names
#     player_names_fp = r'.\output\playercard_names.csv'
#     player_names_df = pd.read_csv(player_names_fp, index_col=0)
#     player_names_df = player_names_df.drop_duplicates(subset=['message_author', 'leaderboard_name'], keep='last')
#     cols = {'message_author': 'discord_username', 'leaderboard_name': 'ballchasing_username'}
#     names_df = player_names_df.rename(columns = cols)
#     names_df.ballchasing_username = names_df.ballchasing_username.str.lower()
#     names_df= names_df.set_index('ballchasing_username', drop = True)
#     merged_names = df.merge(names_df, left_index = True, right_index = True, how = 'outer')
#     merged_names = merged_names.reset_index(drop = False) # Reset the index here to preserve original playercard rank data
#     old_index = {'index': 'name_index'}
#     merged_names = merged_names.rename(columns = old_index)

#     # Merge the playercard image paths 
#     x = os.listdir("./playercard_imgs")
#     start_directory = 'c:\\Users\\dmarc\\OneDrive\\Documents\\Github\\Personal Projects\\TritonRL-ScoreBot'
#     abs_path = start_directory + '\\playercard_imgs\\'
#     file_paths = [abs_path + i for i in x]
#     names = [name.strip('.png') for name in x]
#     fp_df = {'avatar_username': names, 'img_filepath': file_paths}
#     fp_df = pd.DataFrame(data = fp_df)
#     merged_names_paths = merged_names.merge(fp_df,  left_on = 'discord_username', right_on='avatar_username', how = 'outer')

#     # Merge the players associated rank 
#     player_ranks = 'output/player_ranks.json'
#     def load_params(fp):
#         with open(fp) as fh:
#             param = json.load(fh)
#         return param
#     ranks = load_params(player_ranks)
#     cols = {'index': 'rank_username', 0: 'rank'}
#     rank_df = pd.DataFrame.from_dict(ranks,orient='index').reset_index(drop = False).rename({'index': 'player', '0': 'rank'})
#     rank_df = rank_df.rename(columns = cols)
#     merged_names_paths_ranks = merged_names_paths.merge(rank_df, left_on = 'discord_username', right_on='rank_username', how = 'outer')

#     # Clean Final DataFrame
#     cleaned_df = merged_names_paths_ranks.sort_values('Overall', ascending = False)
#     cleaned_df = cleaned_df.reset_index(drop=True) # Remove old index 
#     cleaned_df = cleaned_df.reset_index(drop=False) # Add new column based on dataframe ranked by Overall.
#     cleaned_df = cleaned_df.rename(columns={'index': 'ranking'})
#     cleaned_df = cleaned_df.fillna('N/A')
#     cleaned_df['img_filepath'] = cleaned_df['img_filepath'].str.replace('\\', '/')
#     cleaned_df = cleaned_df.drop(columns = ['avatar_username', 'rank_username']).reset_index(drop = True).set_index('name_index')

#     return cleaned_df