### Loading data

In [380]:
import pandas as pd
import numpy as np
import seaborn as sns

import requests
from bs4 import BeautifulSoup

%matplotlib inline
import matplotlib

import re

**Import data** - Only Data from the last two and a half years are processed, as according to this article: https://igamingbusiness.com/esports/how-much-league-of-legends-data-is-enough-data/ this is the most sensible with the bi-weekly patches the game receives (changes to the game)

In [381]:
df_2020 = pd.read_csv('raw_data/2020_LoL_esports_Data.csv', parse_dates=['date'])
df_2021 = pd.read_csv('raw_data/2021_LoL_esports_Data.csv', parse_dates=['date'])
df_2022 = pd.read_csv('raw_data/2022_LoL_esports_Data.csv', parse_dates=['date'])
df_2023 = pd.read_csv('raw_data/2023_LoL_esports_Data.csv', parse_dates=['date'])

  df_2022 = pd.read_csv('raw_data/2022_LoL_esports_Data.csv', parse_dates=['date'])
  df_2023 = pd.read_csv('raw_data/2023_LoL_esports_Data.csv', parse_dates=['date'])


In [382]:
dmg_types = pd.read_csv('raw_data/champions_dmg_type.csv', encoding='cp1252')

**Loading champion winrates**

In [383]:
final_df = pd.read_csv("raw_data/all_patches_final.csv")

final_df["Win%"] = final_df["Win %"].str.rstrip('%')

final_df["Win%"] = pd.to_numeric(final_df["Win%"])

grouped_df = final_df.groupby(["patch", "Name"])["Win%"].max().reset_index()

grouped_df.to_pickle("Champions_dataframe/win_and_champions_patch.pickle")

champions_wr_overview = grouped_df

output = []

for i in champions_wr_overview["Name"]:
    text = i
    output.append(text[:len(text)//2])

champions_wr_overview["name_clean"] = output
champions_wr_overview = champions_wr_overview.drop("Name", axis=1)

In [384]:
champions_wr_overview = grouped_df

output = []

for i in champions_wr_overview["Name"]:
    text = i
    output.append(text[:len(text)//2])

champions_wr_overview["name_clean"] = output
champions_wr_overview = champions_wr_overview.drop("Name", axis=1)
champions_wr_overview = champions_wr_overview.replace(['Nunu'], 'Eskimo')

**Filter data for most important leagues and team data only**

In [1352]:
# Only focused on best of 1 match formats (so no LCK, LPL)

european_leagues = ["LEC", "LFL", "PRM", "LVP", "TCL", "UL", "EBL"]
tier_1_leagues = ["LEC", "LCS", "PCS", "CBLOL", "LLA", "LJL", "LCL"]
tier_2_leagues = ["LCO", "LFL", "TCL", "UL", "CBLOLA", "LPLOL"]
all_leagues = ["LEC", "LCS", "PCS", "CBLOL", "LLA", "TCL", "LJL", "LCL", "UL", "EBL"]
model_optimized = ["LEC", "LCS", "PCS", "CBLOL", "TCL", "LLA", "LJL", "LCL"]
league_try = ["LEC", "LCS", "PCS", "CBLOL", "TCL", "LLA", "LCL"]

In [1353]:
leagues_to_filter = league_try

# Filter the dataframe to include only the specified leagues
league_filtered_df_2020 = df_2020.loc[df_2020['league'].isin(leagues_to_filter)]
league_filtered_df_2021 = df_2021.loc[df_2021['league'].isin(leagues_to_filter)]
league_filtered_df_2022 = df_2022.loc[df_2022['league'].isin(leagues_to_filter)]
league_filtered_df_2023 = df_2023.loc[df_2023['league'].isin(leagues_to_filter)]

# Filter the dataframe to only include team-based data (not individual)
filtered_df_2020 = league_filtered_df_2020.loc[league_filtered_df_2020['position'].isin(["team"])]
filtered_df_2021 = league_filtered_df_2021.loc[league_filtered_df_2021['position'].isin(["team"])]
filtered_df_2022 = league_filtered_df_2022.loc[league_filtered_df_2022['position'].isin(["team"])]
filtered_df_2023 = league_filtered_df_2023.loc[league_filtered_df_2023['position'].isin(["team"])]

# Concat the data for full live data
full_data = pd.concat([df_2021, df_2022, df_2023])

# Filter the dataframe to only include champion data

patch_information = full_data.loc[full_data['participantid'].isin([1])].set_index('gameid')['patch']
champion_1 = full_data.loc[full_data['participantid'].isin([1])].set_index('gameid')['champion']
champion_2 = full_data.loc[full_data['participantid'].isin([2])].set_index('gameid')['champion']
champion_3 = full_data.loc[full_data['participantid'].isin([3])].set_index('gameid')['champion']
champion_4 = full_data.loc[full_data['participantid'].isin([4])].set_index('gameid')['champion']
champion_5 = full_data.loc[full_data['participantid'].isin([5])].set_index('gameid')['champion']
champion_6 = full_data.loc[full_data['participantid'].isin([6])].set_index('gameid')['champion']
champion_7 = full_data.loc[full_data['participantid'].isin([7])].set_index('gameid')['champion']
champion_8 = full_data.loc[full_data['participantid'].isin([8])].set_index('gameid')['champion']
champion_9 = full_data.loc[full_data['participantid'].isin([9])].set_index('gameid')['champion']
champion_10 = full_data.loc[full_data['participantid'].isin([10])].set_index('gameid')['champion']

champions_list = [patch_information,champion_1, 
               champion_2,
               champion_3,
               champion_4,
               champion_5,
               champion_6,
               champion_7,
               champion_8,
               champion_9,
               champion_10]

# Filter the dataframe to only include played id data

patch_information = full_data.loc[full_data['participantid'].isin([1])].set_index('gameid')['patch']
player_1 = full_data.loc[full_data['participantid'].isin([1])].set_index('gameid')['playerid']
player_2 = full_data.loc[full_data['participantid'].isin([2])].set_index('gameid')['playerid']
player_3 = full_data.loc[full_data['participantid'].isin([3])].set_index('gameid')['playerid']
player_4 = full_data.loc[full_data['participantid'].isin([4])].set_index('gameid')['playerid']
player_5 = full_data.loc[full_data['participantid'].isin([5])].set_index('gameid')['playerid']
player_6 = full_data.loc[full_data['participantid'].isin([6])].set_index('gameid')['playerid']
player_7 = full_data.loc[full_data['participantid'].isin([7])].set_index('gameid')['playerid']
player_8 = full_data.loc[full_data['participantid'].isin([8])].set_index('gameid')['playerid']
player_9 = full_data.loc[full_data['participantid'].isin([9])].set_index('gameid')['playerid']
player_10 = full_data.loc[full_data['participantid'].isin([10])].set_index('gameid')['playerid']

playerids_list = [patch_information,player_1, 
               player_2,
               player_3,
               player_4,
               player_5,
               player_6,
               player_7,
               player_8,
               player_9,
               player_10]


In [1354]:
for i, series in enumerate(champions_list):
    if i == 0:   
        champions = pd.DataFrame(series)
        champions.columns = ["patch"]
    else:
        champions[f"player_{i}"] = series
        
champions = champions.dropna(axis=0)
champions = champions.replace(['Nunu & Willump'], 'Eskimo')

In [1355]:
for i, series in enumerate(playerids_list):
    if i == 0:   
        players = pd.DataFrame(series)
        players.columns = ["patch"]
    else:
        players[f"player_{i}"] = series

### Derivation of the "player experience difference" parameter

**Historic experience**

In [1356]:
# 2020

tmp = league_filtered_df_2020.loc[league_filtered_df_2020['participantid'].isin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])]
tmp = tmp[['date', 'gameid', 'playerid', 'champion']].reset_index()
tmp = tmp.dropna()
tmp = tmp.set_index(['playerid', 'date', 'gameid', 'champion'])

historic_experience_2020 = tmp.groupby(['playerid']).count()

In [1357]:
# 2021

tmp = league_filtered_df_2021.loc[league_filtered_df_2021['participantid'].isin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])]
tmp = tmp[['date', 'gameid', 'playerid', 'champion']].reset_index()
tmp = tmp.dropna()
tmp = tmp.set_index(['playerid', 'date', 'gameid', 'champion'])

historic_experience_2021 = tmp.groupby(['playerid']).count()

In [1358]:
# 2022

tmp = league_filtered_df_2022.loc[league_filtered_df_2022['participantid'].isin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])]
tmp = tmp[['date', 'gameid', 'playerid', 'champion']].reset_index()
tmp = tmp.dropna()
tmp = tmp.set_index(['playerid', 'date', 'gameid', 'champion'])

historic_experience_2022 = tmp.groupby(['playerid']).count()

**Dynamic experience**

In [1359]:
# 2021

tmp = league_filtered_df_2021.loc[league_filtered_df_2021['participantid'].isin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])]
tmp = tmp[['date', 'playerid', 'gameid', 'result']].reset_index().set_index(['playerid', 'date']).sort_index()
tmp = tmp.dropna()
player_games_played = pd.DataFrame()

# Need to adapt, so that the information is as before the game
player_games_played['total_games'] = tmp.groupby('playerid')['result'].cumcount()
tmp = pd.merge(tmp, player_games_played,  how='left', left_on=['date','playerid'], right_on = ['date','playerid'])

experience_2021 = tmp

In [1360]:
# 2022

tmp = league_filtered_df_2022.loc[league_filtered_df_2022['participantid'].isin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])]
tmp = tmp[['date', 'playerid', 'gameid', 'result']].reset_index().set_index(['playerid', 'date']).sort_index()
tmp = tmp.dropna()
player_games_played = pd.DataFrame()

# Need to adapt, so that the information is as before the game
player_games_played['total_games'] = tmp.groupby('playerid')['result'].cumcount()
tmp = pd.merge(tmp, player_games_played,  how='left', left_on=['date','playerid'], right_on = ['date','playerid'])

experience_2022 = tmp

In [1361]:
# 2023

tmp = league_filtered_df_2023.loc[league_filtered_df_2023['participantid'].isin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])]
tmp = tmp[['date', 'playerid', 'gameid', 'result']].reset_index().set_index(['playerid', 'date']).sort_index()
tmp = tmp.dropna()
player_games_played = pd.DataFrame()

# Need to adapt, so that the information is as before the game
player_games_played['total_games'] = tmp.groupby('playerid')['result'].cumcount()
tmp = pd.merge(tmp, player_games_played,  how='left', left_on=['date','playerid'], right_on = ['date','playerid'])

experience_2023 = tmp

In [1362]:
mixed_exp_2021 = pd.merge(experience_2021.reset_index(), historic_experience_2020, how='left', left_on='playerid', right_on='playerid')
mixed_exp_2021 = mixed_exp_2021.fillna(value=0)
mixed_exp_2021['experience'] = mixed_exp_2021['total_games'] + mixed_exp_2021['index_y']

mixed_exp_2022 = pd.merge(experience_2022.reset_index(), historic_experience_2021, how='left', left_on='playerid', right_on='playerid')
mixed_exp_2022 = mixed_exp_2022.fillna(value=0)
mixed_exp_2022['experience'] = mixed_exp_2022['total_games'] + mixed_exp_2022['index_y']

mixed_exp_2023 = pd.merge(experience_2023.reset_index(), historic_experience_2022, how='left', left_on='playerid', right_on='playerid')
mixed_exp_2023 = mixed_exp_2023.fillna(value=0)
mixed_exp_2023['experience'] = mixed_exp_2023['total_games'] + mixed_exp_2023['index_y']

experience = pd.concat([mixed_exp_2021, mixed_exp_2022, mixed_exp_2023], ignore_index=True).set_index(['gameid', 'playerid'])['experience']

In [1363]:
exp_1 = pd.merge(players.reset_index(), experience, how='left', left_on=['gameid', 'player_1'], right_on=['gameid', 'playerid'])['experience']
exp_2 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_2'], right_on=['gameid', 'playerid'])['experience']
exp_3 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_3'], right_on=['gameid', 'playerid'])['experience']
exp_4 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_4'], right_on=['gameid', 'playerid'])['experience']
exp_5 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_5'], right_on=['gameid', 'playerid'])['experience']
exp_6 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_6'], right_on=['gameid', 'playerid'])['experience']
exp_7 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_7'], right_on=['gameid', 'playerid'])['experience']
exp_8 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_8'], right_on=['gameid', 'playerid'])['experience']
exp_9 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_9'], right_on=['gameid', 'playerid'])['experience']
exp_10 = pd.merge(players.reset_index(), experience, how='left',left_on=['gameid', 'player_10'], right_on=['gameid', 'playerid'])['experience']

exp_list = [
    champions.reset_index()['gameid'],
    exp_1,
    exp_2,
    exp_3,
    exp_4,
    exp_5,
    exp_6,
    exp_7,
    exp_8,
    exp_9,
    exp_10,
]

In [1364]:
for i, series in enumerate(exp_list):
    if i == 0:   
        exp = pd.DataFrame(series)
        exp.columns = ["gameid"]
    else:
        exp[f"player_{i}"] = series
        
exp = exp.fillna(value=0)

exp['avg_blue_exp'] = (exp['player_1'] + exp['player_2'] + exp['player_3'] + exp['player_4'] + exp['player_5'])/5

exp['avg_red_exp'] = (exp['player_6'] + exp['player_7'] + exp['player_8'] + exp['player_9'] + exp['player_10'])/5

exp['exp_diff'] = exp['avg_blue_exp'] - exp['avg_red_exp']

exp = exp[['gameid', 'exp_diff']]

### Derivation of the "In Form" Parameter

Defined as a binary variable which is either 1 (won last three games) or 0

In [1365]:
def check_form(a, b, c):
    if a > b and b > c:
        return 1
    return 0

In [1366]:
# 2021
tmp = filtered_df_2021[['date', 'teamid', 'gameid', 'result']].reset_index().set_index(['teamid', 'date']).sort_index()
games_won_dynamic = pd.DataFrame(tmp.groupby('teamid')['result'].shift()).groupby('teamid')['result'].cumsum()

tmp = pd.merge(tmp, games_won_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])



counter_1 = -2
counter_2 = -1
output = []

tmp['tmp'] = tmp['result_y']
tmp['tmp'] = tmp['tmp'].fillna(value=0) 

for i in range(len(tmp['tmp'])):
    if i < 1:
        output.append(0)
    else:
        output.append(check_form(tmp['tmp'][i], tmp['tmp'][counter_2], tmp['tmp'][counter_1]))
        counter_1 += 1
        counter_2 += 1
        
tmp['streak'] = output

streak_2021 = tmp.reset_index()

In [1367]:
# 2022
tmp = filtered_df_2022[['date', 'teamid', 'gameid', 'result']].reset_index().set_index(['teamid', 'date']).sort_index()
games_won_dynamic = pd.DataFrame(tmp.groupby('teamid')['result'].shift()).groupby('teamid')['result'].cumsum()

tmp = pd.merge(tmp, games_won_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])



counter_1 = -2
counter_2 = -1
output = []

tmp['tmp'] = tmp['result_y']
tmp['tmp'] = tmp['tmp'].fillna(value=0) 

for i in range(len(tmp['tmp'])):
    if i < 1:
        output.append(0)
    else:
        output.append(check_form(tmp['tmp'][i], tmp['tmp'][counter_2], tmp['tmp'][counter_1]))
        counter_1 += 1
        counter_2 += 1
        
tmp['streak'] = output

streak_2022 = tmp.reset_index()

In [1368]:
# 2023
tmp = filtered_df_2023[['date', 'teamid', 'gameid', 'result']].reset_index().set_index(['teamid', 'date']).sort_index()
games_won_dynamic = pd.DataFrame(tmp.groupby('teamid')['result'].shift()).groupby('teamid')['result'].cumsum()

tmp = pd.merge(tmp, games_won_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])



counter_1 = -1
counter_2 = 0
output = []

tmp['tmp'] = tmp['result_y']
tmp['tmp'] = tmp['tmp'].fillna(value=0) 

for i in range(len(tmp['tmp'])):
    if i < 1:
        output.append(0)
    else:
        output.append(check_form(tmp['tmp'][i], tmp['tmp'][counter_2], tmp['tmp'][counter_1]))
        counter_1 += 1
        counter_2 += 1
        
tmp['streak'] = output

streak_2023 = tmp.reset_index()

In [1369]:
streak = pd.concat([streak_2021, streak_2022, streak_2023], ignore_index=True).set_index(['gameid', 'teamid'])['streak']

### Derivation of the "Win Rate Difference" Parameter

**Calculate historical win rate, defined as overall performance in last season**

In [1370]:
# Group the DataFrame by team_id and count the number of games played
games_played_2022 = filtered_df_2022.groupby('teamid')['result'].count()
games_played_2021 = filtered_df_2021.groupby('teamid')['result'].count()
games_played_2020 = filtered_df_2020.groupby('teamid')['result'].count()

# Group the DataFrame by team_id and count the number of wins
wins_2022 = filtered_df_2022.groupby('teamid')['result'].sum()
wins_2021 = filtered_df_2021.groupby('teamid')['result'].sum()
wins_2020 = filtered_df_2020.groupby('teamid')['result'].sum()

# Calculate the win rate for each team
win_rates_in_2022 = (wins_2022 / games_played_2022) * 100
win_rates_in_2021 = (wins_2021 / games_played_2021) * 100
win_rates_in_2020 = (wins_2020 / games_played_2020) * 100

In [1371]:
tmp_0 = pd.DataFrame(filtered_df_2021.groupby('teamid').count().iloc[:, 0])
tmp_1 = pd.DataFrame(filtered_df_2022.groupby('teamid').count().iloc[:, 0])
tmp_2 = pd.DataFrame(filtered_df_2023.groupby('teamid').count().iloc[:, 0])

historical_wr_2021 = pd.merge(tmp_0, win_rates_in_2020, how='left', left_index=True, right_index=True)
historical_wr_2022 = pd.merge(tmp_1, win_rates_in_2021, how='left', left_index=True, right_index=True)
historical_wr_2023 = pd.merge(tmp_2, win_rates_in_2022, how='left', left_index=True, right_index=True)

historical_wr_2021.fillna(value=historical_wr_2021['result'].mean(), inplace=True)
historical_wr_2022.fillna(value=historical_wr_2022['result'].mean(), inplace=True)
historical_wr_2023.fillna(value=historical_wr_2023['result'].mean(), inplace=True)

**Calculate dynamic win rate throughout the season, defined as games won divided by games played so far this season**

In [1372]:
# Number of games until only current season counts as win rate:
n_games = 10

In [1373]:
# 2021
tmp = filtered_df_2021[['date', 'teamid', 'gameid', 'result']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['result'].cumcount()
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
games_won_dynamic = pd.DataFrame(tmp.groupby('teamid')['result'].shift()).groupby('teamid')['result'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, games_won_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_wr'] = tmp['result_y'] / tmp['total_games'] * 100

# If no data available (new teams) replace with average of 50%
tmp['dynamic_wr'] = tmp['dynamic_wr'].replace([np.inf, -np.inf], np.nan)
tmp['dynamic_wr'] = tmp['dynamic_wr'].fillna(value=50)

dynamic_wr_2021 = tmp

# 2022
tmp = filtered_df_2022[['date', 'teamid', 'gameid', 'result']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['result'].cumcount()
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
games_won_dynamic = pd.DataFrame(tmp.groupby('teamid')['result'].shift()).groupby('teamid')['result'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, games_won_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_wr'] = tmp['result_y'] / tmp['total_games'] * 100

# If no data available (new teams) replace with average of 50%
tmp['dynamic_wr'] = tmp['dynamic_wr'].replace([np.inf, -np.inf], np.nan)
tmp['dynamic_wr'] = tmp['dynamic_wr'].fillna(value=50)

dynamic_wr_2022 = tmp

# 2023
tmp = filtered_df_2023[['date', 'teamid', 'gameid', 'result']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['result'].cumcount()
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
games_won_dynamic = pd.DataFrame(tmp.groupby('teamid')['result'].shift()).groupby('teamid')['result'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, games_won_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_wr'] = tmp['result_y'] / tmp['total_games'] * 100

# If no data available (new teams) replace with average of 50%
tmp['dynamic_wr'] = tmp['dynamic_wr'].replace([np.inf, -np.inf], np.nan)
tmp['dynamic_wr'] = tmp['dynamic_wr'].fillna(value=50)

dynamic_wr_2023 = tmp

**Mixing historical and dynamic win rate to arrive at an expected win rate prior to the game**

In [1374]:
wr_2021 = pd.merge(dynamic_wr_2021, historical_wr_2021, how='left', left_on='teamid', right_on='teamid').reset_index()
wr_2021['mixed_wr'] = wr_2021['dynamic_wr'] * wr_2021['dynamic_weighting'] + wr_2021['result'] * (1 - wr_2021['dynamic_weighting'])

wr_2022 = pd.merge(dynamic_wr_2022, historical_wr_2022, how='left', left_on='teamid', right_on='teamid').reset_index()
wr_2022['mixed_wr'] = wr_2022['dynamic_wr'] * wr_2022['dynamic_weighting'] + wr_2022['result'] * (1 - wr_2022['dynamic_weighting'])

wr_2023 = pd.merge(dynamic_wr_2023, historical_wr_2023, how='left', left_on='teamid', right_on='teamid').reset_index()
wr_2023['mixed_wr'] = wr_2023['dynamic_wr'] * wr_2022['dynamic_weighting'] + wr_2023['result'] * (1 - wr_2022['dynamic_weighting'])

wr = pd.concat([wr_2021, wr_2022, wr_2023], ignore_index=True).set_index(['gameid_x', 'teamid'])['mixed_wr']

### Derivation of the "Vision score per minute difference percentage" parameter 

In [1375]:
# 2021
tmp = pd.DataFrame(filtered_df_2021.groupby('teamid').count().iloc[:, 0])
vspm_in_2020 = filtered_df_2020.groupby('teamid')['vspm'].mean()
historical_vspm_2021 = pd.merge(tmp, vspm_in_2020, how='left', left_index=True, right_index=True)
historical_vspm_2021.fillna(value=vspm_in_2020.mean(), inplace=True)

# 2022
tmp = pd.DataFrame(filtered_df_2022.groupby('teamid').count().iloc[:, 0])
vspm_in_2021 = filtered_df_2021.groupby('teamid')['vspm'].mean()
historical_vspm_2022 = pd.merge(tmp, vspm_in_2021, how='left', left_index=True, right_index=True)
historical_vspm_2022.fillna(value=vspm_in_2021.mean(), inplace=True)

# 2023
tmp = pd.DataFrame(filtered_df_2023.groupby('teamid').count().iloc[:, 0])
vspm_in_2022 = filtered_df_2022.groupby('teamid')['vspm'].mean()
historical_vspm_2023 = pd.merge(tmp, vspm_in_2022, how='left', left_index=True, right_index=True)
historical_vspm_2023.fillna(value=vspm_in_2022.mean(), inplace=True)

**Dynamic vspm troughout the season**

In [1376]:
# 2021
tmp = filtered_df_2021[['date', 'teamid', 'gameid', 'vspm']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['vspm'].cumcount() 
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
vspm_dynamic = pd.DataFrame(tmp.groupby('teamid')['vspm'].shift()).groupby('teamid')['vspm'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, vspm_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_vspm'] = tmp['vspm_y'] / tmp['total_games']

tmp.fillna(value=tmp['dynamic_vspm'].mean(), inplace=True)
dynamic_vspm_2021 = tmp


# 2022
tmp = filtered_df_2022[['date', 'teamid', 'gameid', 'vspm']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['vspm'].cumcount() 
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
vspm_dynamic = pd.DataFrame(tmp.groupby('teamid')['vspm'].shift()).groupby('teamid')['vspm'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, vspm_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_vspm'] = tmp['vspm_y'] / tmp['total_games']

tmp.fillna(value=tmp['dynamic_vspm'].mean(), inplace=True)
dynamic_vspm_2022 = tmp


# 2023
tmp = filtered_df_2023[['date', 'teamid', 'gameid', 'vspm']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['vspm'].cumcount()
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
vspm_dynamic = pd.DataFrame(tmp.groupby('teamid')['vspm'].shift()).groupby('teamid')['vspm'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, vspm_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_vspm'] = tmp['vspm_y'] / tmp['total_games']

tmp.fillna(value=tmp['dynamic_vspm'].mean(), inplace=True)
dynamic_vspm_2023 = tmp

**Mixing historical and dynamic vspm and concating 2022 and 2023 data**

In [1377]:
vspm_2021 = pd.merge(dynamic_vspm_2021, historical_vspm_2021, how='left', left_on='teamid', right_on='teamid').reset_index()
vspm_2021['mixed_vspm'] = vspm_2021['dynamic_vspm'] * vspm_2021['dynamic_weighting'] + vspm_2021['vspm'] * (1 - vspm_2021['dynamic_weighting'])

vspm_2022 = pd.merge(dynamic_vspm_2022, historical_vspm_2022, how='left', left_on='teamid', right_on='teamid').reset_index()
vspm_2022['mixed_vspm'] = vspm_2022['dynamic_vspm'] * vspm_2022['dynamic_weighting'] + vspm_2022['vspm'] * (1 - vspm_2022['dynamic_weighting'])

vspm_2023 = pd.merge(dynamic_vspm_2023, historical_vspm_2023, how='left', left_on='teamid', right_on='teamid').reset_index()
vspm_2023['mixed_vspm'] = vspm_2023['dynamic_vspm'] * vspm_2022['dynamic_weighting'] + vspm_2023['vspm'] * (1 - vspm_2022['dynamic_weighting'])

vspm = pd.concat([vspm_2021, vspm_2022, vspm_2023], ignore_index=True).set_index(['gameid_x', 'teamid'])['mixed_vspm']

vspm.replace([np.inf, -np.inf], np.nan, inplace=True)
vspm = vspm.fillna(method='bfill')

### Derivation of the "Gold spend difference percentage" parameter

**Historical gspd**

In [1378]:
# 2021
tmp = pd.DataFrame(filtered_df_2021.groupby('teamid').count().iloc[:, 0])
gspd_in_2020 = filtered_df_2020.groupby('teamid')['gspd'].mean() * 100
historical_gspd_2021 = pd.merge(tmp, gspd_in_2020, how='left', left_index=True, right_index=True)
historical_gspd_2021.fillna(value=0, inplace=True)

# 2022
tmp = pd.DataFrame(filtered_df_2022.groupby('teamid').count().iloc[:, 0])
gspd_in_2021 = filtered_df_2021.groupby('teamid')['gspd'].mean() * 100
historical_gspd_2022 = pd.merge(tmp, gspd_in_2021, how='left', left_index=True, right_index=True)
historical_gspd_2022.fillna(value=0, inplace=True)

# 2023
tmp = pd.DataFrame(filtered_df_2023.groupby('teamid').count().iloc[:, 0])
gspd_in_2022 = filtered_df_2022.groupby('teamid')['gspd'].mean() * 100
historical_gspd_2023 = pd.merge(tmp, gspd_in_2022, how='left', left_index=True, right_index=True)
historical_gspd_2023.fillna(value=0, inplace=True)

**Dynamic gspd troughout the season**

In [1379]:
# 2021
tmp = filtered_df_2021[['date', 'teamid', 'gameid', 'gspd']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['gspd'].cumcount() 
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
gspd_dynamic = pd.DataFrame(tmp.groupby('teamid')['gspd'].shift()).groupby('teamid')['gspd'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, gspd_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_gspd'] = tmp['gspd_y'] / tmp['total_games'] * 100

tmp.fillna(value=0, inplace=True)
dynamic_gspd_2021 = tmp


# 2022
tmp = filtered_df_2022[['date', 'teamid', 'gameid', 'gspd']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['gspd'].cumcount() 
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
gspd_dynamic = pd.DataFrame(tmp.groupby('teamid')['gspd'].shift()).groupby('teamid')['gspd'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, gspd_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_gspd'] = tmp['gspd_y'] / tmp['total_games'] * 100

tmp.fillna(value=0, inplace=True)
dynamic_gspd_2022 = tmp


# 2023
tmp = filtered_df_2023[['date', 'teamid', 'gameid', 'gspd']].reset_index().set_index(['teamid', 'date']).sort_index()
games_played_dynamic = pd.DataFrame()

# Need to adapt, so that the information is as before the game
games_played_dynamic['total_games'] = tmp.groupby('teamid')['gspd'].cumcount()
games_played_dynamic['dynamic_weighting'] = np.clip(0, games_played_dynamic['total_games']/n_games, 1) # dynamic weighting factor
gspd_dynamic = pd.DataFrame(tmp.groupby('teamid')['gspd'].shift()).groupby('teamid')['gspd'].cumsum()

tmp = pd.merge(tmp, games_played_dynamic,  how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp = pd.merge(tmp, gspd_dynamic, how='left', left_on=['date','teamid'], right_on = ['date','teamid'])
tmp['dynamic_gspd'] = tmp['gspd_y'] / tmp['total_games'] * 100

tmp.fillna(value=0, inplace=True)
dynamic_gspd_2023 = tmp

**Mixing historical and dynamic gspd and concating 2022 and 2023 data**

In [1380]:
gspd_2021 = pd.merge(dynamic_gspd_2021, historical_gspd_2021, how='left', left_on='teamid', right_on='teamid').reset_index()
gspd_2021['mixed_gspd'] = gspd_2021['dynamic_gspd'] * gspd_2021['dynamic_weighting'] + gspd_2021['gspd'] * (1 - gspd_2021['dynamic_weighting'])

gspd_2022 = pd.merge(dynamic_gspd_2022, historical_gspd_2022, how='left', left_on='teamid', right_on='teamid').reset_index()
gspd_2022['mixed_gspd'] = gspd_2022['dynamic_gspd'] * gspd_2022['dynamic_weighting'] + gspd_2022['gspd'] * (1 - gspd_2022['dynamic_weighting'])

gspd_2023 = pd.merge(dynamic_gspd_2023, historical_gspd_2023, how='left', left_on='teamid', right_on='teamid').reset_index()
gspd_2023['mixed_gspd'] = gspd_2023['dynamic_gspd'] * gspd_2022['dynamic_weighting'] + gspd_2023['gspd'] * (1 - gspd_2022['dynamic_weighting'])

gspd = pd.concat([gspd_2021, gspd_2022, gspd_2023], ignore_index=True).set_index(['gameid_x', 'teamid'])['mixed_gspd']

gspd.replace([np.inf, -np.inf], np.nan, inplace=True)
gspd = gspd.fillna(method='bfill')

### Derivation of the "Champion winrate difference" parameter

In [1381]:
wr_chmp_player_1 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_1'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_2 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_2'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_3 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_3'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_4 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_4'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_5 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_5'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_6 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_6'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_7 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_7'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_8 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_8'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_9 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_9'], right_on=['patch', 'name_clean'])["Win%"]
wr_chmp_player_10 = pd.merge(champions.reset_index(), champions_wr_overview, how='left', left_on=['patch', 'player_10'], right_on=['patch', 'name_clean'])["Win%"]

wr_chmp_player_1 = wr_chmp_player_1.replace([np.nan, 0],50)
wr_chmp_player_2 = wr_chmp_player_2.replace([np.nan, 0],50)
wr_chmp_player_3 = wr_chmp_player_3.replace([np.nan, 0],50)
wr_chmp_player_4 = wr_chmp_player_4.replace([np.nan, 0],50)
wr_chmp_player_5 = wr_chmp_player_5.replace([np.nan, 0],50)
wr_chmp_player_6 = wr_chmp_player_6.replace([np.nan, 0],50)
wr_chmp_player_7 = wr_chmp_player_7.replace([np.nan, 0],50)
wr_chmp_player_8 = wr_chmp_player_8.replace([np.nan, 0],50)
wr_chmp_player_9 = wr_chmp_player_9.replace([np.nan, 0],50)
wr_chmp_player_10 = wr_chmp_player_10.replace([np.nan, 0],50)

In [1382]:
wr_chmp_list = [
    champions.reset_index()['gameid'],
    wr_chmp_player_1,
    wr_chmp_player_2,
    wr_chmp_player_3,
    wr_chmp_player_4,
    wr_chmp_player_5,
    wr_chmp_player_6,
    wr_chmp_player_7,
    wr_chmp_player_8,
    wr_chmp_player_9,
    wr_chmp_player_10,
]

In [1383]:
for i, series in enumerate(wr_chmp_list):
    if i == 0:   
        chmp_wr = pd.DataFrame(series)
        chmp_wr.columns = ["gameid"]
    else:
        chmp_wr[f"player_{i}"] = series

In [1384]:
chmp_wr['avg_blue_champion_wr'] = (chmp_wr['player_1'] + chmp_wr['player_2'] + chmp_wr['player_3'] + chmp_wr['player_4'] + chmp_wr['player_5'])/5

chmp_wr['avg_red_champion_wr'] = (chmp_wr['player_6'] + chmp_wr['player_7'] + chmp_wr['player_8'] + chmp_wr['player_9'] + chmp_wr['player_10'])/5

chmp_wr['chmp_wr_diff'] = chmp_wr['avg_blue_champion_wr'] - chmp_wr['avg_red_champion_wr']

chmp_wr = chmp_wr[['gameid', 'chmp_wr_diff']]

### Derivation of dmg type parameter

In [1385]:
dmg_types['clean_names'] = sorted(champions_wr_overview.reset_index()['name_clean'].unique())

In [1386]:
dmg_1 = pd.merge(champions, dmg_types, how='left', left_on='player_1', right_on='clean_names')['Dmg type']
dmg_2 = pd.merge(champions, dmg_types, how='left', left_on='player_2', right_on='clean_names')['Dmg type']
dmg_3 = pd.merge(champions, dmg_types, how='left', left_on='player_3', right_on='clean_names')['Dmg type']
dmg_4 = pd.merge(champions, dmg_types, how='left', left_on='player_4', right_on='clean_names')['Dmg type']
dmg_5 = pd.merge(champions, dmg_types, how='left', left_on='player_5', right_on='clean_names')['Dmg type']
dmg_6 = pd.merge(champions, dmg_types, how='left', left_on='player_6', right_on='clean_names')['Dmg type']
dmg_7 = pd.merge(champions, dmg_types, how='left', left_on='player_7', right_on='clean_names')['Dmg type']
dmg_8 = pd.merge(champions, dmg_types, how='left', left_on='player_8', right_on='clean_names')['Dmg type']
dmg_9 = pd.merge(champions, dmg_types, how='left', left_on='player_9', right_on='clean_names')['Dmg type']
dmg_10 = pd.merge(champions, dmg_types, how='left', left_on='player_10', right_on='clean_names')['Dmg type']

dmg_list = [
    champions.reset_index()['gameid'],
    dmg_1,
    dmg_2,
    dmg_3,
    dmg_4,
    dmg_5,
    dmg_6,
    dmg_7,
    dmg_8,
    dmg_9,
    dmg_10,
]

In [1387]:
for i, series in enumerate(dmg_list):
    if i == 0:   
        dmg = pd.DataFrame(series)
        dmg.columns = ["gameid"]
    else:
        dmg[f"player_{i}"] = series

In [1388]:
dmg = dmg.replace(["AD"], 1)
dmg = dmg.replace(["AP"], 0)

dmg['blue_all_dmg'] = 0
dmg['red_all_dmg'] = 0

dmg['blue_all_dmg'] = dmg.apply(lambda row: 1 if row['player_1'] + row['player_2'] + 
                                row['player_3'] + row['player_4'] + row['player_5'] > 3 or 
                                row['player_1'] + row['player_2'] + 
                                row['player_3'] + row['player_4'] + row['player_5'] < 2 else 0, axis=1)

dmg['red_all_dmg'] = dmg.apply(lambda row: 1 if row['player_6'] + row['player_7'] + 
                                row['player_8'] + row['player_9'] + row['player_10'] > 3 or 
                                row['player_6'] + row['player_7'] + 
                                row['player_8'] + row['player_9'] + row['player_10'] < 2 else 0, axis=1)

dmg['comp_diff'] = dmg['blue_all_dmg'] - dmg['red_all_dmg']

dmg = dmg.set_index('gameid')['comp_diff']

### Derivation of the "player x champion wr"

In [1401]:
players

Unnamed: 0_level_0,patch,player_1,player_2,player_3,player_4,player_5,player_6,player_7,player_8,player_9,player_10
gameid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ESPORTSTMNT03/1632489,10.25,oe:player:b9972f46c1e52797d66236b118d2970,oe:player:92544d7a994bd6841cccd9f6e42e3de,oe:player:5f8ff49cd56eae61d635966b183a451,oe:player:78081d0b3c01f1f295d52d53744261a,oe:player:dee54127bb56d2e2540ba71cc416f1c,oe:player:6fbf57b984dbf130701e09ea4110bb8,oe:player:dda4a873265dfd06df50a0d8a294908,oe:player:0582fabc486c42d0db3bf52f56c6fc3,oe:player:18bdb3898e7fecb3bb0be7ff075d509,oe:player:b0ec3df39d61f953e7af0afd8eb8c4a
ESPORTSTMNT03/1632500,10.25,oe:player:6fbf57b984dbf130701e09ea4110bb8,oe:player:dda4a873265dfd06df50a0d8a294908,oe:player:0582fabc486c42d0db3bf52f56c6fc3,oe:player:18bdb3898e7fecb3bb0be7ff075d509,oe:player:b0ec3df39d61f953e7af0afd8eb8c4a,oe:player:b9972f46c1e52797d66236b118d2970,oe:player:92544d7a994bd6841cccd9f6e42e3de,oe:player:5f8ff49cd56eae61d635966b183a451,oe:player:78081d0b3c01f1f295d52d53744261a,oe:player:dee54127bb56d2e2540ba71cc416f1c
ESPORTSTMNT03/1632502,10.25,oe:player:6fbf57b984dbf130701e09ea4110bb8,oe:player:dda4a873265dfd06df50a0d8a294908,oe:player:0582fabc486c42d0db3bf52f56c6fc3,oe:player:18bdb3898e7fecb3bb0be7ff075d509,oe:player:b0ec3df39d61f953e7af0afd8eb8c4a,oe:player:b9972f46c1e52797d66236b118d2970,oe:player:92544d7a994bd6841cccd9f6e42e3de,oe:player:5f8ff49cd56eae61d635966b183a451,oe:player:78081d0b3c01f1f295d52d53744261a,oe:player:dee54127bb56d2e2540ba71cc416f1c
6909-9183,11.01,oe:player:0c74b7f78409a4022a2c4c5a5ca3ee1,oe:player:f3322656ae1facbf4763677fe2506a3,oe:player:ed29043422e3c515e1cb292464b990b,oe:player:c54e1090ef1faa736b12493051c4855,oe:player:e78da55d6db43d982f841204a8e51ac,oe:player:fb66ef5885b4be9323905b821dc3a42,oe:player:a408b9f6a7b5f82bfac03a88e026986,oe:player:542b1268b090a5fc82442efbd8d8165,oe:player:6d664d1402cd16be08e335feacf73b0,oe:player:90651ebea9a35ec4e018c8157492e17
6909-9184,11.01,oe:player:0c74b7f78409a4022a2c4c5a5ca3ee1,oe:player:f3322656ae1facbf4763677fe2506a3,oe:player:ed29043422e3c515e1cb292464b990b,oe:player:c54e1090ef1faa736b12493051c4855,oe:player:e78da55d6db43d982f841204a8e51ac,oe:player:fb66ef5885b4be9323905b821dc3a42,oe:player:a408b9f6a7b5f82bfac03a88e026986,oe:player:542b1268b090a5fc82442efbd8d8165,oe:player:6d664d1402cd16be08e335feacf73b0,oe:player:90651ebea9a35ec4e018c8157492e17
...,...,...,...,...,...,...,...,...,...,...,...
ESPORTSTMNT01_3334332,13.05,oe:player:d9993db0d276065093b77b8e86996fc,oe:player:e5841c7215d35ed0c9522450e7fb57a,oe:player:7773e8883f4bf85d29f4e7a0b2d9709,oe:player:77c10036edafc6b267a710e0525bf7a,oe:player:f987efb3da0d056699079e5f6755ef2,oe:player:d18a2aec1fa9649ae9a48bded2f4ccf,oe:player:8a0883855988aadb0c22117c2c96da7,oe:player:2e6e9d5865bd2e4e5a4c33372366cb6,oe:player:32021f59c40539f27ff6300bb840ca2,oe:player:22d4497301dd0fefcac2d838ad2d1b4
ESPORTSTMNT06_2778084,13.04,oe:player:6a9abcfebed9c636ca90560b7f49855,oe:player:404079b4e478ea2a8b57904c640311b,oe:player:8f862245fbdb4d065e46c83630b9495,oe:player:1c51707d8fd9873c9fd184e2cd01b33,oe:player:bb6ac5763c6576e104a7a2f59eb4ddf,oe:player:2b819506567c258f6fffdd4ec78ca0c,oe:player:ab7f28ca18446cb18274a884b1525f7,oe:player:da87886941820cfc2eee112d0143e90,oe:player:1dad889ff236635dc1e03d02312e79c,oe:player:86dbd2fb95030a3ed1efee6808eb125
ESPORTSTMNT01_3334356,13.05,oe:player:d18a2aec1fa9649ae9a48bded2f4ccf,oe:player:8a0883855988aadb0c22117c2c96da7,oe:player:2e6e9d5865bd2e4e5a4c33372366cb6,oe:player:32021f59c40539f27ff6300bb840ca2,oe:player:22d4497301dd0fefcac2d838ad2d1b4,oe:player:d9993db0d276065093b77b8e86996fc,oe:player:e5841c7215d35ed0c9522450e7fb57a,oe:player:7773e8883f4bf85d29f4e7a0b2d9709,oe:player:77c10036edafc6b267a710e0525bf7a,oe:player:f987efb3da0d056699079e5f6755ef2
ESPORTSTMNT01_3333424,13.05,oe:player:d18a2aec1fa9649ae9a48bded2f4ccf,oe:player:8a0883855988aadb0c22117c2c96da7,oe:player:2e6e9d5865bd2e4e5a4c33372366cb6,oe:player:32021f59c40539f27ff6300bb840ca2,oe:player:22d4497301dd0fefcac2d838ad2d1b4,oe:player:d9993db0d276065093b77b8e86996fc,oe:player:e5841c7215d35ed0c9522450e7fb57a,oe:player:7773e8883f4bf85d29f4e7a0b2d9709,oe:player:77c10036edafc6b267a710e0525bf7a,oe:player:f987efb3da0d056699079e5f6755ef2


### Summarizing the results and parameters from perspective of blue

In [1389]:
only_blue_2021 = filtered_df_2021.loc[filtered_df_2021['side'].isin(["Blue"])].set_index('gameid')
only_red_2021 = filtered_df_2021.loc[filtered_df_2021['side'].isin(["Red"])].set_index('gameid')

only_blue_2022 = filtered_df_2022.loc[filtered_df_2022['side'].isin(["Blue"])].set_index('gameid')
only_red_2022 = filtered_df_2022.loc[filtered_df_2022['side'].isin(["Red"])].set_index('gameid')

only_blue_2023 = filtered_df_2023.loc[filtered_df_2023['side'].isin(["Blue"])].set_index('gameid')
only_red_2023 = filtered_df_2023.loc[filtered_df_2023['side'].isin(["Red"])].set_index('gameid')

blue_vs_red_2021 = pd.merge(only_blue_2021['teamid'], only_red_2021['teamid'], how='left', left_index=True, right_index=True)
blue_vs_red_2022 = pd.merge(only_blue_2022['teamid'], only_red_2022['teamid'], how='left', left_index=True, right_index=True)
blue_vs_red_2023 = pd.merge(only_blue_2023['teamid'], only_red_2023['teamid'], how='left', left_index=True, right_index=True)

blue_vs_red = pd.concat([blue_vs_red_2021, blue_vs_red_2022, blue_vs_red_2023]).reset_index()
blue_results = pd.concat([only_blue_2021[['result']], only_blue_2022[['result']], only_blue_2023[['result']]]).reset_index()
blue_vs_red = blue_vs_red.dropna(axis=0)

In [1390]:
blue_vs_red = blue_vs_red.dropna(axis=0)

In [1391]:
# Merge the blue side data (controlled by 'teamid_x' on the left side)
model_df = pd.merge(blue_vs_red, wr, left_on=['gameid', 'teamid_x'], right_on=['gameid_x', 'teamid'], how="left") # wr blue side

model_df = pd.merge(model_df, streak, left_on=['gameid', 'teamid_x'], right_on=['gameid', 'teamid'], how="left") # streak blue side
model_df = pd.merge(model_df, gspd, left_on=['gameid', 'teamid_x'], right_on=['gameid_x', 'teamid'], how="left") # gspd blue side
model_df = pd.merge(model_df, vspm, left_on=['gameid', 'teamid_x'], right_on=['gameid_x', 'teamid'], how="left") # vspm red side

In [1392]:
# Merge the red side data (controlled by 'teamid_y' on the right side)
model_df = pd.merge(model_df, wr, left_on=['gameid', 'teamid_y'], right_on=['gameid_x', 'teamid'], how="left") # wr red side
model_df = pd.merge(model_df, streak, left_on=['gameid', 'teamid_y'], right_on=['gameid', 'teamid'], how="left") # streak red side
model_df = pd.merge(model_df, gspd, left_on=['gameid', 'teamid_y'], right_on=['gameid_x', 'teamid'], how="left") # gspd red side
model_df = pd.merge(model_df, vspm, left_on=['gameid', 'teamid_y'], right_on=['gameid_x', 'teamid'], how="left") # vspm red side

# Merge the results from blue perspective
model_df = pd.merge(model_df, blue_results, left_on=['gameid'], right_on=['gameid'])

In [1393]:
model_df['wr_diff'] = model_df['mixed_wr_x'] - model_df['mixed_wr_y']
model_df['streak_diff'] = model_df['streak_x'] - model_df['streak_y']
model_df['gspd_diff'] = model_df['mixed_gspd_x'] - model_df['mixed_gspd_y']
model_df['vspm_diff'] = model_df['mixed_vspm_x'] - model_df['mixed_vspm_y']

# Merge the champion wr difference
model_df = pd.merge(model_df, chmp_wr, left_on='gameid', right_on='gameid')

# Merge the player experience difference
model_df = pd.merge(model_df, exp, left_on='gameid', right_on='gameid')

# Merge the player experience difference
model_df = pd.merge(model_df, dmg, left_on='gameid', right_on='gameid')

model_df = model_df.drop(['teamid_x', 'teamid_y', 'mixed_wr_x', 'streak_x', 'mixed_gspd_x', 'mixed_vspm_x', 'mixed_wr_y', 'streak_y', 'mixed_gspd_y', 'mixed_vspm_y'], axis=1)
model_df = model_df.set_index('gameid')

In [1394]:
model_df

Unnamed: 0_level_0,result,wr_diff,streak_diff,gspd_diff,vspm_diff,chmp_wr_diff,exp_diff,comp_diff
gameid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ESPORTSTMNT03/1671978,1,-19.462489,0,-5.068608,-0.061418,7.000,10.8,0
ESPORTSTMNT03/1671992,1,22.211804,-1,8.548898,0.341123,0.200,-6.0,0
ESPORTSTMNT03/1672030,1,-18.254386,0,-5.069502,0.161160,1.800,-4.0,0
ESPORTSTMNT03/1672038,1,-13.028571,0,-1.186776,-0.299820,3.800,-1.6,0
ESPORTSTMNT03/1672041,0,-17.615262,0,-5.939480,-0.513825,-1.600,30.2,0
...,...,...,...,...,...,...,...,...
ESPORTSTMNT01_3335201,0,-5.315237,0,-4.358168,-0.907785,1.824,0.0,-1
ESPORTSTMNT01_3333294,0,-8.951149,0,-4.732422,-0.845291,1.174,7.2,0
ESPORTSTMNT01_3333302,0,-21.768725,-1,-7.174507,0.646078,0.638,0.0,-1
ESPORTSTMNT06_2778084,1,5.505115,1,3.081617,-0.214532,-8.326,-22.8,-1


In [1395]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [1396]:
train = model_df[0:(int(len(model_df)*0.9))]
test = model_df[(int(len(model_df)*0.9)):]

In [1397]:
X_train = train[['wr_diff', 'gspd_diff']]
y_train = train['result']

X_test = test[['wr_diff', 'gspd_diff']]
y_test = test['result']

model = LogisticRegression()
model.fit(X_train, y_train)

model.score(X_test, y_test)

0.6172506738544474

In [1398]:
X_train = train[['wr_diff', 'gspd_diff', 'streak_diff', 'chmp_wr_diff']]
y_train = train['result']

X_test = test[['wr_diff', 'gspd_diff', 'streak_diff', 'chmp_wr_diff']]
y_test = test['result']

model = LogisticRegression()
model.fit(X_train, y_train)

model.score(X_test, y_test)

0.628032345013477

In [1399]:
X_train = train[['wr_diff', 'gspd_diff', 'streak_diff', 'chmp_wr_diff', 'exp_diff']]
y_train = train['result']

X_test = test[['wr_diff', 'gspd_diff', 'streak_diff', 'chmp_wr_diff', 'exp_diff']]
y_test = test['result']

model = LogisticRegression()
model.fit(X_train, y_train)

model.score(X_test, y_test)

0.633423180592992

In [1400]:
X_train = train[['wr_diff', 'gspd_diff', 'streak_diff', 'chmp_wr_diff', 'exp_diff', 'vspm_diff', 'comp_diff']]
y_train = train['result']

X_test = test[['wr_diff', 'gspd_diff', 'streak_diff', 'chmp_wr_diff', 'exp_diff', 'vspm_diff', 'comp_diff']]
y_test = test['result']

model = LogisticRegression()
model.fit(X_train, y_train)

model.score(X_test, y_test)

0.6361185983827493