## Dependencies

In [1]:
from collections import defaultdict

import pandas as pd

## Load and Preprocess Data

In [2]:
# Fetch the events and matches tables (CSV) from the statbotics-csvs
# repository on GitHub. Both reads need network access.
events_csv_url = "https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/v1/events.csv"
matches_csv_url = "https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/v1/matches.csv"

events_df = pd.read_csv(events_csv_url)
matches_df = pd.read_csv(matches_csv_url)

In [3]:
# Lookup from event key to that event's week; later cells reuse this dict.
# Built column-wise instead of via iterrows(); duplicate keys still resolve
# to the last occurrence, exactly as the dict comprehension did.
event_to_week = dict(zip(events_df["key"], events_df["week"]))

# Direct indexing (rather than Series.map with a dict) is deliberate: an event
# key or winner label missing from the lookup raises KeyError instead of
# silently turning into NaN.
matches_df["week"] = matches_df["event"].apply(lambda event_key: event_to_week[event_key])

# Encode the actual result on the same scale as the *_win_prob columns:
# 1 when red won, 0 when blue won, 0.5 for a draw.
winner_to_red_outcome = {"blue": 0, "draw": 0.5, "red": 1}
matches_df["win_prob"] = matches_df["winner"].apply(lambda label: winner_to_red_outcome[label])

# Per-model accuracy flag and squared error, computed as whole-column
# operations. Row-wise DataFrame.apply(axis=1) did the same arithmetic one
# Python call per match and can return a DataFrame on an empty frame.
for model in ("elo", "opr", "mix"):
    # 1 when the model's predicted winner label equals the actual label.
    matches_df[model + "_acc"] = (matches_df[model + "_winner"] == matches_df["winner"]).astype(int)
    # Squared gap between the predicted red win probability and the outcome.
    matches_df[model + "_mse"] = (matches_df[model + "_win_prob"] - matches_df["win_prob"]) ** 2

In [4]:
# Download one Elo CSV per season from the statbotics-csvs repository, tag
# every row with its season, and stack the seasons into a single frame.
# 2021 is skipped.
season_years = [y for y in range(2002, 2023) if y != 2021]

sykes_dfs = []
for year in season_years:
    season_csv_url = "https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/elo/" + str(year) + ".csv"
    curr_df = pd.read_csv(season_csv_url)
    curr_df["year"] = year
    sykes_dfs.append(curr_df)

# Each season keeps its own row labels, so index values repeat across seasons.
sykes_df = pd.concat(sykes_dfs)

In [5]:
def _red_win_likelihood_to_prob(raw):
    """Convert one "Red win likelihood" entry to a fraction by dividing by 100.

    Strings are read up to the first "%" (e.g. "55%" -> 0.55). Anything else
    is treated as a number and divided directly, so NaN stays NaN. Branching
    on ``str`` rather than on the exact type ``float`` means ints and NumPy
    scalars no longer fall into the string branch and fail on ``.split``.
    """
    if isinstance(raw, str):
        return float(raw.split("%")[0]) / 100
    return raw / 100


# Events absent from event_to_week get the sentinel week -1.
sykes_df["week"] = sykes_df["Key"].apply(lambda event_key: event_to_week.get(event_key, -1))
sykes_df["elo_win_prob"] = sykes_df["Red win likelihood"].apply(_red_win_likelihood_to_prob)

# Predicted winner flag: 1 when the red probability is strictly above 0.5,
# otherwise 0 (NaN compares False and therefore maps to 0, as before).
sykes_df["elo_winner"] = (sykes_df["elo_win_prob"] > 0.5).astype(int)
sykes_df["elo_acc"] = sykes_df["elo_winner"] == sykes_df["Red victory"]

# Squared error as a whole-column operation instead of a row-wise apply.
sykes_df["elo_mse"] = (sykes_df["elo_win_prob"] - sykes_df["Red victory"]) ** 2

## Simple Baseline

In [6]:
# Baseline: within each season, predict from the number of wins every team has
# accumulated so far that season. Prints year, accuracy and mean squared error.
outcome_as_red_score = {"blue": 0, "draw": 0.5, "red": 1}

for year in range(2002, 2023):
    if year == 2021:
        continue

    season_matches = matches_df[matches_df["year"] == year].sort_values(by=["time"])

    n_correct = 0
    squared_error_sum = 0
    n_matches = 0

    # Running win tally per team number, reset at the start of every season.
    wins_so_far = defaultdict(int)

    for match in season_matches.itertuples(index=False):
        red_ids = [int(team) for team in match.red.split(",")]
        blue_ids = [int(team) for team in match.blue.split(",")]
        red_total = sum(wins_so_far[team] for team in red_ids)
        blue_total = sum(wins_so_far[team] for team in blue_ids)
        actual = outcome_as_red_score[match.winner]

        # Red's share of the prior wins. The floors keep the ratio defined:
        # with no prior wins on either side this is 0.5 / 1 = 0.5.
        predicted = max(0.5, red_total) / max(1, red_total + blue_total)

        # Uncomment/indent these lines for Champs-specific results
        # if event_to_week[match.event] == 8:
        # round(0.5) is 0 in Python, so an exact 0.5 counts as a blue pick;
        # a draw (0.5) can never match a rounded prediction.
        if round(predicted) == actual:
            n_correct += 1
        squared_error_sum += (predicted - actual) ** 2
        n_matches += 1

        # Credit a win to every team on the winning alliance; draws credit nobody.
        if match.winner == "red":
            victors = red_ids
        elif match.winner == "blue":
            victors = blue_ids
        else:
            victors = []
        for team in victors:
            wins_so_far[team] += 1

    if n_matches > 0:
        print(year, "\t", round(n_correct / n_matches, 4), "  \t", round(squared_error_sum / n_matches, 4))

2002 	 0.5812   	 0.2367
2003 	 0.6105   	 0.2333
2004 	 0.6104   	 0.2186
2005 	 0.6197   	 0.2231
2006 	 0.6351   	 0.2215
2007 	 0.5842   	 0.2209
2008 	 0.6347   	 0.2157
2009 	 0.6297   	 0.2202
2010 	 0.6019   	 0.1948
2011 	 0.6676   	 0.1978
2012 	 0.641   	 0.2162
2013 	 0.6738   	 0.2132
2014 	 0.6624   	 0.218
2015 	 0.666   	 0.2149
2016 	 0.6523   	 0.2173
2017 	 0.6166   	 0.2241
2018 	 0.6829   	 0.2158
2019 	 0.6568   	 0.2155
2020 	 0.6196   	 0.2367
2022 	 0.6918   	 0.209


## Display Results

In [7]:
# One output row per season (2021 skipped): mean accuracy and mean squared
# error for each model in matches_df, followed by the same pair from sykes_df.
print("Elo Acc\t\tElo MSE\t\tOPR Acc\t\tOPR MSE\t\tMix Acc\t\tMix MSE\t\tSykes Elo Acc\tSykes Elo MSE")

# Column order here is the left-to-right order of the printed table.
statbotics_metric_cols = ["elo_acc", "elo_mse", "opr_acc", "opr_mse", "mix_acc", "mix_mse"]
sykes_metric_cols = ["elo_acc", "elo_mse"]

for year in range(2002, 2023):
    if year == 2021:
        continue

    curr_year_matches_df = matches_df[matches_df["year"] == year]
    curr_year_sykes_df = sykes_df[sykes_df["year"] == year]

    # Uncomment these lines for Champs-specific results
    # curr_year_matches_df = curr_year_matches_df[curr_year_matches_df.week == 8]
    # curr_year_sykes_df = curr_year_sykes_df[curr_year_sykes_df.week == 8]

    # Row counts for the season; not used further within this cell.
    N = len(curr_year_matches_df)
    sykes_N = len(curr_year_sykes_df)

    season_means = [curr_year_matches_df[col].mean() for col in statbotics_metric_cols]
    season_means += [curr_year_sykes_df[col].mean() for col in sykes_metric_cols]

    # Interleave the rounded values with the column separator, then drop the
    # trailing separator so the row matches the header layout.
    row_cells = []
    for value in season_means:
        row_cells += [round(value, 4), "  \t"]
    print(*row_cells[:-1])

Elo Acc		Elo MSE		OPR Acc		OPR MSE		Mix Acc		Mix MSE		Sykes Elo Acc	Sykes Elo MSE
0.5626   	 0.2355   	 0.5298   	 0.2643   	 0.5389   	 0.2428   	 0.5544   	 0.2355
0.6136   	 0.2231   	 0.6007   	 0.2414   	 0.6202   	 0.2245   	 0.619   	 0.2229
0.6213   	 0.2124   	 0.6263   	 0.218   	 0.6398   	 0.2055   	 0.6159   	 0.2133
0.662   	 0.2085   	 0.594   	 0.2388   	 0.6615   	 0.2084   	 0.6625   	 0.2086
0.6613   	 0.2041   	 0.6381   	 0.2225   	 0.6652   	 0.2019   	 0.6655   	 0.2031
0.619   	 0.2093   	 0.4969   	 0.2542   	 0.6077   	 0.2142   	 0.6655   	 0.2031
0.6642   	 0.1939   	 0.633   	 0.2151   	 0.6578   	 0.1928   	 0.662   	 0.1953
0.6808   	 0.1959   	 0.6496   	 0.2194   	 0.6801   	 0.1967   	 0.6816   	 0.1957
0.6239   	 0.1764   	 0.5506   	 0.206   	 0.6244   	 0.1779   	 0.6248   	 0.1762
0.7093   	 0.1677   	 0.6466   	 0.1909   	 0.7051   	 0.1694   	 0.7097   	 0.1679
0.6837   	 0.1877   	 0.6739   	 0.1956   	 0.6953   	 0.184   	 0.685   	 0.1874
0.72