In [1]:
from collections import defaultdict
import json
import statistics

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
years_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/years.csv")

In [3]:
team_years_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/team_years.csv")

In [4]:
events_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/events.csv")

In [5]:
matches_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/matches.csv")

In [6]:
year_matches_dict = {k: v for k, v in matches_df.groupby(["year"])}
year_teams_dict = {k: v for k, v in team_years_df.groupby(["year"])}

In [7]:
elos = defaultdict(dict)
auto_elos = defaultdict(dict)
teleop_elos = defaultdict(dict)
endgame_elos = defaultdict(dict)
all_elos = defaultdict(dict)

for year in range(2014, 2016):
    for _, team_year in year_teams_dict[year].iterrows():
        team_num = team_year.team
        elos[year][team_num] = 8 * [team_year.elo_max]
        all_elos[year][team_num] = 8 * [team_year.elo_max]

In [8]:
def get_stats(year):
    week_one = list(events_df[(events_df.year == year) & (events_df.week == 1)]["key"])
    autos, teleops, endgames, scores = [], [], [], []
    for _, match in matches_df[(matches_df.year == year)].iterrows():
        if match.event in week_one:
            if match.red_auto >= 0 and match.blue_auto >= 0:
                autos.extend([match.red_auto, match.blue_auto])
            if match.red_teleop >= 0 and match.blue_teleop >= 0:
                teleops.extend([match.red_score - match.red_auto - match.red_endgame, match.blue_score - match.blue_auto - match.red_auto])
            if match.red_endgame >= 0 and match.blue_endgame >= 0:
                endgames.extend([match.red_endgame, match.blue_endgame])
            if match.red_score >= 0 and match.blue_score >= 0:
                scores.extend([match.red_score, match.blue_score])

    auto_mean, auto_sd = sum(autos) / len(autos), statistics.pstdev(autos)
    teleop_mean, teleop_sd = sum(teleops) / len(teleops), statistics.pstdev(teleops)
    endgame_mean, endgame_sd = sum(endgames) / len(endgames), statistics.pstdev(endgames)
    score_mean, score_sd = sum(scores) / len(scores), statistics.pstdev(scores)

    return auto_mean, auto_sd, teleop_mean, teleop_sd, endgame_mean, endgame_sd, score_mean, score_sd

year_stats = {year: get_stats(year) for year in range(2016, 2021)}

In [9]:
global_power = 0.90
global_factor = 0.25

def score_pred_func(red_score, blue_score, pred_margin):
    red_win_prob = 1 / (10 ** (250 * (-pred_margin) / 400) + 1)
    
    if red_score > blue_score:
        return red_win_prob >= 0.5, (1 - red_win_prob) ** 2 
    elif red_score < blue_score:
        return red_win_prob <= 0.5, (red_win_prob) ** 2
    
    return 0, (0.5 - red_win_prob) ** 2

def nl(x, power):
    if x == 0:
        return x
    return x / abs(x) * abs(x) ** power


def elo_sum(x, factor):
    x = list(x)
    return sum(x) + factor * max(x) - factor * min(x)

def update_elo(elo, update, elos, factor):
    mult = 1
    if elo == max(elos):
        mult += factor
    elif elo == min(elos):
        mult -= factor
    return round(elo + mult * update, 2)

total_acc_new, total_mse_new = 0, 0
total_acc_old, total_mse_old = 0, 0
total_acc_mix, total_mse_mix = 0, 0
for year in range(2016, 2021):
    count = 0
    acc_new, mse_new = 0, 0
    acc_old, mse_old = 0, 0
    acc_mix, mse_mix = 0, 0
    
    years_row = years_df[years_df.year == year].iloc[0]
    
    elo_acc, elo_mse = years_row.elo_acc, years_row.elo_mse
    auto_mean, auto_sd, teleop_mean, teleop_sd, endgame_mean, endgame_sd, score_mean, score_sd = year_stats[year]
    
    year_matches_df = year_matches_dict[year].sort_values(by=["time"])
    year_teams_df = year_teams_dict[year]
    
    for _, team_year in year_teams_df.iterrows():
        team_num = team_year.team
        
        for (elo_dict, source_elo_dict) in [(elos, elos), (auto_elos, auto_elos), (teleop_elos, all_elos), (endgame_elos, all_elos)]:
            elo_1yr = source_elo_dict[year - 1].get(team_num, elos[year - 1].get(team_num, []))
            elo_1yr = 1450 if len(elo_1yr) < 8 else max(elo_1yr[7:])
            elo_2yr = source_elo_dict[year - 2].get(team_num, elos[year - 2].get(team_num, []))
            elo_2yr = 1450 if len(elo_2yr) < 8 else max(elo_2yr[7:])
            elo_dict[year][team_num] = [0.56 * elo_1yr + 0.24 * elo_2yr + 0.20 * 1450]
    
    for _, match in year_matches_df.iterrows():
        if not (match.red_auto >= 0 and match.red_teleop >= 0 and match.red_endgame >= 0 and match.red_score >= 0):
            continue
            
        red_teams = [int(x) for x in match.red.split(",")]
        blue_teams = [int(x) for x in match.blue.split(",")]
        
        red_elo_sum = elo_sum([elos[year][x][-1] for x in red_teams], 0)
        blue_elo_sum = elo_sum([elos[year][x][-1] for x in blue_teams], 0)
        pred_margin1 = (red_elo_sum - blue_elo_sum) / 250
        acc, mse = score_pred_func(match.red_score, match.blue_score, pred_margin1)
        acc_old += acc
        mse_old += mse
        
        red_auto_pred = auto_mean + auto_sd * nl((elo_sum([auto_elos[year][x][-1] for x in red_teams], global_factor) - 4500) / 250, global_power)
        blue_auto_pred = auto_mean + auto_sd * nl((elo_sum([auto_elos[year][x][-1] for x in blue_teams], global_factor) - 4500) / 250, global_power)
        red_endgame_pred = endgame_mean + endgame_sd * nl((elo_sum([endgame_elos[year][x][-1] for x in red_teams], global_factor) - 4500) / 250, global_power)
        blue_endgame_pred = endgame_mean + endgame_sd * nl((elo_sum([endgame_elos[year][x][-1] for x in blue_teams], global_factor) - 4500) / 250, global_power)
        red_teleop = nl(elo_sum([teleop_elos[year][x][-1] for x in red_teams], global_factor) / 250, global_power)
        blue_teleop = nl(elo_sum([teleop_elos[year][x][-1] for x in blue_teams], global_factor) / 250, global_power)
        teleop_margin = teleop_sd * (red_teleop - blue_teleop)
        pred_margin2 = ((red_auto_pred - blue_auto_pred) + teleop_margin + (red_endgame_pred - blue_endgame_pred)) / score_sd
        acc, mse = score_pred_func(match.red_score, match.blue_score, pred_margin2)
        acc_new += acc
        mse_new += mse
        
        acc, mse = score_pred_func(match.red_score, match.blue_score, (pred_margin1 + pred_margin2) / 2)
        acc_mix += acc
        mse_mix += mse
        
        count += 1
        
        k = 4 if match.playoff else 12
        
        for (mean, sd, elo_dict, teams, get_func, power, factor) in [
            (auto_mean, auto_sd, auto_elos, red_teams, lambda m: m.red_auto, global_power, global_factor),
            (auto_mean, auto_sd, auto_elos, blue_teams, lambda m: m.blue_auto, global_power, global_factor),
            (endgame_mean, endgame_sd, endgame_elos, red_teams, lambda m: m.red_endgame, global_power, global_factor),
            (endgame_mean, endgame_sd, endgame_elos, blue_teams, lambda m: m.blue_endgame, global_power, global_factor),
        ]:
            temp_elos = [elo_dict[year][x][-1] for x in teams]
            temp_elo_sum = elo_sum(temp_elos, factor)
            score_pred = mean / sd + nl((temp_elo_sum - 4500) / 250, power)
            score = get_func(match) / sd
            for x, temp_elo in zip(teams, temp_elos):
                elo_dict[year][x].append(update_elo(elo_dict[year][x][-1], k * (score - score_pred), temp_elos, factor))
        
        for (sd, elo_dict, red_get_func, blue_get_func, power, factor) in [
            (teleop_sd, teleop_elos, lambda m: m.red_score - m.red_auto - m.red_endgame, lambda m: m.blue_score - m.blue_auto - m.blue_endgame, global_power, global_factor),
            (score_sd, elos, lambda m: m.red_score, lambda m: m.blue_score, 1, 0),
        ]:
            red_elos = [elo_dict[year][x][-1] for x in red_teams]
            red_elo_sum = elo_sum(red_elos, factor)
            blue_elos = [elo_dict[year][x][-1] for x in blue_teams]
            blue_elo_sum = elo_sum(blue_elos, factor)
            win_margin = (red_get_func(match) - blue_get_func(match)) / sd
            pred_win_margin = nl(red_elo_sum / 250, power) - nl(blue_elo_sum / 250, power)
            for x, temp_elo in zip(red_teams, red_elos):
                elo_dict[year][x].append(update_elo(elo_dict[year][x][-1], k * (win_margin - pred_win_margin), red_elos, factor))
            for x, temp_elo in zip(blue_teams, blue_elos):
                elo_dict[year][x].append(update_elo(elo_dict[year][x][-1], -k * (win_margin - pred_win_margin), blue_elos, factor))
    
    all_elos[year] = {
        k: [
            round(
            1500 + 
            (auto_sd * (auto_elos[year][k][i] - 1500) + teleop_sd * (teleop_elos[year][k][i] - 1500) + endgame_sd * (endgame_elos[year][k][i] - 1500)) / 
            (auto_sd + teleop_sd + endgame_sd), 2)
            for i in range(len(elos[year][k]))
        ]for k in elos[year].keys()
    }
    
    total_acc_new += acc_new / count
    total_mse_new += mse_new / count
    total_acc_old += acc_old / count
    total_mse_old += mse_old / count
    total_acc_mix += acc_mix / count
    total_mse_mix += mse_mix / count

    print(year, "\t", round(acc_new / count, 4), round(mse_new / count, 4), "\t", round(acc_old / count, 4), round(mse_old / count , 4), "\t", round(acc_mix / count, 4), round(mse_mix / count, 4))
    
print()
print("Avg", "\t", round(total_acc_new / 5, 4), round(total_mse_new / 5, 4), "\t", round(total_acc_old / 5, 4), round(total_mse_old / 5 , 4), "\t", round(total_acc_mix / 5, 4), round(total_mse_mix / 5, 4))

2016 	 0.7066 0.1853 	 0.7023 0.1864 	 0.7064 0.1853
2017 	 0.6573 0.2099 	 0.6498 0.2092 	 0.6562 0.2076
2018 	 0.7478 0.1704 	 0.7402 0.1723 	 0.744 0.1705
2019 	 0.707 0.181 	 0.7025 0.1827 	 0.7036 0.1811
2020 	 0.7042 0.1907 	 0.6948 0.193 	 0.7003 0.191

Avg 	 0.7046 0.1875 	 0.6979 0.1887 	 0.7021 0.1871


In [10]:
year = 2018

new_data = list(sorted(all_elos[year].items(), key=lambda x: -x[1][-1]))[:10]
old_data = list(sorted(elos[year].items(), key=lambda x: -x[1][-1]))[:10]

for num, data in new_data:
    print(num, "\t", data[-1])
    
print("\n")
    
for num, data in old_data:
    print(num, "\t", data[-1])

254 	 2016.25
2056 	 2013.7
1678 	 1956.7
2046 	 1949.42
2767 	 1940.69
195 	 1938.25
694 	 1923.14
1323 	 1921.33
2910 	 1918.63
118 	 1918.61


254 	 2048.59
2056 	 2041.06
1678 	 1998.25
2046 	 1972.38
694 	 1953.41
2910 	 1950.63
1323 	 1947.59
195 	 1943.74
2590 	 1942.81
2767 	 1941.71
