In [1]:
from collections import defaultdict
import json
import statistics

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
years_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/years.csv")

In [3]:
team_years_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/team_years.csv")

In [4]:
events_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/events.csv")

In [5]:
matches_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/matches.csv")

In [6]:
year_matches_dict = {k: v for k, v in matches_df.groupby(["year"])}
year_events_dict = {k: v for k, v in events_df.groupby(["year"])}
year_teams_dict = {k: v for k, v in team_years_df.groupby(["year"])}
event_matches_dict = {k: v for k, v in matches_df.groupby(["event"])}

In [42]:
years = [2016, 2017, 2018, 2019, 2020, 2022]

def get_stats(year, week):
    week_events = list(events_df[(events_df.year == year) & (events_df.week == week)]["key"])
    autos, teleops, endgames, scores = [], [], [], []
    for _, match in matches_df[(matches_df.year == year)].iterrows():
        if match.event in week_events:
            if (
                match.red_auto >= 0 and match.blue_auto >= 0 and 
                match.red_teleop >= 0 and match.blue_teleop >= 0 and 
                match.red_endgame >= 0 and match.blue_endgame >= 0 and 
                match.red_score >= 0 and match.blue_score >= 0
            ):
                autos.extend([match.red_auto, match.blue_auto])
                teleops.extend([match.red_score - match.red_auto - match.red_endgame, match.blue_score - match.blue_auto - match.red_auto])
                endgames.extend([match.red_endgame, match.blue_endgame])
                scores.extend([match.red_score, match.blue_score])

    auto_mean, auto_sd = sum(autos) / len(autos), statistics.pstdev(autos)
    teleop_mean, teleop_sd = sum(teleops) / len(teleops), statistics.pstdev(teleops)
    endgame_mean, endgame_sd = sum(endgames) / len(endgames), statistics.pstdev(endgames)
    score_mean, score_sd = sum(scores) / len(scores), statistics.pstdev(scores)

    return auto_mean, auto_sd, teleop_mean, teleop_sd, endgame_mean, endgame_sd, score_mean, score_sd

year_stats = {year: get_stats(year, 1) for year in years}

In [43]:
def get_mse(p1, p2):
    return (p1 - p2) ** 2

def get_acc(p1, p2):
    return (p1 >= 0.5 and p2 >= 0.5) or (p1 < 0.5 and p2 < 0.5)

class Metrics:
    def __init__(self):
        self.metrics = {
            "pre_quals": [0, 0, 0],
            "pre_elims": [0, 0, 0],
            "pre_total": [0, 0, 0],
            "quals": [0, 0, 0],
            "elims": [0, 0, 0],
            "total": [0, 0, 0],
        }
        
    def add_match(self, pre, playoffs, p1, p2):
        keys = ["elims" if playoffs else "quals", "total"]
        for key in keys:
            key = "pre_" + key if pre else key
            self.metrics[key][0] += 1
            self.metrics[key][1] += get_mse(p1, p2)
            self.metrics[key][2] += get_acc(p1, p2)
            
    def get(self, key):
        count, mse, acc = self.metrics[key]
        return round(mse / count, 4), round(acc / count, 4), count
    
    def __add__(self, other):
        for key in other.metrics:
            for i in range(3):
                self.metrics[key][i] += other.metrics[key][i]
        return self

In [44]:
class EloModel:
    def __init__(self, year_stats, year_teams_dict):
        self.year_stats = year_stats
        
        self.elos = defaultdict(dict)
        for year in range(2014, 2016):
            for _, team_year in year_teams_dict[year].iterrows():
                self.elos[year][team_year.team] = team_year.elo_end
        
    def _init_team_year(self, year, team):
        elos = [1450, 1450]
        for team_year in range(year - 4, year):
            if team in self.elos[team_year]:
                elos.append(self.elos[team_year][team])
        self.elos[year][team] = (elos[-1] * 0.56) + (elos[-2] * 0.24) + (1450 * 0.20)
                
    def _get(self, year, team):
        if team not in self.elos[year]:
             self._init_team_year(year, team)
        return self.elos[year][team]
    
    def _get_pred_margin(self, match):
        red_sum = sum([self._get(match.year, int(t)) for t in match.red.split(",")])
        blue_sum = sum([self._get(match.year, int(t)) for t in match.blue.split(",")])
        return (red_sum - blue_sum) / 250
    
    def win_prob(self, match):
        return 1 / (10 ** (-250 * self._get_pred_margin(match) / 400) + 1)
    
    def update(self, match):
        pred_margin = self._get_pred_margin(match)
        actual_margin = (match.red_score - match.blue_score) / self.year_stats[match.year][7]
        k = 3 if match.playoff else 12
        for t in match.red.split(","):
            self.elos[match.year][int(t)] += k * (actual_margin - pred_margin)
        for t in match.blue.split(","):
            self.elos[match.year][int(t)] -= k * (actual_margin - pred_margin)

In [45]:
class RVModel:
    def __init__(self, year_stats, year_teams_dict):
        self.year_stats = year_stats
        
        self.auto_rvs = defaultdict(dict)
        self.teleop_rvs = defaultdict(dict)
        self.endgame_rvs = defaultdict(dict)
        self.total_rvs = defaultdict(dict)
        for year in range(2014, 2016):
            for _, team_year in year_teams_dict[year].iterrows():
                self.elos[year][team_year.team] = team_year.elo_end
        
    def _init_team_year(self, year, team):
        elos = [1450, 1450]
        for team_year in range(year - 4, year):
            if team in self.elos[team_year]:
                elos.append(self.elos[team_year][team])
        self.elos[year][team] = (elos[-1] * 0.56) + (elos[-2] * 0.24) + (1450 * 0.20)
                
    def _get(self, year, team):
        if team not in self.elos[year]:
             self._init_team_year(year, team)
        return self.elos[year][team]
    
    def _get_pred_margin(self, match):
        red_sum = sum([self._get(match.year, int(t)) for t in match.red.split(",")])
        blue_sum = sum([self._get(match.year, int(t)) for t in match.blue.split(",")])
        return (red_sum - blue_sum) / 250
    
    def win_prob(self, match):
        return 1 / (10 ** (-250 * self._get_pred_margin(match) / 400) + 1)
    
    def update(self, match):
        pred_margin = self._get_pred_margin(match)
        actual_margin = (match.red_score - match.blue_score) / self.year_stats[match.year][7]
        k = 3 if match.playoff else 12
        for t in match.red.split(","):
            self.elos[match.year][int(t)] += k * (actual_margin - pred_margin)
        for t in match.blue.split(","):
            self.elos[match.year][int(t)] -= k * (actual_margin - pred_margin)

In [46]:
def simulate_event(elos, matches, print_logs=False):
    metrics = Metrics()
    for i, m in matches.iterrows():
        winner = {"red": 1, "blue": 0, "draw": 0.5}[m.winner]
        win_prob = elos.win_prob(m)
        metrics.add_match(True, m.playoff, win_prob, winner)
    for i, m in matches.iterrows():
        winner = {"red": 1, "blue": 0, "draw": 0.5}[m.winner]
        win_prob = elos.win_prob(m)
        metrics.add_match(False, m.playoff, win_prob, winner)
        elos.update(m)
    return metrics


elos = EloModel(year_stats, year_teams_dict)
        
baselines = {
    "2018carv": 0.157,
    "2018gal": 0.180,
    "2018hop": 0.180,
    "2018new": 0.127,
    "2018roe": 0.197,
    "2018tur": 0.160,
    "2019carv": 0.155,
    "2019gal": 0.126,
    "2019hop": 0.170,
    "2019new": 0.197,
    "2019roe": 0.179
}
best_baselines = {
    "2018carv": 0.157,
    "2018gal": 0.175,
    "2018hop": 0.165,
    "2018new": 0.126,
    "2018roe": 0.193,
    "2018tur": 0.153,
    "2019carv": 0.152,
    "2019gal": 0.118,
    "2019hop": 0.170,
    "2019new": 0.196,
    "2019roe": 0.179
}

all_metrics = Metrics()
for year in years:
    year_metrics = Metrics()
    for key in year_events_dict[year].sort_values(by=["time"])["key"].values:
        if key not in event_matches_dict:
            continue

        matches = event_matches_dict[key].sort_values(by=["time"])
        metrics = simulate_event(elos, matches, False)
        year_metrics += metrics
        if key in baselines:
            print(key, metrics.get("pre_quals")[0], baselines[key])
    
    all_metrics += year_metrics
    print()
    print(year, year_metrics.get("total"))
    print()
print("Overall", all_metrics.get("total"))


2016 (0.187, 0.7098, 13320)


2017 (0.2101, 0.6557, 15442)

2018roe 0.1966 0.197
2018gal 0.1814 0.18
2018new 0.1297 0.127
2018carv 0.1586 0.157
2018tur 0.1615 0.16
2018hop 0.1845 0.18

2018 (0.1743, 0.7379, 16930)

2019gal 0.1352 0.126
2019hop 0.1754 0.17
2019carv 0.155 0.155
2019roe 0.1826 0.179
2019new 0.2096 0.197

2019 (0.1835, 0.7117, 18035)


2020 (0.1942, 0.7047, 4571)


2022 (0.1608, 0.7615, 12946)

Overall (0.1842, 0.7137, 81244)


In [136]:
elos = defaultdict(dict)
auto_elos = defaultdict(dict)
teleop_elos = defaultdict(dict)
endgame_elos = defaultdict(dict)
all_elos = defaultdict(dict)

for year in range(2014, 2016):
    for _, team_year in year_teams_dict[year].iterrows():
        team_num = team_year.team
        elos[year][team_num] = team_year.elo_start
        all_elos[year][team_num] = team_year.elo_start

In [13]:
global_power = 0.90
global_factor = 0.25

def score_pred_func(red_score, blue_score, pred_margin):
    red_win_prob = 1 / (10 ** (250 * (-pred_margin) / 400) + 1)
    
    if red_score > blue_score:
        return red_win_prob >= 0.5, (1 - red_win_prob) ** 2 
    elif red_score < blue_score:
        return red_win_prob <= 0.5, (red_win_prob) ** 2
    
    return 0, (0.5 - red_win_prob) ** 2

def nl(x, power):
    if x == 0:
        return x
    return x / abs(x) * abs(x) ** power


def elo_sum(x, factor):
    x = list(x)
    return sum(x) + factor * max(x) - factor * min(x)

def update_elo(elo, update, elos, factor):
    mult = 1
    if elo == max(elos):
        mult += factor
    elif elo == min(elos):
        mult -= factor
    return round(elo + mult * update, 2)

total_acc_new, total_mse_new = 0, 0
total_acc_old, total_mse_old = 0, 0
total_acc_mix, total_mse_mix = 0, 0
for year in range(2016, 2021):
    count = 0
    acc_new, mse_new = 0, 0
    acc_old, mse_old = 0, 0
    acc_mix, mse_mix = 0, 0
    
    years_row = years_df[years_df.year == year].iloc[0]
    
    elo_acc, elo_mse = years_row.elo_acc, years_row.elo_mse
    auto_mean, auto_sd, teleop_mean, teleop_sd, endgame_mean, endgame_sd, score_mean, score_sd = year_stats[year]
    
    year_matches_df = year_matches_dict[year].sort_values(by=["time"])
    year_teams_df = year_teams_dict[year]
    
    for _, team_year in year_teams_df.iterrows():
        team_num = team_year.team
        
        for (elo_dict, source_elo_dict) in [(elos, elos), (auto_elos, auto_elos), (teleop_elos, all_elos), (endgame_elos, all_elos)]:
            elo_1yr = source_elo_dict[year - 1].get(team_num, elos[year - 1].get(team_num, 1500))
            elo_2yr = source_elo_dict[year - 2].get(team_num, elos[year - 2].get(team_num, 1500))
            elo_dict[year][team_num] = 0.56 * elo_1yr + 0.24 * elo_2yr + 0.20 * 1450
    
    for _, match in year_matches_df.iterrows():
        if not (match.red_auto >= 0 and match.red_teleop >= 0 and match.red_endgame >= 0 and match.red_score >= 0):
            continue
            
        red_teams = [int(x) for x in match.red.split(",")]
        blue_teams = [int(x) for x in match.blue.split(",")]
        
        red_elo_sum = elo_sum([elos[year][x] for x in red_teams], 0)
        blue_elo_sum = elo_sum([elos[year][x] for x in blue_teams], 0)
        pred_margin1 = (red_elo_sum - blue_elo_sum) / 250
        acc, mse = score_pred_func(match.red_score, match.blue_score, pred_margin1)
        acc_old += acc
        mse_old += mse
        
        red_auto_pred = auto_mean + auto_sd * nl((elo_sum([auto_elos[year][x] for x in red_teams], global_factor) - 4500) / 250, global_power)
        blue_auto_pred = auto_mean + auto_sd * nl((elo_sum([auto_elos[year][x] for x in blue_teams], global_factor) - 4500) / 250, global_power)
        red_endgame_pred = endgame_mean + endgame_sd * nl((elo_sum([endgame_elos[year][x] for x in red_teams], global_factor) - 4500) / 250, global_power)
        blue_endgame_pred = endgame_mean + endgame_sd * nl((elo_sum([endgame_elos[year][x] for x in blue_teams], global_factor) - 4500) / 250, global_power)
        red_teleop = nl(elo_sum([teleop_elos[year][x] for x in red_teams], global_factor) / 250, global_power)
        blue_teleop = nl(elo_sum([teleop_elos[year][x] for x in blue_teams], global_factor) / 250, global_power)
        teleop_margin = teleop_sd * (red_teleop - blue_teleop)
        pred_margin2 = ((red_auto_pred - blue_auto_pred) + teleop_margin + (red_endgame_pred - blue_endgame_pred)) / score_sd
        acc, mse = score_pred_func(match.red_score, match.blue_score, pred_margin2)
        acc_new += acc
        mse_new += mse
        
        acc, mse = score_pred_func(match.red_score, match.blue_score, (pred_margin1 + pred_margin2) / 2)
        acc_mix += acc
        mse_mix += mse
        
        count += 1
        
        k = 4 if match.playoff else 12
        
        for (mean, sd, elo_dict, teams, get_func, power, factor) in [
            (auto_mean, auto_sd, auto_elos, red_teams, lambda m: m.red_auto, global_power, global_factor),
            (auto_mean, auto_sd, auto_elos, blue_teams, lambda m: m.blue_auto, global_power, global_factor),
            (endgame_mean, endgame_sd, endgame_elos, red_teams, lambda m: m.red_endgame, global_power, global_factor),
            (endgame_mean, endgame_sd, endgame_elos, blue_teams, lambda m: m.blue_endgame, global_power, global_factor),
        ]:
            temp_elos = [elo_dict[year][x] for x in teams]
            temp_elo_sum = elo_sum([elo_dict[year][x] for x in teams], factor)
            score_pred = mean / sd + nl((temp_elo_sum - 4500) / 250, power)
            score = get_func(match) / sd
            for x, temp_elo in zip(teams, temp_elos):
                elo_dict[year][x] = update_elo(elo_dict[year][x], k * (score - score_pred), temp_elos, factor)
        
        for (sd, elo_dict, red_get_func, blue_get_func, power, factor) in [
            (teleop_sd, teleop_elos, lambda m: m.red_score - m.red_auto - m.red_endgame, lambda m: m.blue_score - m.blue_auto - m.blue_endgame, global_power, global_factor),
            (score_sd, elos, lambda m: m.red_score, lambda m: m.blue_score, 1, 0),
        ]:
            red_elos = [elo_dict[year][x] for x in red_teams]
            red_elo_sum = elo_sum([elo_dict[year][x] for x in red_teams], factor)
            blue_elos = [elo_dict[year][x] for x in blue_teams]
            blue_elo_sum = elo_sum([elo_dict[year][x] for x in blue_teams], factor)
            win_margin = (red_get_func(match) - blue_get_func(match)) / sd
            pred_win_margin = nl(red_elo_sum / 250, power) - nl(blue_elo_sum / 250, power)
            for x, temp_elo in zip(red_teams, red_elos):
                elo_dict[year][x] = update_elo(elo_dict[year][x], k * (win_margin - pred_win_margin), red_elos, factor)
            for x, temp_elo in zip(blue_teams, blue_elos):
                elo_dict[year][x] = update_elo(elo_dict[year][x], -k * (win_margin - pred_win_margin), blue_elos, factor)
    
    all_elos[year] = {
        k: round(
        1500 + 
        (auto_sd * (auto_elos[year][k] - 1500) + teleop_sd * (teleop_elos[year][k] - 1500) + endgame_sd * (endgame_elos[year][k] - 1500)) / 
        (auto_sd + teleop_sd + endgame_sd), 2) for k in elos[year].keys()
    }
    
    total_acc_new += acc_new / count
    total_mse_new += mse_new / count
    total_acc_old += acc_old / count
    total_mse_old += mse_old / count
    total_acc_mix += acc_mix / count
    total_mse_mix += mse_mix / count

    print(year, "\t", round(acc_new / count, 4), round(mse_new / count, 4), "\t", round(acc_old / count, 4), round(mse_old / count , 4), "\t", round(acc_mix / count, 4), round(mse_mix / count, 4))
    
print()
print("Avg", "\t", round(total_acc_new / 5, 4), round(total_mse_new / 5, 4), "\t", round(total_acc_old / 5, 4), round(total_mse_old / 5 , 4), "\t", round(total_acc_mix / 5, 4), round(total_mse_mix / 5, 4))

2016 	 0.6997 0.189 	 0.6955 0.1909 	 0.6988 0.1894
2017 	 0.6537 0.2088 	 0.6405 0.2114 	 0.6532 0.208
2018 	 0.7453 0.1728 	 0.7331 0.1758 	 0.7412 0.1733
2019 	 0.7064 0.1815 	 0.6981 0.1847 	 0.7053 0.1823
2020 	 0.7073 0.1911 	 0.6988 0.1953 	 0.704 0.1923

Avg 	 0.7025 0.1886 	 0.6932 0.1916 	 0.7005 0.189
