In [1]:
from collections import defaultdict
import json
import statistics

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
years_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/years.csv")

In [3]:
team_years_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/team_years.csv")

In [4]:
events_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/events.csv")

In [5]:
matches_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/matches.csv")

In [6]:
team_matches_df = pd.read_csv("https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/team_matches.csv")

In [7]:
year_matches_dict = {k: v for k, v in matches_df.groupby(["year"])}
year_events_dict = {k: v for k, v in events_df.groupby(["year"])}
year_teams_dict = {k: v for k, v in team_years_df.groupby(["year"])}
event_matches_dict = {k: v for k, v in matches_df.groupby(["event"])}

In [8]:
elo_baselines = {
    "2018carv": 0.157,
    "2018gal": 0.180,
    "2018hop": 0.180,
    "2018new": 0.127,
    "2018roe": 0.197,
    "2018tur": 0.160,
    "2019carv": 0.155,
    "2019gal": 0.126,
    "2019hop": 0.170,
    "2019new": 0.197,
    "2019roe": 0.179
}

best_baselines = {
    "2018carv": 0.157,
    "2018gal": 0.175,
    "2018hop": 0.165,
    "2018new": 0.126,
    "2018roe": 0.193,
    "2018tur": 0.153,
    "2019carv": 0.152,
    "2019gal": 0.118,
    "2019hop": 0.170,
    "2019new": 0.196,
    "2019roe": 0.179
}

In [9]:
years = [2016, 2017, 2018, 2019, 2020, 2022]

def get_stats(year):
    event_weeks = {}
    autos, teleops, endgames, scores = {}, {}, {}, {}
    
    for _, event in events_df[(events_df.year == year)].iterrows():
        event_weeks[event.key] = event.week
        
    for week in set(event_weeks.values()):
        autos[week] = []
        teleops[week] = []
        endgames[week] = []
        scores[week] = []

    for _, match in matches_df[(matches_df.year == year) & (matches_df.playoff == False)].iterrows():
        week = event_weeks[match.event]
        if match.red_auto >= 0 and match.blue_auto >= 0:
            autos[week].extend([match.red_auto, match.blue_auto])
        if match.red_teleop >= 0 and match.blue_teleop >= 0:
            teleops[week].extend([match.red_teleop, match.blue_teleop])
        if match.red_endgame >= 0 and match.blue_endgame >= 0:
            endgames[week].extend([match.red_endgame, match.blue_endgame])
        if match.red_score >= 0 and match.blue_score >= 0:
            scores[week].extend([match.red_score, match.blue_score])

    out = {}
    for week in set(event_weeks.values()):
        autos[week] = [x for x in autos[week] if x >= 0]
        teleops[week] = [x for x in teleops[week] if x >= 0]
        endgames[week] = [x for x in endgames[week] if x >= 0]
        scores[week] = [x for x in scores[week] if x >= 0]
        if len(autos[week]) > 0:
            auto_mean, auto_sd = sum(autos[week]) / len(autos[week]), statistics.pstdev(autos[week])
            teleop_mean, teleop_sd = sum(teleops[week]) / len(teleops[week]), statistics.pstdev(teleops[week])
            endgame_mean, endgame_sd = sum(endgames[week]) / len(endgames[week]), statistics.pstdev(endgames[week])
            score_mean, score_sd = sum(scores[week]) / len(scores[week]), statistics.pstdev(scores[week])
            out[week] = auto_mean, auto_sd, teleop_mean, teleop_sd, endgame_mean, endgame_sd, score_mean, score_sd

    return out

year_stats = {year: get_stats(year) for year in years}

for year in year_stats:
    auto_mean, auto_sd, teleop_mean, teleop_sd, endgame_mean, endgame_sd, score_mean, score_sd = year_stats[year][1]
    print(
        year, "\t", 
        "{:3.2f}".format(auto_mean), "  \t", 
        "{:3.2f}".format(auto_sd), "  \t", 
        "{:3.2f}".format(teleop_mean), "  \t", 
        "{:3.2f}".format(teleop_sd), "  \t", 
        "{:3.2f}".format(endgame_mean), "  \t", 
        "{:3.2f}".format(endgame_sd), "  \t", 
        "{:3.2f}".format(score_mean), "  \t",
        "{:3.2f}".format(score_sd),
    )

2016 	 14.61   	 8.31   	 35.15   	 12.42   	 7.77   	 4.89   	 61.63   	 20.66
2017 	 27.76   	 27.35   	 74.64   	 27.21   	 53.90   	 44.31   	 164.47   	 64.22
2018 	 19.99   	 11.04   	 181.01   	 84.86   	 39.62   	 22.24   	 256.61   	 107.08
2019 	 10.50   	 3.70   	 18.81   	 10.11   	 9.81   	 5.43   	 41.30   	 15.61
2020 	 25.86   	 12.49   	 18.96   	 15.53   	 31.91   	 21.69   	 85.60   	 40.92
2022 	 10.65   	 6.10   	 14.44   	 10.99   	 10.66   	 8.11   	 38.02   	 20.08


In [10]:
for year in years:
    weeks, autos, teleops, endgames, scores = [], [], [], [], []
    print(year)
    for week in range(1, 7):
        if week in year_stats[year]:
            weeks.append(week)
            autos.append(year_stats[year][week][0])
            teleops.append(year_stats[year][week][2])
            endgames.append(year_stats[year][week][4])
            scores.append(year_stats[year][week][6])
            print(week, "\t", "\t".join([str(round(x, 2)) for x in year_stats[year][week]]))

    """
    fig, ax = plt.subplots()
    ax.plot(weeks, autos, label="Auto")
    ax.plot(weeks, teleops, label="Teleop")
    ax.plot(weeks, endgames, label="Endgame")
    ax.plot(weeks, scores, label="Score")
    ax.legend()
    fig.show()
    """

2016
1 	 14.61	8.31	35.15	12.42	7.77	4.89	61.63	20.66
2 	 15.94	8.65	36.38	13.34	8.36	4.9	64.63	22.18
3 	 17.77	8.34	39.3	13.35	9.12	4.67	70.47	21.68
4 	 20.32	8.35	42.44	14.03	9.53	4.68	77.59	22.78
5 	 20.95	8.64	44.64	14.73	9.72	4.53	80.62	23.38
6 	 22.0	8.53	44.54	14.59	9.92	4.52	81.88	23.42
2017
1 	 27.76	27.35	74.64	27.21	53.9	44.31	164.47	64.22
2 	 33.0	29.43	76.72	26.54	60.35	45.84	178.63	66.66
3 	 38.48	30.52	81.48	26.95	72.84	45.31	200.84	67.02
4 	 42.51	30.88	84.39	27.35	83.93	45.49	218.86	67.28
5 	 46.91	31.24	88.11	26.24	87.33	44.72	230.97	66.02
6 	 50.96	31.05	93.37	28.16	97.9	45.33	249.03	68.59
2018
1 	 19.99	11.04	181.01	84.86	39.62	22.24	256.61	107.08
2 	 20.47	11.19	184.02	85.83	38.61	22.46	262.87	109.26
3 	 21.93	10.94	188.24	86.01	42.24	22.3	271.24	108.69
4 	 24.06	11.18	195.66	85.39	44.74	21.74	284.55	107.92
5 	 25.57	11.45	197.85	84.19	46.95	22.68	291.88	107.0
6 	 26.19	11.5	199.26	84.03	48.16	22.61	294.1	106.72
2019
1 	 10.5	3.7	18.81	10.11	9.81	5.43	41.3	15.61
2 

In [11]:
class Metrics:
    def __init__(self):
        self.metrics = {
            "pre_quals": [0, 0, 0],
            "pre_elims": [0, 0, 0],
            "pre_total": [0, 0, 0],
            "quals": [0, 0, 0],
            "elims": [0, 0, 0],
            "total": [0, 0, 0],
        }
        
    def get_mse(self, p1, p2):
        return (p1 - p2) ** 2

    def get_acc(self, p1, p2):
        return (p1 >= 0.5 and p2 >= 0.5) or (p1 < 0.5 and p2 < 0.5)
        
    def add_match(self, pre, playoffs, p1, p2):
        keys = ["elims" if playoffs else "quals", "total"]
        for key in keys:
            key = "pre_" + key if pre else key
            self.metrics[key][0] += 1
            self.metrics[key][1] += self.get_mse(p1, p2)
            self.metrics[key][2] += self.get_acc(p1, p2)
            
    def get(self, key):
        count, mse, acc = self.metrics[key]
        return round(mse / count, 4), round(acc / count, 4), count
    
    def __add__(self, other):
        for key in other.metrics:
            for i in range(3):
                self.metrics[key][i] += other.metrics[key][i]
        return self

In [12]:
class EloModel:
    def __init__(self, year_stats, year_teams_dict):
        self.year_stats = year_stats
        
        self.elos = defaultdict(dict)
        for year in range(2014, 2016):
            for _, team_year in year_teams_dict[year].iterrows():
                self.elos[year][team_year.team] = team_year.elo_end
        
    def _init_team_year(self, year, team):
        elos = [1450, 1450]
        for team_year in range(year - 4, year):
            if team in self.elos[team_year]:
                elos.append(self.elos[team_year][team])
        self.elos[year][team] = (elos[-1] * 0.56) + (elos[-2] * 0.24) + (1450 * 0.20)
                
    def _get(self, year, team):
        if team not in self.elos[year]:
             self._init_team_year(year, team)
        return self.elos[year][team]
    
    def _get_pred_margin(self, match):
        red_sum = sum([self._get(match.year, int(t)) for t in match.red.split(",")])
        blue_sum = sum([self._get(match.year, int(t)) for t in match.blue.split(",")])
        return (red_sum - blue_sum) / 250
    
    def win_prob(self, match):
        return 1 / (10 ** (-250 * self._get_pred_margin(match) / 400) + 1)
    
    def update(self, match):
        pred_margin = self._get_pred_margin(match)
        actual_margin = (match.red_score - match.blue_score) / self.year_stats[match.year][1][7]
        k = 3 if match.playoff else 12
        for t in match.red.split(","):
            self.elos[match.year][int(t)] += k * (actual_margin - pred_margin)
        for t in match.blue.split(","):
            self.elos[match.year][int(t)] -= k * (actual_margin - pred_margin)

In [13]:
class RVModel:
    def __init__(self, year_stats, year_teams_dict):
        self.year_stats = year_stats
        
        self.auto_rvs = defaultdict(dict)
        self.teleop_rvs = defaultdict(dict)
        self.endgame_rvs = defaultdict(dict)
        self.all_rvs = defaultdict(dict)
        
        for year in range(2014, 2016):
            for _, team_year in year_teams_dict[year].iterrows():
                for store in [self.auto_rvs, self.teleop_rvs, self.endgame_rvs, self.all_rvs]:
                    store[year][team_year.team] = [100 + (team_year.elo_end - 1500) / 250 * 100]
        
        self.factor = 0.3
        self.defense_percent = 0.3
        
        self.divider = {
            2016: 125,
            2017: 250,
            2018: 200,
            2019: 125,
            2020: 200,
            2022: 150,
        }
            
    def _init_team_year(self, year, team):
        auto_rvs = [95, 95]
        all_rvs = [95, 95]
        for team_year in range(year - 4, year):
            if team in self.all_rvs[team_year]:
                all_rvs.append(self.all_rvs[team_year][team][-1])
                auto_rvs.append(self.auto_rvs[team_year][team][-1])
                
        for dest, src in [(self.auto_rvs, auto_rvs), (self.teleop_rvs, all_rvs), (self.endgame_rvs, all_rvs), (self.all_rvs, all_rvs)]:
            dest[year][team] = [(src[-1] * 0.56) + (src[-2] * 0.24) + (95 * 0.2)]
    
    def nl(self, x, year):
        if x < 600:
            return x
        return 600 + 0.9 * (x - 600)

    def rv_sum(self, x, year):
        x = list(x)
        return sum(x) + self.factor * max(x) - self.factor * min(x)
    
    def update_rv(self, initial, update, rvs, year):
        mult = 1
        if initial == max(rvs):
            mult += self.factor
        elif initial == min(rvs):
            mult -= self.factor
        return round(initial + mult * update, 2)
    
    def _get_pred_margin(self, match):
        red_teams = [int(x) for x in match.red.split(",")]
        blue_teams = [int(x) for x in match.blue.split(",")]
        year = match.year
        
        for team in red_teams + blue_teams:
            if team not in self.auto_rvs[year]:
                self._init_team_year(year, team)
        
        red_auto = self.rv_sum([self.auto_rvs[year][x][-1] for x in red_teams], year)
        blue_auto = self.rv_sum([self.auto_rvs[year][x][-1] for x in blue_teams], year)
        auto_diff = self.nl(red_auto, year) - self.nl(blue_auto, year)
        
        red_teleop = self.rv_sum([self.teleop_rvs[year][x][-1] for x in red_teams], year)
        blue_teleop = self.rv_sum([self.teleop_rvs[year][x][-1] for x in blue_teams], year)
        teleop_diff = self.nl(red_teleop, year) - self.nl(blue_teleop, year)
        
        red_endgame = self.rv_sum([self.endgame_rvs[year][x][-1] for x in red_teams], year)
        blue_endgame = self.rv_sum([self.endgame_rvs[year][x][-1] for x in blue_teams], year)
        endgame_diff = self.nl(red_endgame, year) - self.nl(blue_endgame, year)
        
        a_m, _, t_m, _, e_m, _, s_m, _ = self.year_stats[year][1]
        a_sd, t_sd, e_sd, s_sd = a_m / 3, t_m / 3, e_m / 3, s_m / 3
        point_diff = (a_sd * auto_diff + t_sd * teleop_diff + e_sd * endgame_diff) / s_sd
        return point_diff / 100
        
    def win_prob(self, match):
        pred_margin = self._get_pred_margin(match)
        elo_prob = 1 / (10 ** (-100 * self._get_pred_margin(match) / self.divider[match.year]) + 1)
        if match.year == 2018:
            return elo_prob
        opr_prob = match.opr_win_prob
        return 0.75 * elo_prob + 0.25 * opr_prob
    
    def update(self, match):
        red_teams = [int(x) for x in match.red.split(",")]
        blue_teams = [int(x) for x in match.blue.split(",")]
        year = match.year
    
        a_m, a_sd, t_m, t_sd, e_m, e_sd, s_m, s_sd = self.year_stats[year][1]
        a_sd, t_sd, e_sd, s_sd = a_m / 3, t_m / 3, e_m / 3, s_m / 3
        
        for arr in [self.auto_rvs, self.teleop_rvs, self.endgame_rvs]:
            for team in red_teams + blue_teams:
                arr[year][team].append(arr[year][team][-1])
                
        k = 3 if match.playoff else 10
        defense_percent = 0.8 if year == 2018 else self.defense_percent
                
        for (sd, store, teams, get_func, weight) in [
            (a_sd, self.auto_rvs, red_teams, lambda m: m.red_auto, 1),
            (a_sd, self.auto_rvs, blue_teams, lambda m: m.blue_auto, 1),
            (e_sd, self.endgame_rvs, red_teams, lambda m: m.red_endgame, 1),
            (e_sd, self.endgame_rvs, blue_teams, lambda m: m.blue_endgame, 1),
            (t_sd, self.teleop_rvs, red_teams, lambda m: m.red_teleop, 1 - defense_percent),
            (t_sd, self.teleop_rvs, blue_teams, lambda m: m.blue_teleop, 1 - defense_percent),
        ]:
            temp_store = [store[year][x][-2] for x in teams]
            score_pred = self.nl(self.rv_sum(temp_store, year), year) / 100
            score = get_func(match) / sd
            for x in teams:
                store[year][x][-1] = self.update_rv(store[year][x][-1], k * weight * (score - score_pred), temp_store, year)
                
        for (sd, store, red_get_func, blue_get_func, weight) in [
            (t_sd, self.teleop_rvs, lambda m: m.red_teleop, lambda m: m.blue_teleop, defense_percent),
        ]:
            red_rvs = [store[year][x][-2] for x in red_teams]
            blue_rvs = [store[year][x][-2] for x in blue_teams]
            pred_win_margin = (self.nl(self.rv_sum(red_rvs, year), year) - self.nl(self.rv_sum(blue_rvs, year), year)) / 100
            win_margin = (red_get_func(match) - blue_get_func(match)) / sd
            for x in red_teams:
                store[year][x][-1] = self.update_rv(store[year][x][-1], k * weight * (win_margin - pred_win_margin), red_rvs, year)
            for x in blue_teams:
                store[year][x][-1] = self.update_rv(store[year][x][-1], -k * weight * (win_margin - pred_win_margin), blue_rvs, year)
                
        for x in red_teams + blue_teams:
            all_rv = (a_m * self.auto_rvs[year][x][-1] + t_m * self.teleop_rvs[year][x][-1] + e_m * self.teleop_rvs[year][x][-1]) / s_m
            self.all_rvs[year][x].append(all_rv)

In [14]:
def simulate_event(model, matches, print_logs=False):
    metrics = Metrics()
    for i, m in matches.iterrows():
        if m.status != "Completed":
            continue
        winner = {"red": 1, "blue": 0, "draw": 0.5}[m.winner]
        win_prob = model.win_prob(m)
        # model.update(m)
        metrics.add_match(True, m.playoff, win_prob, winner)
        # return
    for i, m in matches.iterrows():
        if m.status != "Completed":
            continue
        winner = {"red": 1, "blue": 0, "draw": 0.5}[m.winner]
        win_prob = model.win_prob(m)
        metrics.add_match(False, m.playoff, win_prob, winner)
        model.update(m)
    return metrics


model = RVModel(year_stats, year_teams_dict)
# model = EloModel(year_stats, year_teams_dict)

baselines = elo_baselines

all_metrics = Metrics()
for year in years:
    year_metrics = Metrics()
    for key in year_events_dict[year].sort_values(by=["time"])["key"].values:
        if key not in event_matches_dict:
            continue
        matches = event_matches_dict[key].sort_values(by=["time"])
        metrics = simulate_event(model, matches, False)
        year_metrics += metrics
        if key in baselines:
            print(key, metrics.get("pre_quals")[0], baselines[key])
    all_metrics += year_metrics
    print(year, year_metrics.get("total"))
print("Overall", all_metrics.get("total"))

2016 (0.1778, 0.7247, 13286)
2017 (0.2035, 0.6711, 15429)
2018roe 0.1915 0.197
2018gal 0.1921 0.18
2018new 0.1401 0.127
2018carv 0.1599 0.157
2018tur 0.1638 0.16
2018hop 0.1854 0.18
2018 (0.1741, 0.7385, 16930)
2019gal 0.121 0.126
2019hop 0.1669 0.17
2019carv 0.1616 0.155
2019roe 0.1708 0.179
2019new 0.2009 0.197
2019 (0.1744, 0.7339, 18022)
2020 (0.1815, 0.7278, 4571)
2022 (0.1427, 0.7861, 13798)
Overall (0.1754, 0.73, 82036)


In [15]:
model.all_rvs[2022][1678]

[288.7053758224338,
 320.1111896863554,
 344.92608339619557,
 370.8088013160167,
 366.02807332612315,
 384.8244292868244,
 423.382258131947,
 458.48304247062134,
 463.31880340637474,
 483.7155838915196,
 484.9216743767553,
 477.3764512992029,
 461.5986532641394,
 470.9995817466305,
 489.17961546501374,
 482.2443962955221,
 486.10789719074063,
 486.77336238628,
 488.22266284343215,
 434.20043624862535,
 527.7409090330732,
 540.9958191022366,
 594.5138069963374,
 624.100234301866,
 620.425473375201,
 565.0876196275526,
 617.7745589799052,
 660.1374218615093,
 589.6854996410037,
 564.256772123713,
 574.9422322296849,
 581.6265811740542,
 584.7494010669915,
 598.4221262576231,
 599.1786701687737,
 605.617210371811,
 552.304525261522,
 571.7575817284534,
 607.1847139390525,
 625.9074503994402,
 637.3815547719238,
 653.0855209990093,
 619.9099562842523,
 629.6078923738287,
 674.9357507566186,
 683.4595868361978,
 681.5624174535805,
 677.8067208644993,
 674.6706625525999,
 673.9124152723373,


In [16]:
def print_win_probs(model, matches, print_logs=False):
    metrics = Metrics()
    for i, m in matches.iterrows():
        win_prob = model.win_prob(m)
        if m.status == "Completed":
            model.update(m)
        print(m.key, round(1-win_prob, 4))
        
print_win_probs(model, event_matches_dict["2022carv"].sort_values(by=["match_number"]))

2022carv_qm1 0.0905
2022carv_qm2 0.1133
2022carv_qm3 0.9939
2022carv_qm4 0.0436
2022carv_qm5 0.029
2022carv_qm6 0.3629
2022carv_qm7 0.0021
2022carv_qm8 0.0103
2022carv_qm9 0.015
2022carv_qm10 0.7869
2022carv_qm11 0.012
2022carv_qm12 0.1544
2022carv_qm13 0.0329
2022carv_qm14 0.1805
2022carv_qm15 0.0132
2022carv_qm16 0.7392
2022carv_qm17 0.0999
2022carv_qm18 0.3704
2022carv_qm19 0.7414
2022carv_qm20 0.8347
2022carv_qm21 0.9913
2022carv_qm22 0.7695
2022carv_qm23 0.3781
2022carv_qm24 0.2903
2022carv_qm25 0.128
2022carv_qm26 0.9022
2022carv_qm27 0.902
2022carv_qm28 0.0364
2022carv_qm29 0.0344
2022carv_qm30 0.986
2022carv_qm31 0.0896
2022carv_qm32 0.6089
2022carv_qm33 0.854
2022carv_qm34 0.0853
2022carv_qm35 0.6138
2022carv_qm36 0.0568
2022carv_qm37 0.9809
2022carv_qm38 0.0033
2022carv_qm39 0.992
2022carv_qm40 0.9916
2022carv_qm41 0.2504
2022carv_qm42 0.3072
2022carv_qm43 0.9307
2022carv_qm44 0.9363
2022carv_qm45 0.6736
2022carv_qm46 0.9862
2022carv_qm47 0.5642
2022carv_qm48 0.2362
2022carv_