In [1]:
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Statbotics CSV exports hosted on GitHub.
# `years` supplies the `score_sd` column the Elo model reads;
# `matches` is the match table replayed by the simulation.
YEARS_URL = "https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/years.csv"
MATCHES_URL = "https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/matches.csv"

years = pd.read_csv(YEARS_URL)
matches = pd.read_csv(MATCHES_URL)

In [3]:
def run_simulation(model, data, start_year, end_year):
    """Replay each season's matches through ``model`` and score its predictions.

    For every year in ``start_year..end_year`` (inclusive) the rows of ``data``
    belonging to that year are visited in ascending ``time`` order and handed,
    one at a time, to ``model.predict``.

    Parameters
    ----------
    model : object
        Must expose ``predict(match)`` returning a number that is compared
        against 0.5 and against the encoded outcome (red = 1, blue = 0,
        draw = 0.5), i.e. it is treated as the probability that red wins.
    data : pandas.DataFrame
        Needs at least the columns ``year``, ``time`` and ``winner``; every
        ``winner`` value must be ``"red"``, ``"blue"`` or ``"draw"``.
    start_year, end_year : int
        Inclusive range of seasons to evaluate.

    Returns
    -------
    list of [float, float]
        One ``[accuracy, mse]`` pair per year, in year order. Accuracy is the
        fraction of matches where the prediction fell on the winner's side of
        0.5 (draws and predictions of exactly 0.5 never count as correct); mse
        is the mean squared difference between prediction and encoded outcome.
        A year without any matches yields ``[nan, nan]`` rather than raising
        ``ZeroDivisionError``, so the list always has one entry per year.

    Raises
    ------
    KeyError
        If a ``winner`` value is not one of the three recognised labels.
    """
    # Numeric encoding of the outcome from red's point of view (loop-invariant).
    outcome_value = {"red": 1, "blue": 0, "draw": 0.5}

    out = []
    for year in range(start_year, end_year + 1):
        count, total_acc, total_mse = 0, 0, 0
        train_data = data[data.year == year].sort_values(by=["time"])
        for _, match in train_data.iterrows():
            win_prob = model.predict(match)

            winner = match.winner
            if (winner == "red" and win_prob > 0.5) or (winner == "blue" and win_prob < 0.5):
                total_acc += 1
            total_mse += (outcome_value[winner] - win_prob) ** 2
            count += 1

        if count == 0:
            # No matches recorded for this year: keep the slot so results stay
            # aligned with the year range, but mark the metrics as undefined.
            out.append([float("nan"), float("nan")])
            continue

        out.append([total_acc / count, total_mse / count])

    return out

In [4]:
from collections import defaultdict

class ModelWrapper:
    """Shared bookkeeping for rating-based match predictors.

    This base class only wires the pieces together. It relies on the concrete
    model to provide the ``elos`` and ``elos_list`` mappings as well as the
    ``get_new_elo``, ``predict_match`` and ``update_match`` methods.
    """

    def get_teams(self, match):
        """Split the comma-separated ``red`` / ``blue`` fields into int lists.

        Returns a ``(red_teams, blue_teams)`` tuple.
        """
        red_teams = list(map(int, match.red.split(",")))
        blue_teams = list(map(int, match.blue.split(",")))
        return red_teams, blue_teams

    def get_elo(self, year, team):
        """Return the stored rating of ``team`` in ``year``.

        Falls back to ``get_new_elo`` when nothing is stored yet; the fallback
        value is not written back here.
        """
        season = self.elos[year]
        if team not in season:
            return self.get_new_elo(year, team)
        return season[team]

    def get_elos(self, match, year, red_teams, blue_teams):
        """Look up ratings for both alliances; returns ``[red_elos, blue_elos]``."""
        ratings = []
        for alliance in (red_teams, blue_teams):
            ratings.append([self.get_elo(year, team) for team in alliance])
        return ratings

    def update_elo(self, year, team, new_elo):
        """Store ``new_elo`` as the current rating and append it to the history."""
        season = self.elos[year]
        season[team] = new_elo
        history = self.elos_list[year][team]
        history.append(new_elo)

    def update_elos(self, match, year, red_teams, blue_teams, red_elos, blue_elos):
        """Write the given ratings back, pairing teams and ratings positionally."""
        all_teams = red_teams + blue_teams
        all_elos = red_elos + blue_elos
        for team, elo in zip(all_teams, all_elos):
            self.update_elo(year, team, elo)

    def update(self, match, year, red_teams, blue_teams):
        """Fetch current ratings, let ``update_match`` adjust them, then store them."""
        old_red, old_blue = self.get_elos(match, year, red_teams, blue_teams)
        fresh_red, fresh_blue = self.update_match(match, year, old_red, old_blue)
        self.update_elos(match, year, red_teams, blue_teams, fresh_red, fresh_blue)

    def predict(self, match):
        """Predict ``match`` from the current ratings, then update the ratings.

        The prediction is computed before the update, so the returned value is
        based only on ratings stored prior to this call.
        """
        red_teams, blue_teams = self.get_teams(match)
        season = match.year
        red_ratings, blue_ratings = self.get_elos(match, season, red_teams, blue_teams)
        prediction = self.predict_match(match, red_ratings, blue_ratings)
        self.update(match, season, red_teams, blue_teams)
        return prediction
    
            
class Model(ModelWrapper):
    """Elo rating model whose updates are driven by the normalised score margin.

    A team's starting rating for a season blends a rating taken from each of
    the two preceding seasons with a fixed mean-reversion value.
    """

    def __init__(self):
        # Current rating per team: elos[year][team].
        self.elos = defaultdict(dict)
        # Every rating written during a season, in order: elos_list[year][team].
        self.elos_list = defaultdict(lambda: defaultdict(list))
        self.momentum = defaultdict(int)
        # Keys are 2002 + the row label of `years`; values are that row's score_sd.
        # NOTE(review): presumes rows of `years` are labelled 0, 1, 2, ... with
        # row 0 being season 2002 -- confirm against the CSV.
        sd_by_row = years.score_sd.to_dict()
        self.sd_scores = {2002 + row: sd for row, sd in sd_by_row.items()}

        # Weights of the previous season and the season before that.
        self.one_weight = 0.56
        self.two_weight = 0.24
        # Ratings recorded during a season's first `min_games` updates are ignored.
        self.min_games = 8
        # Rating used when a season has too little history, and as the reversion target.
        self.mean_reversion = 1450

    def _peak_elo(self, year, team):
        """Highest rating recorded for ``team`` in ``year`` after the first
        ``min_games`` entries, or ``mean_reversion`` if there are no more than
        ``min_games`` entries."""
        history = self.elos_list[year][team]
        if len(history) <= self.min_games:
            return self.mean_reversion
        return max(history[self.min_games:])

    def get_new_elo(self, year, team):
        """Starting rating for ``team`` in ``year``.

        Weighted sum of the peak from ``year - 1`` (``one_weight``), the peak
        from ``year - 2`` (``two_weight``) and ``mean_reversion`` (remaining weight).
        """
        # Look up year - 2 first, then year - 1, matching the storage access order.
        elo_2 = self._peak_elo(year - 2, team)
        elo_1 = self._peak_elo(year - 1, team)

        reversion_weight = 1 - self.one_weight - self.two_weight
        return (
            self.one_weight * elo_1
            + self.two_weight * elo_2
            + reversion_weight * self.mean_reversion
        )

    def elo_sum(self, elos):
        """Combine an alliance's ratings into one strength value (plain sum)."""
        return sum(elos)

    def predict_match(self, match, red_elos, blue_elos):
        """Return the red-win probability from the alliance rating totals.

        Uses a base-10 logistic curve with a divisor of 400 on the rating gap.
        """
        red_total = self.elo_sum(red_elos)
        blue_total = self.elo_sum(blue_elos)
        exponent = (blue_total - red_total) / 400
        return 1 / (10 ** exponent + 1)

    def update_match(self, match, year, red_elos, blue_elos):
        """Return ``(new_red_elos, new_blue_elos)`` after observing ``match``.

        The shift is a step factor (4 when ``match.playoff`` is truthy, else 12)
        times the difference between the actual margin (score difference divided
        by the season's ``sd_scores`` entry) and the predicted margin (0.004 per
        rating point of alliance gap). Red ratings move up by the shift and blue
        ratings move down by the same amount.
        """
        elo_gap = self.elo_sum(red_elos) - self.elo_sum(blue_elos)
        pred_win_margin = 4 / 1000 * elo_gap
        win_margin = (match.red_score - match.blue_score) / self.sd_scores[year]
        step = 4 if match.playoff else 12
        shift = step * (win_margin - pred_win_margin)

        new_red = [elo + shift for elo in red_elos]
        new_blue = [elo - shift for elo in blue_elos]
        return new_red, new_blue

In [5]:
# Evaluate a fresh model on seasons 2013-2020 and report per-year metrics
# followed by their unweighted average across years.
model = Model()
data = run_simulation(model, matches, 2013, 2020)

# Accumulate explicitly, left to right, so the floating-point result does not
# depend on how the built-in sum() handles floats.
acc = mse = 0
for year_acc, year_mse in data:
    acc, mse = acc + year_acc, mse + year_mse
acc, mse = acc / len(data), mse / len(data)

# One [accuracy, mse] pair per line, a blank line, then the averages.
print(*data, sep="\n")
print()
print(acc, mse)

[0.6796069392211573, 0.1968362058719695]
[0.6938584153774027, 0.19434439494626804]
[0.700152207001522, 0.18550598485479916]
[0.7016516516516517, 0.1862521535032651]
[0.6493329879549281, 0.20932405061612483]
[0.7402244536326048, 0.1725275692906418]
[0.701358469642362, 0.18256784527211412]
[0.6954714504484796, 0.1929892487069686]

0.6952070718662636 0.19004343163276888
