In [1]:
from collections import defaultdict
import json
import statistics

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
# Event table from the statbotics-csvs repository; a later cell reads its
# `key` and `week` columns to map events to weeks.
EVENTS_CSV_URL = "https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/events.csv"
events = pd.read_csv(EVENTS_CSV_URL)

In [3]:
# Match table from the statbotics-csvs repository; a later cell reads its
# `year`, `event`, `mix_win_prob` and `winner` columns.
MATCHES_CSV_URL = "https://raw.githubusercontent.com/avgupta456/statbotics-csvs/main/matches.csv"
matches = pd.read_csv(MATCHES_CSV_URL)

In [4]:
# Lookup from an event's `key` to its `week`. Built column-wise; if a key
# occurs more than once, the value from the last row is kept.
event_to_week = dict(zip(events["key"], events["week"]))

In [6]:
def _as_probability(value):
    """Return ``value`` as a probability, parsing percentage strings.

    A string such as "63%" is converted by taking the text before the first
    "%" and dividing by 100. Any non-string value (float, int, NaN) is
    returned unchanged.
    """
    if isinstance(value, str):
        return float(value.split("%")[0]) / 100
    return value


def _tally(stats, week, pred, winner):
    """Add one match to ``stats[week]``.

    ``stats[week]`` is a three-item list:
    ``[correct predictions, sum of squared errors, match count]``.
    The match is ignored when ``pred`` or ``winner`` is NaN or negative
    (both comparisons below are False for NaN).
    """
    if not (pred >= 0 and winner >= 0):
        return

    bucket = stats[week]
    # A prediction is correct only for a decisive result on the favoured side;
    # a winner value strictly between 0 and 1 never counts as correct.
    if (pred >= 0.5 and winner >= 1) or (pred < 0.5 and winner <= 0):
        bucket[0] += 1
    bucket[1] += (pred - winner) ** 2
    bucket[2] += 1


def _mean(total, count):
    """Return ``total / count``, or NaN when ``count`` is zero."""
    return total / count if count else float("nan")


def _summary(correct, squared_error, count):
    """Return the print arguments for one model: count, accuracy, MSE."""
    return (
        count, "\t",
        "{:.4f}".format(_mean(correct, count)), "\t",
        "{:.4f}".format(_mean(squared_error, count)),
    )


# For every season, print per-week and whole-season accuracy and mean squared
# error for the Sykes predictions (left columns) and the statbotics
# `mix_win_prob` predictions (right columns).
for year in range(2002, 2021):
    sykes_matches = pd.read_csv("sykes/" + str(year) + ".csv")

    sykes_stats = defaultdict(lambda: [0, 0, 0])
    for key, raw_pred, winner in zip(
        sykes_matches["Key"],
        sykes_matches["Red win likelihood"],
        sykes_matches["Red victory"],
    ):
        # The event key is the part of the match key before the first "_".
        # Events missing from `event_to_week` are counted under week 1.
        week = event_to_week.get(key.split("_")[0], 1)
        _tally(sykes_stats, week, _as_probability(raw_pred), winner)

    sb_matches = matches[matches.year == year]

    sb_stats = defaultdict(lambda: [0, 0, 0])
    for event, pred, result in zip(
        sb_matches["event"], sb_matches["mix_win_prob"], sb_matches["winner"]
    ):
        week = event_to_week[event]
        # Red win -> 1, blue win -> 0, anything else is scored as 0.5.
        winner = 1 if result == "red" else (0 if result == "blue" else 0.5)
        _tally(sb_stats, week, pred, winner)

    # Only weeks that have statbotics matches are reported. Indexing
    # `sykes_stats` for a week it lacks yields [0, 0, 0], which prints as nan
    # instead of raising ZeroDivisionError.
    weeks = sorted(sb_stats.keys())
    for week in weeks:
        print(
            year, week, "\t",
            *_summary(*sykes_stats[week]), "\t\t",
            *_summary(*sb_stats[week])
        )

    sykes_acc = sum(sykes_stats[week][0] for week in weeks)
    sykes_mse = sum(sykes_stats[week][1] for week in weeks)
    sykes_count = sum(sykes_stats[week][2] for week in weeks)
    sb_acc = sum(sb_stats[week][0] for week in weeks)
    sb_mse = sum(sb_stats[week][1] for week in weeks)
    sb_count = sum(sb_stats[week][2] for week in weeks)
    print(
        year, "\t",
        *_summary(sykes_acc, sykes_mse, sykes_count), "\t\t",
        *_summary(sb_acc, sb_mse, sb_count)
    )
    print()

2002 1 	 255 	 0.5451 	 0.2331 		 255 	 0.5255 	 0.2463
2002 2 	 341 	 0.5425 	 0.2398 		 341 	 0.5337 	 0.2478
2002 3 	 474 	 0.5992 	 0.2281 		 474 	 0.5717 	 0.2330
2002 4 	 360 	 0.5417 	 0.2460 		 360 	 0.4778 	 0.2575
2002 5 	 615 	 0.5675 	 0.2359 		 615 	 0.5593 	 0.2394
2002 8 	 152 	 0.5526 	 0.2271 		 152 	 0.5526 	 0.2386
2002 	 2197 	 0.5626 	 0.2355 		 2197 	 0.5403 	 0.2430

2003 1 	 602 	 0.6462 	 0.2190 		 602 	 0.6412 	 0.2190
2003 2 	 435 	 0.6069 	 0.2313 		 435 	 0.6092 	 0.2313
2003 3 	 427 	 0.6159 	 0.2223 		 427 	 0.6253 	 0.2281
2003 4 	 576 	 0.6111 	 0.2280 		 576 	 0.6007 	 0.2298
2003 5 	 561 	 0.6185 	 0.2182 		 561 	 0.6221 	 0.2193
2003 8 	 572 	 0.6119 	 0.2203 		 572 	 0.6259 	 0.2169
2003 	 3173 	 0.6193 	 0.2229 		 3173 	 0.6212 	 0.2235

2004 1 	 378 	 0.5423 	 0.2278 		 277 	 0.6065 	 0.2009
2004 2 	 450 	 0.6533 	 0.1998 		 450 	 0.6956 	 0.1838
2004 3 	 617 	 0.6013 	 0.2180 		 617 	 0.6224 	 0.2122
2004 4 	 656 	 0.6357 	 0.2069 		 656 	 0.6585

2020 1 	 2133 	 0.6962 	 0.1892 		 2072 	 0.7046 	 0.1828
2020 2 	 2411 	 0.6985 	 0.1951 		 2411 	 0.7059 	 0.1874
2020 3 	 88 	 0.6136 	 0.2257 		 88 	 0.6932 	 0.2062
2020 	 4632 	 0.6958 	 0.1929 		 4571 	 0.7051 	 0.1857

