In [22]:
# Make the parent of the current working directory importable, so that
# modules living one level above this notebook can be imported below.
import os
import sys

module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    # Appended (not prepended), and only when absent, so re-running this cell leaves sys.path unchanged.
    sys.path.append(module_path)

# Hot-reload imported modules before each cell executes, so code can be edited
# and tested from this notebook without restarting the kernel.
# `%reload_ext` (rather than `%load_ext`) keeps this cell safe to re-run.
%reload_ext autoreload
%autoreload 2

In [79]:
import logging

# Logging strategy for this notebook: the root logger reports warnings and above.
root = logging.getLogger()
root.setLevel(logging.WARN)

# Attach a stdout handler only when no handler is configured yet, so that
# re-running this cell does not add a second handler and duplicate every log line.
if not root.hasHandlers():
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)
    handler.setLevel(logging.WARN)

    root.addHandler(handler)

In [80]:
# Project-local modules (presumably found through the path appended to sys.path in the first cell).
# NOTE(review): `parser` was also the name of a standard-library module up to Python 3.9; because the
# project path is appended (not prepended) to sys.path, this import presumably requires
# Python >= 3.10 or a rename of the local module -- confirm.
from parser import Parser
from span import Span

# Parse the project data; `parse()` returns a pair that is unpacked into `seasons` and `teams`.
# `seasons` is keyed by year in later cells (e.g. `seasons[2022]`, `seasons.keys()`).
parser = Parser()
seasons, teams = parser.parse()

In [81]:
import ipywidgets as widgets
from ipywidgets import Layout
from IPython.display import display

# Shared layout for the four year dropdowns.
year_dd_layout = Layout(width='155px')


def _year_dropdown(description, years, value):
    """Return a year dropdown offering `years`, preselecting `value`, with the shared layout."""
    return widgets.Dropdown(
        options=years,
        description=description,
        value=value,
        layout=year_dd_layout,
    )


# Classifier selection: each option is a (label, code) pair; the code is what `.value` returns.
algorithm_dd = widgets.Dropdown(
    options=[
        ("Gradient Boosting Classifier", "GB"),
        ("Logistic Regression", "LR"),
        ("Multi-Layer Perceptron Classifier", "MLP"),
    ],
    value="MLP",
    description="Select algo:",
    layout=Layout(width='310px'),
)

# NOTE: the "end" options are computed once, when this cell runs, from the start
# dropdown's value at that moment; they do not follow later changes to the start dropdown.
train_start_dd = _year_dropdown("Train start:", seasons.keys(), 1985)
train_end_dd = _year_dropdown(
    "Train end:",
    [year for year in seasons.keys() if year > train_start_dd.value],
    2015,
)

test_start_dd = _year_dropdown("Test start:", seasons.keys(), 2016)
test_end_dd = _year_dropdown(
    "Test end:",
    [year for year in seasons.keys() if year > test_start_dd.value],
    2021,
)

# Render the widgets in selection order.
display(algorithm_dd, train_start_dd, train_end_dd, test_start_dd, test_end_dd)


Dropdown(description='Select algo:', index=2, layout=Layout(width='310px'), options=(('Gradient Boosting Class…

Dropdown(description='Train start:', layout=Layout(width='155px'), options=(1985, 1986, 1987, 1988, 1989, 1990…

Dropdown(description='Train end:', index=29, layout=Layout(width='155px'), options=(1986, 1987, 1988, 1989, 19…

Dropdown(description='Test start:', index=31, layout=Layout(width='155px'), options=(1985, 1986, 1987, 1988, 1…

Dropdown(description='Test end:', index=4, layout=Layout(width='155px'), options=(2017, 2018, 2019, 2020, 2021…

In [84]:
# Build the training and test spans from the current dropdown selections.
train_span, test_span = Span.create_spans(
    seasons,
    train_start_dd.value,
    train_end_dd.value,
    test_start_dd.value,
    test_end_dd.value,
    algorithm_dd.value,
)

# Baseline: predict the test span without passing any classifiers
# (presumably a 50/50 prediction for every game, per the disabled check below -- confirm in Span.predict).
span_predictions = test_span.predict()

# Disabled sanity check of the baseline score:
# scores = Span.score(span_predictions)
# assert scores['Average'] == 0.6931471805599453, f"50/50 classifier should yield a 0.69314 score, but yielded {scores['Average']}"

In [87]:
def get_span_predictions(max_iter):
    """Train on the notebook-level `train_span` and predict the notebook-level `test_span`.

    Args:
        max_iter: forwarded unchanged to `train_span.train`.

    Returns:
        Whatever `test_span.predict` returns for the classifier map built from the
        freshly trained classifier.
    """
    trained = train_span.train(max_iter)
    per_season = test_span.build_seasons_classifiers_map(trained)
    return test_span.predict(per_season)

def get_scores(span_predictions):
    """Score the given span predictions by delegating to `Span.score` and return its result."""
    return Span.score(span_predictions)

def get_average_score(max_iter):
    """Train and predict with `max_iter`, score the result, and return its 'Average' entry."""
    predictions = get_span_predictions(max_iter)
    scores = get_scores(predictions)
    return scores['Average']

# Train with max_iter=10000 and predict the selected test span; this replaces
# the baseline predictions computed in the earlier cell.
span_predictions = get_span_predictions(max_iter=10000)

# Scoring is currently disabled:
# get_scores(span_predictions)

# Show which seasons were predicted.
span_predictions.keys()

dict_keys([2022])

In [88]:
# Build the submission file: a header row followed by one
# "<year>_<team_1_id>_<team_2_id>,<win probability>" row per prediction.
lines = ["ID,Pred"]

# Export every season present in the predictions instead of a hard-coded year, so this
# cell follows whatever test span was selected above and no longer raises KeyError when
# that selection does not include 2022. For predictions that only contain 2022 the
# output is unchanged. 2020 is still skipped, as in the original loop (presumably
# because no tournament was played that year -- TODO confirm).
for year in sorted(span_predictions):
    if year == 2020:
        continue
    lines.extend(
        f"{year}_{prediction.team_1_id}_{prediction.team_2_id},{prediction.win_p}"
        for prediction in span_predictions[year]
    )

# One row per line, with a trailing newline after the last row.
with open('submission.csv', 'w') as f:
    f.write('\n'.join(lines) + '\n')

In [91]:
# Seeds of the 2022 tournament. Later cells call `.values()` and `.keys()` on it,
# so it is a mapping-like container; its key type is not visible here.
seeds = seasons[2022].tournament.seeds

In [105]:
def get_seeds_for_region(region, all_seeds=None):
    """Return the seeds whose `region` attribute equals `region`.

    Args:
        region: region identifier to match against each seed's `region` attribute
            (the notebook uses "W", "X", "Y" and "Z").
        all_seeds: optional mapping whose values are seed objects. Defaults to the
            notebook-level `seeds`, which keeps existing one-argument calls working
            while letting the function be reused for another season.

    Returns:
        A new list of the matching seed objects, in the mapping's iteration order.
        Empty when no seed matches.
    """
    if all_seeds is None:
        # Fall back to the notebook-level mapping, as the original implementation did.
        all_seeds = seeds
    return [seed for seed in all_seeds.values() if seed.region == region]

In [106]:
# One list of seeds per region, in the order W, X, Y, Z.
seeds_w, seeds_x, seeds_y, seeds_z = (
    get_seeds_for_region(region) for region in ("W", "X", "Y", "Z")
)

In [108]:
# Order region W's seeds by their `position` attribute (sorts the list in place).
seeds_w.sort(key=lambda seed: seed.position)


In [99]:
# Number of seeds in region X. The original `len(teams_x)` used a name that is not
# defined in the visible notebook and fails with NameError on a fresh kernel.
len(seeds_x)

17

In [100]:
# Number of seeds in region Y. The original `len(teams_y)` used a name that is not
# defined in the visible notebook and fails with NameError on a fresh kernel.
len(seeds_y)

17

In [101]:
# Number of seeds in region Z. The original `len(teams_z)` used a name that is not
# defined in the visible notebook and fails with NameError on a fresh kernel.
len(seeds_z)

17

In [102]:
# Sanity check: 17 entries per region times the 4 regions (W, X, Y, Z).
17*4

68

In [104]:
# Total number of seed entries for the 2022 tournament, to compare with the 17*4 expectation.
len(seeds.keys())

68