In [334]:
# Make the parent directory importable: its absolute path is appended to sys.path
# (only once, so re-running this cell does not add duplicates).
import os
import sys

module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Hot-loading modules before executing code: allows us to write code while testing it in this notebook.
# `%reload_ext` (re)loads the IPython autoreload extension; mode 2 asks it to reload
# all modules before each cell's code is executed.
%reload_ext autoreload
%autoreload 2

In [335]:
import logging

# Notebook-wide logging: the root logger emits INFO and above.
root = logging.getLogger()
root.setLevel(logging.INFO)

# Attach a stdout handler only when the root logger has none yet, so re-running
# this cell does not stack handlers and duplicate every log line.
if not root.hasHandlers():
    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter(fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    )
    root.addHandler(handler)

In [373]:
# Project-local modules (not part of this notebook).
# NOTE(review): `parser` was also the name of a standard-library module before Python 3.10;
# confirm the project module is the one being imported in your environment.
from parser import Parser
from span import Span

# Build a parser for the "M" data set (presumably the men's competition - TODO confirm)
# and load everything; parse() returns a pair that is unpacked into `seasons`
# (indexed by year in later cells) and `teams`.
parser = Parser("M")
seasons, teams = parser.parse()

2022-03-17 10:12:36,985 - parser - INFO - Column names are TeamID, TeamName, FirstD1Season, LastD1Season
2022-03-17 10:12:36,986 - parser - INFO - Processed 372 teams.
2022-03-17 10:12:36,987 - parser - INFO - Column names are Season, DayNum, WTeamID, WScore, LTeamID, LScore, WLoc, NumOT
2022-03-17 10:12:37,788 - parser - INFO - Processed 176080 games.
2022-03-17 10:12:37,789 - parser - INFO - Column names are Season, DayNum, WTeamID, WScore, LTeamID, LScore, WLoc, NumOT
2022-03-17 10:12:37,798 - parser - INFO - Processed 2317 games.
2022-03-17 10:12:37,800 - parser - INFO - Column names are Season, Seed, TeamID
2022-03-17 10:12:37,805 - parser - INFO - Processed 2422 seeds.
2022-03-17 10:12:37,806 - parser - INFO - Column names are Season, RankingDayNum, SystemName, TeamID, OrdinalRank
2022-03-17 10:12:42,898 - parser - INFO - Processed 4603637 ranking rows.
2022-03-17 10:12:42,899 - parser - INFO - Column names are Season, DayZero, RegionW, RegionX, RegionY, RegionZ
2022-03-17 10:12:

In [337]:
import ipywidgets as widgets
from ipywidgets import Layout
from IPython.display import display

# Shared layout for the four season dropdowns.
year_dd_layout = Layout(width='155px')

# Season keys in the order `seasons` yields them; every year dropdown draws its options from here.
season_years = list(seasons.keys())


def _years_after(start_year):
    """Return the seasons strictly later than start_year (empty when start_year is None)."""
    if start_year is None:
        return []
    return [year for year in season_years if year > start_year]


def _link_end_to_start(start_dd, end_dd):
    """Keep end_dd's options limited to seasons after start_dd's current value.

    Without this link the end dropdown is filtered only once, when it is created,
    and keeps offering stale years after the start year is changed.
    """
    def _refresh_end_options(change):
        previous_end = end_dd.value
        valid_years = _years_after(change['new'])
        # Reassigning `options` makes ipywidgets select the first option again,
        # so restore the previous choice whenever it is still valid.
        end_dd.options = valid_years
        if previous_end in valid_years:
            end_dd.value = previous_end

    start_dd.observe(_refresh_end_options, names='value')


# Classifier choice; the option values ("GB", "LR", "MLP") are what gets passed on to Span.create_spans.
algorithm_dd = widgets.Dropdown(
    options=[
        ("Gradient Boosting Classifier", "GB"),
        ("Logistic Regression", "LR"),
        ("Multi-Layer Perceptron Classifier", "MLP"),
    ],
    value="MLP",
    description="Select algo:",
    layout=Layout(width='310px'),
)

# Training window.
train_start_dd = widgets.Dropdown(options=season_years, description="Train start:", value=1985,
                                  layout=year_dd_layout)
train_end_dd = widgets.Dropdown(options=_years_after(train_start_dd.value), description="Train end:", value=2015,
                                layout=year_dd_layout)
_link_end_to_start(train_start_dd, train_end_dd)

# Testing window.
test_start_dd = widgets.Dropdown(options=season_years, description="Test start:", value=2016,
                                 layout=year_dd_layout)
test_end_dd = widgets.Dropdown(options=_years_after(test_start_dd.value), description="Test end:", value=2021,
                               layout=year_dd_layout)
_link_end_to_start(test_start_dd, test_end_dd)


display(algorithm_dd)
display(train_start_dd)
display(train_end_dd)
display(test_start_dd)
display(test_end_dd)


Dropdown(description='Select algo:', index=2, layout=Layout(width='310px'), options=(('Gradient Boosting Class…

Dropdown(description='Train start:', layout=Layout(width='155px'), options=(1985, 1986, 1987, 1988, 1989, 1990…

Dropdown(description='Train end:', index=29, layout=Layout(width='155px'), options=(1986, 1987, 1988, 1989, 19…

Dropdown(description='Test start:', index=31, layout=Layout(width='155px'), options=(1985, 1986, 1987, 1988, 1…

Dropdown(description='Test end:', index=4, layout=Layout(width='155px'), options=(2017, 2018, 2019, 2020, 2021…

In [374]:
# Build the training and testing spans from the years and algorithm currently selected in the dropdowns.
span_years = (
    train_start_dd.value, train_end_dd.value,
    test_start_dd.value, test_end_dd.value,
)
train_span, test_span = Span.create_spans(seasons, *span_years, algorithm_dd.value)

# Baseline run: predict() is called without classifiers (labelled "50/50 predictions" by the author).
span_predictions = test_span.predict()

# scores = Span.score(span_predictions)
# assert scores['Average'] == 0.6931471805599453, f"50/50 classifier should yield a 0.69314 score, but yielded {scores['Average']}"

In [375]:
def get_span_predictions(max_iter):
    """Train on the training span, then predict the testing span.

    max_iter is forwarded unchanged to train_span.train. The classifier it returns is
    turned into a per-season map by test_span.build_seasons_classifiers_map, and the
    result of test_span.predict for that map is returned.
    """
    season_classifiers = test_span.build_seasons_classifiers_map(train_span.train(max_iter))
    return test_span.predict(season_classifiers)

def get_scores(span_predictions):
    """Return whatever Span.score computes for the given span predictions."""
    return Span.score(span_predictions)

def get_average_score(max_iter):
    """Train with max_iter, predict, score, and return the 'Average' entry of the scores."""
    predictions = get_span_predictions(max_iter)
    scores = get_scores(predictions)
    return scores['Average']

# Train and predict with max_iter=50000; this rebinds the notebook-level `span_predictions`
# that later cells read.
# NOTE(review): 50000 is presumably an iteration cap for the selected classifier - confirm in Span.train.
span_predictions = get_span_predictions(50000)


# get_scores(span_predictions)

In [290]:
# Start from the ranking systems of the 2003 season, then keep only those that are
# also present in every other processed season from 2003 through 2021.
systems = set(seasons[2003].tournament.rankings.keys())

for year in range(2003, 2022):
    season = seasons[year]
    print(season)
    # The truthiness check comes first so a falsy entry is never asked for `.tournament`.
    if season and season.tournament.year != 2020:
        systems = systems & season.tournament.rankings.keys()
    else:
        print ("Error")

print(systems)

<season.Season object at 0x7fb679ba0a30>
<season.Season object at 0x7fb679ba0b80>
<season.Season object at 0x7fb679ba0cd0>
<season.Season object at 0x7fb679ba0e20>
<season.Season object at 0x7fb679ba0f70>
<season.Season object at 0x7fb679be5100>
<season.Season object at 0x7fb679be5250>
<season.Season object at 0x7fb679be53a0>
<season.Season object at 0x7fb679be54f0>
<season.Season object at 0x7fb679be5640>
<season.Season object at 0x7fb679be5790>
<season.Season object at 0x7fb679be58e0>
<season.Season object at 0x7fb679be5a30>
<season.Season object at 0x7fb679be5bb0>
<season.Season object at 0x7fb679be5d90>
<season.Season object at 0x7fb679be5f70>
<season.Season object at 0x7fb6bc150190>
[]
Error
set()


In [296]:
# Inspect which ranking-system keys the 2016 season's tournament rankings contain.
seasons[2016].tournament.rankings.keys()

dict_keys(['7OT', 'ACU', 'AP', 'ARG', 'BBT', 'BIH', 'BLS', 'BOB', 'BOW', 'BUR', 'BWE', 'CNG', 'COL', 'CPA', 'CPR', 'CRO', 'D1A', 'DAV', 'DC', 'DCI', 'DES', 'DII', 'DOK', 'DOL', 'EBP', 'FAS', 'FSH', 'HAS', 'KPI', 'KPK', 'KRA', 'LMC', 'LOG', 'MAS', 'MCL', 'MOR', 'MSX', 'NOL', 'PGH', 'PIG', 'POM', 'REW', 'RPI', 'RT', 'RTH', 'RTP', 'RTR', 'SAG', 'SEL', 'SFX', 'SP', 'SPR', 'SPW', 'STH', 'TPR', 'TRK', 'TRP', 'TW', 'USA', 'WIL', 'WLK', 'WOB', 'WOL'])

In [372]:
# Submission rows: a header line, then one "<year>_<team_1_id>_<team_2_id>,<win_p>" row
# per prediction of each processed year (2020 is skipped if it ever falls in the range).
lines = ["ID,Pred"]
lines.extend(
    f"{year}_{prediction.team_1_id}_{prediction.team_2_id},{prediction.win_p}"
    for year in range(2022, 2023)
    if year != 2020
    for prediction in span_predictions[year]
)

# Every row, header included, is written with a trailing newline.
with open('submission_final_2_w.csv', 'w') as f:
    f.writelines(f"{line}\n" for line in lines)



AttributeError: 'list' object has no attribute 'tournament'

In [183]:
def get_seeds_for_region(region, excluded_team_ids=(1461,)):
    """Return the seeds of one region, ordered by position.

    Args:
        region: Value compared against each seed's `region` attribute
            (this notebook calls it with "W", "X", "Y" and "Z").
        excluded_team_ids: Team IDs to leave out of the result. Defaults to
            (1461,), which this notebook's team-name lookup reports as Wyoming
            (the earlier comment naming Appalachian St did not match that ID).
            NOTE(review): presumably excluded so the region has 16 entries - confirm.

    Returns:
        A new list of seed objects sorted ascending by their `position`.
    """
    # Reads the notebook-level `seeds` mapping; its values are the seed objects.
    return sorted(
        (
            seed for seed in seeds.values()
            if seed.region == region and seed.team_id not in excluded_team_ids
        ),
        key=lambda seed: seed.position,
    )

In [194]:
# Position-ordered seed lists for the four regions, requested in the order W, X, Y, Z.
seeds_w, seeds_x, seeds_y, seeds_z = (
    get_seeds_for_region(region) for region in "WXYZ"
)

def get_match_up_id(team_1_id, team_2_id):
    """Return "<smaller>_<larger>" for two team IDs, whatever order they are given in."""
    if team_1_id > team_2_id:
        return f"{team_2_id}_{team_1_id}"
    return f"{team_1_id}_{team_2_id}"

def get_match_up_name(team_1_id, team_2_id):
    """Return "<name> vs. <name>" for two teams, with the smaller team ID listed first."""
    if team_1_id > team_2_id:
        first_id, second_id = team_2_id, team_1_id
    else:
        first_id, second_id = team_1_id, team_2_id
    return f"{name(first_id)} vs. {name(second_id)}"

In [209]:
def name(team_id, year=2022):
    """Return the name of a team as recorded in one season.

    Args:
        team_id: Key into the season's `teams` mapping.
        year: Season to look the team up in; defaults to 2022, the season
            this notebook builds its bracket for.

    Returns:
        The `name` attribute of the matching team object.
    """
    return seasons[year].teams[team_id].name

import random

# Index the 2022 predictions by "<team_1_id>_<team_2_id>" for direct lookup.
predictions = span_predictions[2022]
d = {f"{p.team_1_id}_{p.team_2_id}": p.win_p for p in predictions}

# The region being simulated gets its own name: rebinding the notebook-level `seeds`
# (whose .values() get_seeds_for_region reads) to a list breaks that function on re-run.
region_seeds = seeds_w
n = 15  # last index of a 16-entry region; entry i is paired with entry n - i
for i in range(8):
    team_id_i = region_seeds[i].team_id
    team_id_n_i = region_seeds[n - i].team_id
    match_up_id = get_match_up_id(team_id_i, team_id_n_i)

    # The lookup key lists the smaller team ID first, so the stored probability is tied
    # to that ID order rather than to seed order; apply it to the smaller-ID team.
    # NOTE(review): assumes win_p is the probability that team_1 (the first ID in the
    # key) wins - confirm against the prediction class.
    low_id = min(team_id_i, team_id_n_i)
    high_id = max(team_id_i, team_id_n_i)
    winner_id = low_id if random.uniform(0, 1) <= d[match_up_id] else high_id

    print(f"{get_match_up_name(team_id_i, team_id_n_i)} --> {name(winner_id)}")



Baylor vs. Norfolk St --> Baylor
Kentucky vs. St Peter's --> Kentucky
Purdue vs. Yale --> Purdue
Akron vs. UCLA --> UCLA
Indiana vs. St Mary's CA --> St Mary's CA
Texas vs. Virginia Tech --> Texas
Murray St vs. San Francisco --> Murray St
Marquette vs. North Carolina --> North Carolina


In [193]:
# Print the id of every 2022 team whose name contains 'Wyoming'.
wyoming_ids = [
    team.id
    for _team_key, team in seasons[2022].teams.items()
    if 'Wyoming' in team.name
]
for wyoming_id in wyoming_ids:
    print (wyoming_id)

1461


In [181]:
# List (team_id, team name) for every seed in region W, in the list's current order.
[(s.team_id, name(s.team_id)) for s in seeds_w]

[(1124, 'Baylor'),
 (1246, 'Kentucky'),
 (1345, 'Purdue'),
 (1417, 'UCLA'),
 (1388, "St Mary's CA"),
 (1400, 'Texas'),
 (1293, 'Murray St'),
 (1314, 'North Carolina'),
 (1266, 'Marquette'),
 (1362, 'San Francisco'),
 (1439, 'Virginia Tech'),
 (1231, 'Indiana'),
 (1461, 'Wyoming'),
 (1103, 'Akron'),
 (1463, 'Yale'),
 (1389, "St Peter's"),
 (1313, 'Norfolk St')]

In [101]:
# NOTE(review): `teams_z` is not defined in any cell shown here, so this fails on a fresh
# kernel; presumably an earlier name for the region-Z seed list - confirm before re-running.
len(teams_z)

17

In [102]:
# Sanity arithmetic: 17 entries in each of 4 regions.
17*4

68

In [104]:
# Count the entries of the `seeds` mapping. `seeds` is not created in any cell shown here;
# it has to be a mapping (get_seeds_for_region also reads it via `seeds.values()`).
len(seeds.keys())

68