# **The output of this code contains the correct bracket for each year**

**Data Processing Functions:**
seeds_to_dict: Converts a DataFrame containing team seeds into a dictionary mapping team IDs to seeds.
slots_to_dict: Converts a DataFrame containing tournament slots into a dictionary mapping slot IDs to tuples of strong and weak seeds.
results_to_dict: Converts a DataFrame containing tournament results into a dictionary mapping each pair of seeds (in both orders) to the seed of the winning team.
get_play_in: Determines the play-in teams based on tournament results.
slots_with_play_in: Adjusts the slots dictionary to include play-in teams.
build_bracket_dict: Constructs a bracket dictionary mapping each slot to the seed of the team that won it.
build_bracket_tab: Constructs a DataFrame representing the tournament bracket.
**Constants:**

**LEVELS**: Represents the different rounds of the tournament.
**SLOTS**: Maps each slot in the tournament bracket to the corresponding teams.
**Main Execution:**

Filters the tournament results and seeds dataframes for each season from 1998 through 2023 (2020 is skipped).
Constructs the tournament bracket DataFrames for both men's and women's tournaments.
Concatenates the men's and women's bracket DataFrames into a single submission DataFrame.
Writes the submission DataFrame to a CSV file named 'submission.csv'.
Overall, the code appears to be logically structured and should work as intended given that the input data files are properly formatted and contain the necessary information. However, without access to the actual data files, it's challenging to confirm the correctness of the results. Make sure to verify the generated submission file against the competition requirements and any provided sample submissions.

In [1]:
import polars as pl
import os
import pandas as pd

# Load every CSV file of the competition input directory into TABS, keyed by
# the file name without its extension (e.g. 'MNCAATourneySeeds').
DATA_DIR = '../input/march-machine-learning-mania-2024/'
TABS = {}
for file in os.listdir(DATA_DIR):
    name, ext = os.path.splitext(file)
    # Skip anything that is not a CSV so that sub-directories or other file
    # types neither crash the loop nor end up under a truncated key.
    if ext.lower() != '.csv':
        continue
    try:
        TABS[name] = pl.read_csv(os.path.join(DATA_DIR, file))
    except pl.ComputeError:
        # Best effort: report files polars cannot parse and carry on.
        print(f"Could not read file {file}")

Could not read file MTeamSpellings.csv
Could not read file WTeamSpellings.csv


In [2]:
def seeds_to_dict(seeds):
    """Return a ``{TeamID: Seed}`` dictionary built from the rows of *seeds*.

    *seeds* must expose ``iter_rows(named=True)`` yielding mappings with the
    keys ``'TeamID'`` and ``'Seed'``. When a TeamID occurs more than once the
    last row wins.
    """
    seed_by_team = {}
    for record in seeds.iter_rows(named=True):
        team_id = record['TeamID']
        seed_by_team[team_id] = record['Seed']
    return seed_by_team


def slots_to_dict(slots):
    """Return a ``{Slot: (StrongSeed, WeakSeed)}`` dictionary from *slots*.

    *slots* must expose ``iter_rows(named=True)`` yielding mappings with the
    keys ``'Slot'``, ``'StrongSeed'`` and ``'WeakSeed'``.
    """
    pairing_by_slot = {}
    for record in slots.iter_rows(named=True):
        slot_name = record['Slot']
        pairing_by_slot[slot_name] = (record['StrongSeed'], record['WeakSeed'])
    return pairing_by_slot


def results_to_dict(results_tab, seeds_dict):
    """Index game results by the seeds of the two participants.

    Each row of *results_tab* (read through ``iter_rows(named=True)``) supplies
    a ``'WTeamID'`` and an ``'LTeamID'``; both are translated through
    *seeds_dict*. The returned dictionary maps the pair of translated values,
    in both orders, to the translated value of the winner.

    Raises ``KeyError`` when a team id is missing from *seeds_dict*.
    """
    winner_by_pairing = {}
    for game in results_tab.iter_rows(named=True):
        winner_seed = seeds_dict[game['WTeamID']]
        loser_seed = seeds_dict[game['LTeamID']]
        # Register both orders so callers need not know which side won.
        winner_by_pairing.update({
            (winner_seed, loser_seed): winner_seed,
            (loser_seed, winner_seed): winner_seed,
        })
    return winner_by_pairing


# Slot names of the six main-draw rounds, earliest round first.
# Rounds 1-4 are played inside each region (W, X, Y, Z) with 8, 4, 2 and 1
# games per region; rounds 5 and 6 use the fixed cross-region slot names.
LEVELS = [
    [f'R{round_no}{region}{game}'
     for region in 'WXYZ'
     for game in range(1, games + 1)]
    for round_no, games in enumerate((8, 4, 2, 1), start=1)
] + [['R5WX', 'R5YZ'], ['R6CH']]

def _build_slots():
    """Return the main-draw slot table ``{slot: (strong_source, weak_source)}``.

    Round 1 pairs seed ``n`` with seed ``17 - n`` inside each region. In
    rounds 2-4 game ``g`` of a region is fed by games ``g`` and
    ``2 * games + 1 - g`` of the previous round, where ``games`` is the number
    of games per region in the current round. Rounds 5 and 6 are listed
    explicitly. Keys are inserted round by round, region by region.
    """
    regions = 'WXYZ'
    slots = {}
    for region in regions:
        for game in range(1, 9):
            slots[f'R1{region}{game}'] = (
                f'{region}{game:02d}',
                f'{region}{17 - game:02d}',
            )
    for round_no, games in ((2, 4), (3, 2), (4, 1)):
        for region in regions:
            feeder = f'R{round_no - 1}{region}'
            for game in range(1, games + 1):
                slots[f'R{round_no}{region}{game}'] = (
                    f'{feeder}{game}',
                    f'{feeder}{2 * games + 1 - game}',
                )
    slots['R5WX'] = ('R4W1', 'R4X1')
    slots['R5YZ'] = ('R4Y1', 'R4Z1')
    slots['R6CH'] = ('R5WX', 'R5YZ')
    return slots


# Maps every main-draw slot to the two seeds (round 1) or the two earlier
# slots (later rounds) whose occupants meet in it.
SLOTS = _build_slots()


def get_play_in(results):
    """Return the play-in slots implied by the names used in *results*.

    *results* is iterated as pairs of names (for a dictionary these are its
    two-element keys). Any name longer than three characters is treated as a
    play-in entrant (presumably seeds such as ``'W16a'`` / ``'W16b'``) and
    contributes its first three characters.

    Returns a sorted list without duplicates. Sorting makes the order
    reproducible between runs; a list built directly from a set of strings
    would depend on hash randomisation.
    """
    play_in = {
        name[:3]
        for pairing in results
        for name in pairing
        if len(name) > 3
    }
    return sorted(play_in)


def slots_with_play_in(play_in):
    """Return a copy of ``SLOTS`` extended with one entry per play-in slot.

    Each name ``s`` in *play_in* is mapped to the pair ``(s + 'a', s + 'b')``.
    ``SLOTS`` itself is left untouched.
    """
    play_in_games = {name: (f'{name}a', f'{name}b') for name in play_in}
    return {**SLOTS, **play_in_games}


def build_bracket_dict(results_dict):
    """Replay the tournament and return ``{slot: winner}`` for every slot.

    *results_dict* maps a pair of participants to the winner of that game (see
    ``results_to_dict``). Play-in slots are resolved first; after that each
    round of ``LEVELS`` is resolved in order, replacing any participant that
    names a slot of the preceding round with that slot's winner.

    Raises ``KeyError`` when a required game is missing from *results_dict*.
    """
    play_in_slots = get_play_in(results_dict)
    slot_map = slots_with_play_in(play_in_slots)

    # Play-in phase: the slot's pairing is itself the key into the results.
    winners = {slot: results_dict[slot_map[slot]] for slot in play_in_slots}

    # Regular phase: walk consecutive rounds, the play-in round feeding round 1.
    rounds = [play_in_slots, *LEVELS]
    for feeders, current in zip(rounds, rounds[1:]):
        for slot in current:
            strong, weak = slot_map[slot]
            if strong in feeders:
                strong = winners[strong]
            if weak in feeders:
                weak = winners[weak]
            winners[slot] = results_dict[(strong, weak)]
    return winners


def build_bracket_tab(results_tab, seeds_tab):
    """Build a two-column polars DataFrame (``Slot``, ``Team``) for one bracket.

    The results and seeds tables are turned into a slot -> winner dictionary
    via ``seeds_to_dict``, ``results_to_dict`` and ``build_bracket_dict``.
    Only slots whose name starts with ``'R'`` are kept, which drops the
    play-in slots, and each winner is cut to its first three characters
    (e.g. ``'W16a'`` -> ``'W16'``).
    """
    seed_of_team = seeds_to_dict(seeds_tab)
    outcomes = results_to_dict(results_tab, seed_of_team)
    winners = build_bracket_dict(outcomes)

    main_draw = [
        (slot, winner[:3])
        for slot, winner in winners.items()
        if slot.startswith('R')
    ]
    return pl.DataFrame({
        'Slot': [slot for slot, _ in main_draw],
        'Team': [team for _, team in main_draw],
    })

In [3]:
# Build the realised men's and women's brackets for every season and stack
# them into a single table with one row per (season, tournament, slot).
all_submissions = []
for season in range(1998, 2024):
    if season == 2020:
        # 2020 is deliberately left out of the output.
        continue

    m_results = TABS['MNCAATourneyCompactResults'].filter(pl.col('Season') == season)
    w_results = TABS['WNCAATourneyCompactResults'].filter(pl.col('Season') == season)
    m_seeds = TABS['MNCAATourneySeeds'].filter(pl.col('Season') == season)
    w_seeds = TABS['WNCAATourneySeeds'].filter(pl.col('Season') == season)

    if season == 2021:
        # NOTE(review): one men's game (team 1332 beating team 1433) is added
        # by hand for 2021 -- presumably it is absent from the results file,
        # and without it the bracket lookup would raise KeyError. Confirm the
        # values against the source data.
        extra_game = pl.DataFrame({
            'Season': 2021, 'DayNum': 136,
            'WTeamID': 1332, 'WScore': 78,
            'LTeamID': 1433, 'LScore': 59,
            'WLoc': 'N', 'NumOT': 0,
        })
        m_results = pl.concat([m_results, extra_game])

    m_tab = build_bracket_tab(m_results, m_seeds)
    w_tab = build_bracket_tab(w_results, w_seeds)

    season_tab = (
        pl.concat([
            m_tab.with_columns(
                pl.lit('M').alias('Tournament'),
                pl.lit(1).alias('Bracket'),
            ),
            w_tab.with_columns(
                pl.lit('W').alias('Tournament'),
                pl.lit(1).alias('Bracket'),
            ),
        ])
        # RowId restarts at 0 for every season.
        .with_row_index(name='RowId')
        .select(['RowId', 'Tournament', 'Bracket', 'Slot', 'Team'])
        # Name the season column explicitly instead of relying on the
        # automatic 'literal' name polars gives to a bare Python value.
        .with_columns(pl.lit(season).alias('Season'))
    )
    all_submissions.append(season_tab)

submission = pl.concat(all_submissions).to_pandas()
# The pandas index is written as the first, unnamed CSV column.
submission.to_csv('submission.csv')
submission

Unnamed: 0,RowId,Tournament,Bracket,Slot,Team,Season
0,0,M,1,R1W1,W01,1998
1,1,M,1,R1W2,W02,1998
2,2,M,1,R1W3,W14,1998
3,3,M,1,R1W4,W04,1998
4,4,M,1,R1W5,W05,1998
...,...,...,...,...,...,...
3145,121,W,1,R4Y1,Y03,2023
3146,122,W,1,R4Z1,Z01,2023
3147,123,W,1,R5WX,X02,2023
3148,124,W,1,R5YZ,Y03,2023


# Output

In [4]:
# Display the rows for season 2024 (selected with a boolean mask on 'Season').
submission.loc[submission['Season'] == 2024]

Unnamed: 0,RowId,Tournament,Bracket,Slot,Team,Season
