In [48]:
import pandas as pd

df_path = r"C:\Users\adiad\Documents\GitHub\ncaa-tourney-predict\five_thirty_eight.csv"
df_path = df_path.replace("\\", "/")
df = pd.read_csv(df_path)
df = df.replace("%", "", regex=True)

# drop first round column, rename all other round columns
cols = df.columns.tolist()
cols.remove("WIN")
df = df.drop("1ST", axis=1)
df.columns = cols

# split the team name and seed into separate columns
df["SEED"] = df.TEAM.str.split(" ")
df.TEAM = df.TEAM.str.split(" ")
for i, _ in df.iterrows():
    df.iloc[i, 0] = " ".join(df.iloc[i, 0][:-1])
    df.iloc[i, -1] = int(df.iloc[i, -1][-1])

# replace "<0.1%" strings with better approximation
# assumes these low seeds have 25% chance to win relative to previous round's chance
for i, _ in df.iterrows():
    if "<0.1" in df.iloc[i, 2:-1].values:
        new_vals = []
        for val in df.iloc[i, 2:-1].values:
            if val == "<0.1":
                val = str(last_val/4)
            new_vals += [val]
            last_val = float(val)
        df.iloc[i, 2:-1] = new_vals

# make columns nummeric
for round_col in df.columns[2:-1]:
    df[round_col] = df[round_col].astype(float)
df.SEED = df.SEED.astype(int)

df.head(10)

Unnamed: 0,TEAM,REGION,POWER RATING,1ST,2ND,SWEET 16,ELITE EIGHT,FINAL FOUR,CHAMP.,SEED
0,Alabama,South,92.6,99.0,82.0,65.0,45.0,30.0,16.0,1
1,Arizona,South,89.0,94.0,67.0,35.0,15.0,8.0,4.0,2
2,Arizona St.,West,82.4,37.0,11.0,5.0,2.0,0.5,0.2,11
3,Arkansas,West,85.1,52.0,18.0,9.0,4.0,1.0,0.6,8
4,Auburn,Midwest,84.9,58.0,16.0,9.0,4.0,2.0,0.8,9
5,Baylor,South,87.1,89.0,45.0,24.0,10.0,6.0,3.0,3
6,Boise State,West,82.6,59.0,19.0,6.0,2.0,0.6,0.2,10
7,Charleston,South,79.8,33.0,15.0,2.0,0.8,0.2,0.05,12
8,Colgate,Midwest,76.2,8.0,2.0,0.5,0.125,0.03125,0.007812,15
9,Creighton,South,87.6,79.0,46.0,26.0,11.0,6.0,2.0,6


In [49]:
import numpy as np

east_vs = "south"
print_bracket = True
seed= None

east_vs = east_vs[:1].capitalize() + east_vs[1:]
valid_east_vs_values = ["Midwest", "South", "West"]
if east_vs not in valid_east_vs_values:
    raise ValueError(f"Invalid Value '{east_vs}' given for east_vs input parameter. "
                        f"It must be one of the following: {', '.join(valid_east_vs_values)[:-1]}.")
valid_east_vs_values.remove(east_vs)
valid_east_vs_values = [region + " Region" for region in valid_east_vs_values]
east_vs += " Region"

np.random.seed(seed)
region_ls = ["Midwest", "East", "South", "West"]
tourn_dict = {}
tourn_ls = []
final_four_dict = {}
for r in region_ls:

    if print_bracket:
        print(f"Region {r} results:")

    round_dict = {}
    round_ls = ["1ST", "2ND", "SWEET 16", "ELITE EIGHT"]
    round_size_ls = [64, 32, 16, 8]
    matchups_ls = [(ts,bs) for ts, bs in zip([1,8,5,4,6,3,7,2], [16,9,12,13,11,14,10,15])]
    for round, round_size in zip(round_ls, round_size_ls):

        round_str = f"{r[0]}{round_size}, "
        if print_bracket:
            print(f"{round} round")
            print("-------------------")

        victors_ls = []
        games_dict = {}
        for matchup in matchups_ls:
            ts = matchup[0]
            ts_odd = df.loc[(df.REGION == r) & (df.SEED == ts), round].values[0]
            bs = matchup[1]
            bs_odd = df.loc[(df.REGION == r) & (df.SEED == bs), round].values[0]
            ts_prob = ts_odd/(ts_odd + bs_odd)
            result = np.random.rand()
            if result < ts_prob:

                if print_bracket:
                    print(f"Seed #{ts} beat #{bs}")

                vs = ts
                victors_ls.append(ts)

            else:
                if print_bracket:
                    print(f"Seed #{ts} lost to #{bs}")

                vs = bs
                victors_ls.append(bs)
            
            games_dict[f"{ts} vs {bs}"] = vs
            games_dict["compact_str"] = f"{ts}v{bs}={vs}"
            round_str += f"{ts}v{bs}={vs}, "

        round_dict[f"{round} Round"] = games_dict
        tourn_ls += [round_str[:-2]]
        
        # rebuild the matchup list for the next round
        if round != round_ls[-1]:
            matchups_ls = []
            for i in range(0, len(victors_ls), 2):
                if victors_ls[i] < victors_ls[i+1]:
                    matchups_ls.append((victors_ls[i], victors_ls[i+1]))
                else:
                    matchups_ls.append((victors_ls[i+1], victors_ls[i]))
        
        if print_bracket:
            print("")

    final_four_dict[r] = victors_ls[0]
    tourn_dict[f"{r} Region"] = round_dict

Region Midwest results:
1ST round
-------------------
Seed #1 beat #16
Seed #8 lost to #9
Seed #5 beat #12
Seed #4 lost to #13
Seed #6 beat #11
Seed #3 beat #14
Seed #7 beat #10
Seed #2 beat #15

2ND round
-------------------
Seed #1 lost to #9
Seed #5 beat #13
Seed #3 lost to #6
Seed #2 beat #7

SWEET 16 round
-------------------
Seed #5 lost to #9
Seed #2 lost to #6

ELITE EIGHT round
-------------------
Seed #6 beat #9

Region East results:
1ST round
-------------------
Seed #1 beat #16
Seed #8 beat #9
Seed #5 lost to #12
Seed #4 beat #13
Seed #6 beat #11
Seed #3 beat #14
Seed #7 beat #10
Seed #2 beat #15

2ND round
-------------------
Seed #1 beat #8
Seed #4 beat #12
Seed #3 beat #6
Seed #2 lost to #7

SWEET 16 round
-------------------
Seed #1 lost to #4
Seed #3 beat #7

ELITE EIGHT round
-------------------
Seed #3 lost to #4

Region South results:
1ST round
-------------------
Seed #1 beat #16
Seed #8 beat #9
Seed #5 lost to #12
Seed #4 beat #13
Seed #6 beat #11
Seed #3 beat #14

In [50]:
# Asses the East vs [east_vs] game
round_str = "F4, "

if print_bracket:        
    print("Final four round")
    print("-------------------")

games_dict = {}
east_final_game_dict_values = tourn_dict["East Region"]["ELITE EIGHT Round"].values()
east_seed = list(east_final_game_dict_values)[0]
east_opponent_final_game_dict_values = tourn_dict[east_vs]["ELITE EIGHT Round"].values()
east_opponent_seed = list(east_opponent_final_game_dict_values)[0]

if east_seed == east_opponent_seed:
    ts = east_seed
    tr = "East"
    bs = east_opponent_seed
    br = east_vs.replace(" Region", "")

else:
    ts = min(east_seed, east_opponent_seed)
    tr = "East" if east_seed < east_opponent_seed else east_vs.replace(" Region", "")
    bs = max(east_seed, east_opponent_seed)
    br = east_vs.replace(" Region", "") if east_seed > east_opponent_seed else "East"


ts_odd = df.loc[(df.REGION == tr) & (df.SEED == ts), "FINAL FOUR"].values[0]
bs_odd = df.loc[(df.REGION == br) & (df.SEED == bs), "FINAL FOUR"].values[0]
ts_prob = ts_odd/(ts_odd + bs_odd)
result = np.random.rand()
vs = ts if result <= ts_prob else bs
vr = tr if result <= ts_prob else br

if print_bracket:
    if result <= ts_prob:
        print(f"Seed #{ts} from the {tr} beat #{bs} from the {br}")
    
    else:
        print(f"Seed #{ts} from the {tr} lost to #{bs} from the {br}")

games_dict[f"{tr} {ts} vs {br} {bs}"] = {
    "Victor Region": vr,
    "Victor Seed": vs,
    "compact_str": f"{tr[0]}{ts}v{br[0]}{bs}={vr[0]}{vs}"
}

round_str += f"{tr[0]}{ts}v{br[0]}{bs}={vr[0]}{vs}, "

# Asses the other Final Four game
r0_final_game_dict_values = tourn_dict[valid_east_vs_values[0]]["ELITE EIGHT Round"].values()
r0_seed = list(r0_final_game_dict_values)[0]
r0_name = valid_east_vs_values[0].replace(" Region", "")
r1_final_game_dict_values = tourn_dict[valid_east_vs_values[1]]["ELITE EIGHT Round"].values()
r1_seed = list(r1_final_game_dict_values)[0]
r1_name = valid_east_vs_values[1].replace(" Region", "")

if r0_seed == r1_seed:
    ts = r1_seed
    tr = r0_name
    bs = r0_seed
    br = r1_name

else:
    ts = min(r0_seed, r1_seed)
    tr = r0_name if r0_seed < r1_seed else r1_name
    bs = max(r0_seed, r1_seed)
    br = r1_name if r0_seed > r1_seed else r0_name

ts_odd = df.loc[(df.REGION == tr) & (df.SEED == ts), "FINAL FOUR"].values[0]
bs_odd = df.loc[(df.REGION == br) & (df.SEED == bs), "FINAL FOUR"].values[0]
ts_prob = ts_odd/(ts_odd + bs_odd)
result = np.random.rand()
vs = ts if result <= ts_prob else bs
vr = tr if result <= ts_prob else br

if print_bracket:
    if result <= ts_prob:
        print(f"Seed #{ts} from the {tr} beat #{bs} from the {br}")
    
    else:
        print(f"Seed #{ts} from the {tr} lost to #{bs} from the {br}")

games_dict[f"{tr} {ts} vs {br} {bs}"] = {
    "Victor Region": vr,
    "Victor Seed": vs,
    "compact_str": f"{tr[0]}{ts}v{br[0]}{bs}={vr[0]}{vs}"
}

round_str += f"{tr[0]}{ts}v{br[0]}{bs}={vr[0]}{vs}, "

round_dict = {"Final Four Round": games_dict}
tourn_ls += [round_str[:-2]]

# Asses the championship game
round_str = "F2, "

if print_bracket:
    print()
    print("Championship round")
    print("-------------------")

games_dict = {}
east_victor_seed = list(round_dict["Final Four Round"].values())[0]["Victor Seed"]
east_victor_region = list(round_dict["Final Four Round"].values())[0]["Victor Region"]
non_east_victor_seed = list(round_dict["Final Four Round"].values())[1]["Victor Seed"]
non_east_victor_region = list(round_dict["Final Four Round"].values())[1]["Victor Region"]

if east_victor_seed == non_east_victor_seed:
    ts = east_victor_seed
    tr = east_victor_region
    bs = non_east_victor_seed
    br = non_east_victor_region

else:
    ts = min(east_victor_seed, non_east_victor_seed)
    tr = east_victor_region if east_victor_seed < non_east_victor_seed else non_east_victor_region
    bs = max(east_victor_seed, non_east_victor_seed)
    br = non_east_victor_region if east_victor_seed > non_east_victor_seed else east_victor_region

ts_odd = df.loc[(df.REGION == tr) & (df.SEED == ts), "CHAMP."].values[0]
bs_odd = df.loc[(df.REGION == br) & (df.SEED == bs), "CHAMP."].values[0]
ts_prob = ts_odd/(ts_odd + bs_odd)
result = np.random.rand()
vs = ts if result <= ts_prob else bs
vr = tr if result <= ts_prob else br

if print_bracket:
    if result <= ts_prob:
        print(f"Seed #{ts} from the {tr} beat #{bs} from the {br}")
    
    else:
        print(f"Seed #{ts} from the {tr} lost to #{bs} from the {br}")

games_dict[f"{tr} {ts} vs {br} {bs}"] = {
    "Victor Region": vr,
    "Victor Seed": vs,
    "compact_str": f"{tr[0]}{ts}v{br[0]}{bs}={vr[0]}{vs}"
}

round_dict["Championship Round"] = games_dict
tourn_dict["Multi-Region Playoff"] = round_dict
round_str += f"{tr[0]}{ts}v{br[0]}{bs}={vr[0]}{vs}"
tourn_ls += [round_str]

Final four round
-------------------
Seed #1 from the South beat #4 from the South
Seed #1 from the West beat #6 from the West

Championship round
-------------------
Seed #1 from the South beat #1 from the West
