# **March Madness**

<img src="https://a.espncdn.com/photo/2025/0316/2025_ncaam_bracket_filled_3x2.jpg" />

In [34]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## CBB Data

**Columns**
 - CONF: Conference
 - G: Games played
 - W: Number of wins
 - ADJOE: Adjusted offensive efficiency
 - ADJDE: Adjusted defensive efficiency
 - BARTHAG: Power rating
 - EFG_O: Effective field goal percentage
 - EFG_D: Effective field goal percentage allowed
 - TOR: Turnover rate
 - TORD: Forced turnover rate
 - ORB: Offensive rebound rate
 - DRB: Defensive rebound rate
 - FTR: Free throw rate
 - FTRD: Free throw rate allowed
 - 2P_O: Two-point shooting percentage
 - 2P_D: Two-point shooting percentage allowed
 - 3P_O: Three-point shooting percentage
 - 3P_D: Three-point shooting percentage allowed
 - ADJ_T: Adjusted tempo
 - WAB: Wins above bubble
 - POSTSEASON: NCAA tournament finish
 - SEED: Seed in NCAA tournament
 

In [38]:
# Score assigned to each tournament finish; a deeper run gets a larger value.
# A POSTSEASON value that is not a key here (or is missing) maps to NaN.
postseason_mapping = {
    "Champions": 100,
    "2ND": 75,
    "F4": 50,
    "E8": 35,
    "S16": 15,
    "R32": 5,
    "R64": 0,
}

# NOTE(review): absolute path tied to one machine; consider a configurable data directory.
cbb = (
    pd.read_csv("/Users/jonathan/cmse401/Project/CMSE401-Project/data/cbb.csv")
    # Numeric version of the finish, appended as the last column.
    .assign(POSTSEASON_INT=lambda frame: frame["POSTSEASON"].map(postseason_mapping))
    # Drops every row holding a NaN in any column, which includes rows whose
    # finish did not map to a score above.
    .dropna()
)
cbb

Unnamed: 0,TEAM,CONF,G,W,ADJOE,ADJDE,BARTHAG,EFG_O,EFG_D,TOR,...,2P_O,2P_D,3P_O,3P_D,ADJ_T,WAB,POSTSEASON,SEED,YEAR,POSTSEASON_INT
0,North Carolina,ACC,40,33,123.3,94.9,0.9531,52.6,48.1,15.4,...,53.9,44.6,32.7,36.2,71.7,8.6,2ND,1.0,2016,75.0
1,Wisconsin,B10,40,36,129.1,93.6,0.9758,54.8,47.7,12.4,...,54.8,44.7,36.5,37.5,59.3,11.3,2ND,1.0,2015,75.0
2,Michigan,B10,40,33,114.4,90.4,0.9375,53.9,47.7,14.0,...,54.7,46.8,35.2,33.2,65.9,6.9,2ND,3.0,2018,75.0
3,Texas Tech,B12,38,31,115.2,85.2,0.9696,53.5,43.0,17.7,...,52.8,41.9,36.5,29.7,67.5,7.0,2ND,3.0,2019,75.0
4,Gonzaga,WCC,39,37,117.8,86.3,0.9728,56.6,41.1,16.2,...,56.3,40.0,38.2,29.0,71.5,7.7,2ND,1.0,2017,75.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3221,Princeton,Ivy,30,23,109.1,101.0,0.7083,52.2,48.4,16.5,...,53.3,48.1,33.7,32.7,67.0,-3.2,S16,15.0,2023,15.0
3222,Fairleigh Dickinson,NEC,35,21,106.1,114.7,0.2909,51.3,54.7,16.3,...,51.1,55.8,34.3,35.3,68.6,-12.2,R32,16.0,2023,5.0
3223,Northern Kentucky,Horz,33,22,102.6,101.4,0.5349,49.1,51.3,17.3,...,47.4,49.3,34.3,36.4,61.9,-6.7,R64,16.0,2023,0.0
3224,Howard,MEAC,32,22,102.7,106.3,0.4031,51.8,50.9,22.8,...,49.8,51.1,36.9,33.8,69.0,-7.3,R64,16.0,2023,0.0


In [1]:
import torch

In [19]:
# 2023 table used to rebuild the bracket; rows with a NaN in any column are discarded.
# NOTE(review): absolute path tied to one machine; consider a configurable data directory.
cbb23 = pd.read_csv(
    "/Users/jonathan/cmse401/Project/CMSE401-Project/data/cbb23.csv"
).dropna()

In [42]:
# Pearson correlation of each numeric column with the postseason score,
# listed from most positive to most negative.
# The frame also holds text columns (TEAM, CONF, POSTSEASON). Selecting the
# numeric columns explicitly keeps this working on pandas >= 2.0, where
# DataFrame.corr() no longer drops non-numeric columns silently and raises instead.
cbb.select_dtypes(include="number").corr()["POSTSEASON_INT"].sort_values(ascending=False)

POSTSEASON_INT    1.000000e+00
W                 5.022740e-01
WAB               4.509306e-01
ADJOE             4.459384e-01
G                 3.853054e-01
BARTHAG           3.743954e-01
EFG_O             2.052059e-01
2P_O              1.920180e-01
ORB               1.546481e-01
3P_O              1.248625e-01
TORD              3.450582e-02
YEAR              6.387526e-14
DRB              -1.014374e-02
ADJ_T            -2.363851e-02
FTR              -6.354109e-02
FTRD             -1.305556e-01
3P_D             -1.400574e-01
2P_D             -1.546239e-01
TOR              -1.617970e-01
EFG_D            -1.954761e-01
ADJDE            -3.424668e-01
SEED             -4.513605e-01
Name: POSTSEASON_INT, dtype: float64

## Round of 64

In [141]:
regions = ['South', 'Midwest', 'East', 'West']
R1matchups_by_region = {}

for region in regions:
    # 2023 rows belonging to this region, in file order.
    # Pairing row j with row 15-j presumably relies on the rows being in seed order — TODO confirm.
    region_df = cbb23[cbb23['REGION'] == region]
    R1region_matchups = []

    for j in range(8):
        team1, team2 = region_df.iloc[j], region_df.iloc[15 - j]

        # Only team1's finish is inspected: a finish of 'R64' marks team1 as
        # the loser of this game, anything else records team1 as the winner.
        winner = 2 if team1["POSTSEASON"] == 'R64' else 1

        side1 = {
            "Team #1": team1["TEAM"], "Seed1": team1["SEED"],
            "ADJOE1": team1["ADJOE"], "ADJDE1": team1["ADJDE"],
            "Wins1": team1["W"], "Turnover Rate1": team1["TOR"],
            "Offensive Reb1": team1["ORB"],
        }
        side2 = {
            "Team #2": team2["TEAM"], "Seed2": team2["SEED"],
            "ADJOE2": team2["ADJOE"], "ADJDE2": team2["ADJDE"],
            "Wins2": team2["W"], "Turnover Rate2": team2["TOR"],
            "Offensive Reb2": team2["ORB"],
        }
        R1region_matchups.append({**side1, **side2, "Winner": winner})

    R1matchups_by_region[region] = pd.DataFrame(R1region_matchups)

# All first-round games stacked into a single frame with a fresh 0..n-1 index.
R64 = pd.concat(R1matchups_by_region.values(), ignore_index=True)

## Round of 32

In [142]:
R2matchups_by_region = {}

for region in regions:
    region_df = cbb23[cbb23['REGION'] == region]
    df = R1matchups_by_region[region]  # this region's Round of 64 games
    R2region_matchups = []

    for j in range(4):
        # Game j of the previous round feeds the same slot as game 7-j.
        game1, game2 = df.iloc[j], df.iloc[7 - j]
        winner1 = game1[f"Team #{game1['Winner']}"]
        winner2 = game2[f"Team #{game2['Winner']}"]

        # Look the advancing teams back up to get their full stat rows.
        team1 = region_df[region_df["TEAM"] == winner1].iloc[0]
        team2 = region_df[region_df["TEAM"] == winner2].iloc[0]

        # Only team1's finish is inspected: 'R32' marks team1 as the loser here.
        winner = 2 if team1["POSTSEASON"] == 'R32' else 1

        side1 = {
            "Team #1": team1["TEAM"], "Seed1": team1["SEED"],
            "ADJOE1": team1["ADJOE"], "ADJDE1": team1["ADJDE"],
            "Wins1": team1["W"], "Turnover Rate1": team1["TOR"],
            "Offensive Reb1": team1["ORB"],
        }
        side2 = {
            "Team #2": team2["TEAM"], "Seed2": team2["SEED"],
            "ADJOE2": team2["ADJOE"], "ADJDE2": team2["ADJDE"],
            "Wins2": team2["W"], "Turnover Rate2": team2["TOR"],
            "Offensive Reb2": team2["ORB"],
        }
        R2region_matchups.append({**side1, **side2, "Winner": winner})

    R2matchups_by_region[region] = pd.DataFrame(R2region_matchups)

# All second-round games stacked into a single frame with a fresh index.
R32 = pd.concat(R2matchups_by_region.values(), ignore_index=True)

## Sweet 16

In [143]:
S16matchups_by_region = {}

for region in regions:
    region_df = cbb23[cbb23['REGION'] == region]
    df = R2matchups_by_region[region]  # this region's Round of 32 games
    S16region_matchups = []

    for j in range(2):
        # Game j of the previous round feeds the same slot as game 3-j.
        game1, game2 = df.iloc[j], df.iloc[3 - j]
        winner1 = game1[f"Team #{game1['Winner']}"]
        winner2 = game2[f"Team #{game2['Winner']}"]

        # Look the advancing teams back up to get their full stat rows.
        team1 = region_df[region_df["TEAM"] == winner1].iloc[0]
        team2 = region_df[region_df["TEAM"] == winner2].iloc[0]

        # Only team1's finish is inspected: 'S16' marks team1 as the loser here.
        winner = 2 if team1["POSTSEASON"] == 'S16' else 1

        side1 = {
            "Team #1": team1["TEAM"], "Seed1": team1["SEED"],
            "ADJOE1": team1["ADJOE"], "ADJDE1": team1["ADJDE"],
            "Wins1": team1["W"], "Turnover Rate1": team1["TOR"],
            "Offensive Reb1": team1["ORB"],
        }
        side2 = {
            "Team #2": team2["TEAM"], "Seed2": team2["SEED"],
            "ADJOE2": team2["ADJOE"], "ADJDE2": team2["ADJDE"],
            "Wins2": team2["W"], "Turnover Rate2": team2["TOR"],
            "Offensive Reb2": team2["ORB"],
        }
        S16region_matchups.append({**side1, **side2, "Winner": winner})

    # Build the region's frame once, after both games are collected, rather
    # than rebuilding it inside the game loop on every iteration.
    S16matchups_by_region[region] = pd.DataFrame(S16region_matchups)

# All Sweet 16 games stacked into a single frame with a fresh index.
S16 = pd.concat(S16matchups_by_region.values(), ignore_index=True)

## Elite 8

In [145]:
E8matchup_by_region = {}

for region in regions:
    region_df = cbb23[cbb23['REGION'] == region]
    df = S16matchups_by_region[region]  # this region's two Sweet 16 games
    E8region_matchup = []

    # The two Sweet 16 winners meet in the regional final.
    game1, game2 = df.iloc[0], df.iloc[1]
    winner1 = game1[f"Team #{game1['Winner']}"]
    winner2 = game2[f"Team #{game2['Winner']}"]

    # Look the advancing teams back up to get their full stat rows.
    team1 = region_df[region_df["TEAM"] == winner1].iloc[0]
    team2 = region_df[region_df["TEAM"] == winner2].iloc[0]

    # Only team1's finish is inspected: 'E8' marks team1 as the loser here.
    winner = 2 if team1["POSTSEASON"] == 'E8' else 1

    side1 = {
        "Team #1": team1["TEAM"], "Seed1": team1["SEED"],
        "ADJOE1": team1["ADJOE"], "ADJDE1": team1["ADJDE"],
        "Wins1": team1["W"], "Turnover Rate1": team1["TOR"],
        "Offensive Reb1": team1["ORB"],
    }
    side2 = {
        "Team #2": team2["TEAM"], "Seed2": team2["SEED"],
        "ADJOE2": team2["ADJOE"], "ADJDE2": team2["ADJDE"],
        "Wins2": team2["W"], "Turnover Rate2": team2["TOR"],
        "Offensive Reb2": team2["ORB"],
    }
    E8region_matchup.append({**side1, **side2, "Winner": winner})

    E8matchup_by_region[region] = pd.DataFrame(E8region_matchup)

# One regional final per region, stacked into a single frame with a fresh index.
E8 = pd.concat(E8matchup_by_region.values(), ignore_index=True)

## Final Four

In [114]:
def _regional_winner(e8_frame):
    """Return the team name recorded as the winner in the first row of an Elite 8 frame."""
    final = e8_frame.iloc[0]
    return final[f"Team #{final['Winner']}"]


# Elite 8 frame for each region, looked up by region name.
SouthE8, EastE8, MidwestE8, WestE8 = (
    E8matchup_by_region[name] for name in ('South', 'East', 'Midwest', 'West')
)

# Name of the team that advances from each region.
South_winner = _regional_winner(SouthE8)
East_winner = _regional_winner(EastE8)
Midwest_winner = _regional_winner(MidwestE8)
West_winner = _regional_winner(WestE8)

### Left Side of Bracket

In [127]:
# Start a fresh list of semifinal records; the first entry is South vs East.
F4_matchups = []

team1 = cbb23[cbb23["TEAM"] == South_winner].iloc[0]
team2 = cbb23[cbb23["TEAM"] == East_winner].iloc[0]

# Only team1's finish is inspected: 'F4' marks team1 as the loser of this semifinal.
winner = 2 if team1["POSTSEASON"] == 'F4' else 1

side1 = {
    "Team #1": team1["TEAM"], "Seed1": team1["SEED"],
    "ADJOE1": team1["ADJOE"], "ADJDE1": team1["ADJDE"],
    "Wins1": team1["W"], "Turnover Rate1": team1["TOR"],
    "Offensive Reb1": team1["ORB"],
}
side2 = {
    "Team #2": team2["TEAM"], "Seed2": team2["SEED"],
    "ADJOE2": team2["ADJOE"], "ADJDE2": team2["ADJDE"],
    "Wins2": team2["W"], "Turnover Rate2": team2["TOR"],
    "Offensive Reb2": team2["ORB"],
}
F4_matchups.append({**side1, **side2, "Winner": winner})

### Right Side of Bracket

In [128]:
# Second semifinal: Midwest winner vs West winner.
team1 = cbb23[cbb23["TEAM"] == Midwest_winner].iloc[0]
team2 = cbb23[cbb23["TEAM"] == West_winner].iloc[0]

# Only team1's finish is inspected: 'F4' marks team1 as the loser of this semifinal.
winner = 2 if team1["POSTSEASON"] == 'F4' else 1

side1 = {
    "Team #1": team1["TEAM"], "Seed1": team1["SEED"],
    "ADJOE1": team1["ADJOE"], "ADJDE1": team1["ADJDE"],
    "Wins1": team1["W"], "Turnover Rate1": team1["TOR"],
    "Offensive Reb1": team1["ORB"],
}
side2 = {
    "Team #2": team2["TEAM"], "Seed2": team2["SEED"],
    "ADJOE2": team2["ADJOE"], "ADJDE2": team2["ADJDE"],
    "Wins2": team2["W"], "Turnover Rate2": team2["TOR"],
    "Offensive Reb2": team2["ORB"],
}

# Write this game into slot 1 instead of appending. With the single
# left-side record already in the list this is the same as an append, but
# re-running the cell replaces the entry rather than stacking duplicates.
F4_matchups[1:] = [{**side1, **side2, "Winner": winner}]

In [138]:
# Turn the semifinal records collected in F4_matchups into a frame, one row per game.
F4 = pd.DataFrame(F4_matchups)

## Championship

In [133]:
championship_matchup = []

# The finalists are whichever side each semifinal row records as its winner.
semifinal1, semifinal2 = F4.iloc[0], F4.iloc[1]
winner1 = semifinal1[f"Team #{semifinal1['Winner']}"]
winner2 = semifinal2[f"Team #{semifinal2['Winner']}"]

# Look the finalists back up to get their full stat rows.
team1 = cbb23[cbb23["TEAM"] == winner1].iloc[0]
team2 = cbb23[cbb23["TEAM"] == winner2].iloc[0]

# Only team1's finish is inspected: '2ND' marks team1 as the loser of the final.
winner = 2 if team1["POSTSEASON"] == '2ND' else 1

side1 = {
    "Team #1": team1["TEAM"], "Seed1": team1["SEED"],
    "ADJOE1": team1["ADJOE"], "ADJDE1": team1["ADJDE"],
    "Wins1": team1["W"], "Turnover Rate1": team1["TOR"],
    "Offensive Reb1": team1["ORB"],
}
side2 = {
    "Team #2": team2["TEAM"], "Seed2": team2["SEED"],
    "ADJOE2": team2["ADJOE"], "ADJDE2": team2["ADJDE"],
    "Wins2": team2["W"], "Turnover Rate2": team2["TOR"],
    "Offensive Reb2": team2["ORB"],
}
championship_matchup.append({**side1, **side2, "Winner": winner})

pd.DataFrame(championship_matchup)

Unnamed: 0,Team #1,Seed1,ADJOE1,ADJDE1,Wins1,Turnover Rate1,Offensive Reb1,Team #2,Seed2,ADJOE2,ADJDE2,Wins2,Turnover Rate2,Offensive Reb2,Winner
0,San Diego St.,5.0,110.8,89.5,32,17.3,31.6,Connecticut,4.0,121.5,91.2,31,18.9,38.5,2
