# **March Madness**

<img src="https://a.espncdn.com/photo/2025/0316/2025_ncaam_bracket_filled_3x2.jpg" />

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## CBB Data

**Columns**
 - CONF: Conference
 - G: Games played
 - W: Number of wins
 - ADJOE: Adjusted offensive efficiency
 - ADJDE: Adjusted defensive efficiency
 - BARTHAG: Power rating
 - EFG_O: Effective field goal percentage
 - EFG_D: Effective field goal percentage allowed
 - TOR: Turnover rate
 - TORD: Forced turnover rate
 - ORB: Offensive rebound rate
 - DRB: Defensive rebound rate
 - FTR: Free throw rate
 - FTRD: Free throw rate allowed
 - 2P_O: Two-point shooting percentage
 - 2P_D: Two-point shooting percentage allowed
 - 3P_O: Three-point shooting percentage
 - 3P_D: Three-point shooting percentage allowed
 - ADJ_T: Adjusted tempo
 - WAB: Wins above bubble
 - POSTSEASON: NCAA tournament finish (R64, R32, S16, E8, F4, 2ND, or Champions)
 - SEED: Seed in NCAA tournament
 

In [3]:
# Numeric score for each tournament finish; a deeper run gets a larger value.
postseason_mapping = {
    "Champions": 100,
    "2ND": 75,
    "F4": 50,
    "E8": 35,
    "S16": 15,
    "R32": 5,
    "R64": 0,
}

# Load the team table, attach the numeric finish, and drop every row that has
# a missing value in any column (this includes rows whose POSTSEASON is
# missing or is not a key of postseason_mapping, since .map() yields NaN there).
# NOTE: the path below is absolute and specific to the author's machine.
cbb = (
    pd.read_csv("/Users/jonathan/cmse401/Project/CMSE401-Project/data/cbb.csv")
    .assign(POSTSEASON_INT=lambda teams: teams['POSTSEASON'].map(postseason_mapping))
    .dropna()
)
cbb

Unnamed: 0,TEAM,CONF,G,W,ADJOE,ADJDE,BARTHAG,EFG_O,EFG_D,TOR,...,2P_D,3P_O,3P_D,ADJ_T,WAB,POSTSEASON,SEED,YEAR,REGION,POSTSEASON_INT
0,Kansas,B12,36,28,113.8,92.3,0.9177,52.6,47.0,17.5,...,47.2,34.7,31.0,69.0,10.1,R32,1.0,2023,West,5.0
1,Purdue,B10,35,29,117.9,93.7,0.9333,51.8,47.2,17.2,...,47.2,32.2,31.3,63.9,9.4,R64,1.0,2023,East,0.0
2,Alabama,SEC,37,31,116.1,89.0,0.9548,52.1,41.3,18.7,...,40.8,33.5,28.3,72.6,10.2,S16,1.0,2023,South,15.0
3,Houston,Amer,37,33,117.3,90.3,0.9532,52.3,42.7,15.4,...,43.4,34.0,27.9,63.4,8.0,S16,1.0,2023,Midwest,15.0
4,Gonzaga,WCC,32,28,120.3,89.9,0.9662,58.7,43.1,15.7,...,41.8,37.0,30.5,72.6,6.7,S16,1.0,2022,West,15.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3220,Weber St.,BSky,29,17,106.0,106.6,0.4831,54.1,47.8,19.0,...,47.0,39.4,33.7,64.3,-6.9,R64,16.0,2014,West,0.0
3222,Western Kentucky,SB,35,19,100.1,100.4,0.4895,48.6,47.9,22.2,...,47.6,32.7,32.4,66.0,-8.4,R64,16.0,2013,South,0.0
3223,North Carolina A&T,MEAC,36,19,90.6,95.7,0.3459,45.7,45.2,22.7,...,43.4,30.7,32.0,66.9,-11.5,R64,16.0,2013,Midwest,0.0
3224,Southern,SWAC,30,20,95.9,96.7,0.4750,49.1,41.9,16.8,...,40.5,36.7,30.0,65.0,-5.7,R64,16.0,2013,West,0.0


In [4]:
# Correlation of every numeric column with the numeric tournament finish,
# strongest positive first.  The text columns (TEAM, CONF, POSTSEASON, REGION)
# are excluded explicitly: pandas >= 2.0 no longer drops them silently in
# DataFrame.corr() and raises instead.
cbb.select_dtypes(include="number").corr()["POSTSEASON_INT"].sort_values(ascending=False)

POSTSEASON_INT    1.000000e+00
W                 5.022740e-01
WAB               4.509306e-01
ADJOE             4.459384e-01
G                 3.853054e-01
BARTHAG           3.743954e-01
EFG_O             2.052059e-01
2P_O              1.920180e-01
ORB               1.546481e-01
3P_O              1.248625e-01
TORD              3.450582e-02
YEAR             -1.531867e-13
DRB              -1.014374e-02
ADJ_T            -2.363851e-02
FTR              -6.354109e-02
FTRD             -1.305556e-01
3P_D             -1.400574e-01
2P_D             -1.546239e-01
TOR              -1.617970e-01
EFG_D            -1.954761e-01
ADJDE            -3.424668e-01
SEED             -4.511072e-01
Name: POSTSEASON_INT, dtype: float64

In [5]:
# Region order used by the matchup builders below.
regions = ['South', 'Midwest', 'East', 'West']

# One DataFrame of teams per season, keyed by the values found in YEAR
# (in order of first appearance).
data_by_year = {
    season: cbb[cbb['YEAR'] == season]
    for season in cbb['YEAR'].unique()
}

## Round of 64

In [6]:
def create_R64(data_by_year):
    """Build one row per Round-of-64 game for every season and region.

    Within each region the teams are ordered by SEED and paired from the
    outside in: position 0 with position 15, position 1 with position 14,
    and so on for 8 games (1 v 16, 2 v 15, ..., 8 v 9 when the region holds
    exactly one team per seed).

    Parameters
    ----------
    data_by_year : dict
        Maps a year to the DataFrame of that season's teams.  Each frame must
        provide the columns YEAR, REGION, TEAM, SEED, ADJOE, ADJDE, W, TOR,
        ORB and POSTSEASON.

    Returns
    -------
    pandas.DataFrame
        Columns Year, Region, then Team #k, Seedk, ADJOEk, ADJDEk, Winsk,
        Turnover Ratek and Offensive Rebk for k = 0 (lower seed number) and
        k = 1 (higher seed number), and finally Winner: 1 when Team #0's
        POSTSEASON is 'R64' (its tournament ended in this round), else 0.

    Raises
    ------
    IndexError
        If a region contains fewer than 16 teams.

    Notes
    -----
    The region order comes from the notebook-level ``regions`` list.
    """
    games = []
    for _year, data in data_by_year.items():
        for region in regions:
            # Order by seed explicitly rather than trusting the CSV row order.
            # mergesort is stable, so rows already in seed order stay as-is.
            region_df = data[data['REGION'] == region].sort_values("SEED", kind="mergesort")

            for j in range(8):
                team0 = region_df.iloc[j]
                team1 = region_df.iloc[15 - j]

                game = {"Year": int(team0["YEAR"]), "Region": team0["REGION"]}
                # Same set of fields for both sides, suffixed 0 and 1.
                for slot, team in enumerate((team0, team1)):
                    game[f"Team #{slot}"] = team["TEAM"]
                    game[f"Seed{slot}"] = int(team["SEED"])
                    game[f"ADJOE{slot}"] = team["ADJOE"]
                    game[f"ADJDE{slot}"] = team["ADJDE"]
                    game[f"Wins{slot}"] = team["W"]
                    game[f"Turnover Rate{slot}"] = team["TOR"]
                    game[f"Offensive Reb{slot}"] = team["ORB"]
                game["Winner"] = 1 if team0["POSTSEASON"] == 'R64' else 0
                games.append(game)
    return pd.DataFrame(games)

## Round of 32

In [7]:
def create_R32(data_by_year, R64):
    """Build one row per Round-of-32 game from the Round-of-64 results.

    For every season and region, Round-of-64 game j is paired with game
    7 - j (j = 0..3).  The team advancing from a game is the one named in its
    "Team #<Winner>" column; that team's full record is looked up by TEAM in
    the season's data.

    Parameters
    ----------
    data_by_year : dict
        Maps a year to the DataFrame of that season's teams.
    R64 : pandas.DataFrame
        Output of ``create_R64`` (needs Year, Region, Team #0, Team #1, Winner).

    Returns
    -------
    pandas.DataFrame
        Same column layout as the Round-of-64 table.  Winner is 1 when
        Team #0's POSTSEASON is 'R32', else 0.

    Notes
    -----
    The region order comes from the notebook-level ``regions`` list.
    """
    games = []
    for year, season in data_by_year.items():
        for region in regions:
            roster = season[season['REGION'] == region]
            previous = R64[(R64["Year"] == year) & (R64["Region"] == region)]

            for j in range(4):
                # Resolve the two advancing teams: from game j and game 7 - j.
                sides = []
                for game_pos in (j, 7 - j):
                    earlier_game = previous.iloc[game_pos]
                    advancing = earlier_game[f"Team #{earlier_game['Winner']}"]
                    sides.append(roster[roster["TEAM"] == advancing].iloc[0])
                team0, team1 = sides

                game = {"Year": int(team0["YEAR"]), "Region": team0["REGION"]}
                for slot, team in enumerate(sides):
                    game[f"Team #{slot}"] = team["TEAM"]
                    game[f"Seed{slot}"] = int(team["SEED"])
                    game[f"ADJOE{slot}"] = team["ADJOE"]
                    game[f"ADJDE{slot}"] = team["ADJDE"]
                    game[f"Wins{slot}"] = team["W"]
                    game[f"Turnover Rate{slot}"] = team["TOR"]
                    game[f"Offensive Reb{slot}"] = team["ORB"]
                game["Winner"] = 1 if team0["POSTSEASON"] == 'R32' else 0
                games.append(game)
    return pd.DataFrame(games)

## Sweet 16

In [8]:
def create_S16(data_by_year, R32):
    """Build one row per Sweet-16 game from the Round-of-32 results.

    For every season and region, Round-of-32 game j is paired with game
    3 - j (j = 0, 1).  The team advancing from a game is the one named in its
    "Team #<Winner>" column; that team's full record is looked up by TEAM in
    the season's data.

    Parameters
    ----------
    data_by_year : dict
        Maps a year to the DataFrame of that season's teams.
    R32 : pandas.DataFrame
        Output of ``create_R32`` (needs Year, Region, Team #0, Team #1, Winner).

    Returns
    -------
    pandas.DataFrame
        Same column layout as the earlier round tables.  Winner is 1 when
        Team #0's POSTSEASON is 'S16', else 0.

    Notes
    -----
    The region order comes from the notebook-level ``regions`` list.
    """
    games = []
    for year, season in data_by_year.items():
        for region in regions:
            roster = season[season['REGION'] == region]
            previous = R32[(R32["Year"] == year) & (R32["Region"] == region)]

            for j in range(2):
                # Resolve the two advancing teams: from game j and game 3 - j.
                sides = []
                for game_pos in (j, 3 - j):
                    earlier_game = previous.iloc[game_pos]
                    advancing = earlier_game[f"Team #{earlier_game['Winner']}"]
                    sides.append(roster[roster["TEAM"] == advancing].iloc[0])
                team0, team1 = sides

                game = {"Year": int(team0["YEAR"]), "Region": team0["REGION"]}
                for slot, team in enumerate(sides):
                    game[f"Team #{slot}"] = team["TEAM"]
                    game[f"Seed{slot}"] = int(team["SEED"])
                    game[f"ADJOE{slot}"] = team["ADJOE"]
                    game[f"ADJDE{slot}"] = team["ADJDE"]
                    game[f"Wins{slot}"] = team["W"]
                    game[f"Turnover Rate{slot}"] = team["TOR"]
                    game[f"Offensive Reb{slot}"] = team["ORB"]
                game["Winner"] = 1 if team0["POSTSEASON"] == 'S16' else 0
                games.append(game)
    return pd.DataFrame(games)

## Elite 8

In [9]:
def create_E8(data_by_year, S16):
    """Build one row per Elite-8 game (one per region) from the Sweet-16 results.

    For every season and region, the teams advancing from the region's first
    and second Sweet-16 rows meet.  The team advancing from a game is the one
    named in its "Team #<Winner>" column; that team's full record is looked up
    by TEAM in the season's data.

    Parameters
    ----------
    data_by_year : dict
        Maps a year to the DataFrame of that season's teams.
    S16 : pandas.DataFrame
        Output of ``create_S16`` (needs Year, Region, Team #0, Team #1, Winner).

    Returns
    -------
    pandas.DataFrame
        Same column layout as the earlier round tables.  Winner is 1 when
        Team #0's POSTSEASON is 'E8', else 0.

    Notes
    -----
    The region order comes from the notebook-level ``regions`` list.
    """
    games = []
    for year, season in data_by_year.items():
        for region in regions:
            roster = season[season['REGION'] == region]
            previous = S16[(S16["Year"] == year) & (S16["Region"] == region)]

            # The two Sweet-16 winners of this region.
            sides = []
            for game_pos in (0, 1):
                earlier_game = previous.iloc[game_pos]
                advancing = earlier_game[f"Team #{earlier_game['Winner']}"]
                sides.append(roster[roster["TEAM"] == advancing].iloc[0])
            team0, team1 = sides

            game = {"Year": int(team0["YEAR"]), "Region": team0["REGION"]}
            for slot, team in enumerate(sides):
                game[f"Team #{slot}"] = team["TEAM"]
                game[f"Seed{slot}"] = int(team["SEED"])
                game[f"ADJOE{slot}"] = team["ADJOE"]
                game[f"ADJDE{slot}"] = team["ADJDE"]
                game[f"Wins{slot}"] = team["W"]
                game[f"Turnover Rate{slot}"] = team["TOR"]
                game[f"Offensive Reb{slot}"] = team["ORB"]
            game["Winner"] = 1 if team0["POSTSEASON"] == 'E8' else 0
            games.append(game)
    return pd.DataFrame(games)

## Final Four

In [10]:
def create_F4(data_by_year, cbb, E8):
    """Build the two national-semifinal rows for every season.

    The four Elite-8 rows of a season are read positionally as the South,
    Midwest, East and West regional games (the order ``create_E8`` emits
    them), and the team named by each row's "Team #<Winner>" column is the
    regional representative.  Which regions meet depends on the year:

    * 2016, 2018:              South v West,    Midwest v East
    * 2013, 2014, 2015, 2023:  Midwest v West,  South v East
    * any other year:          South v Midwest, East v West

    Parameters
    ----------
    data_by_year : dict
        Maps a year to the DataFrame of that season's teams.
    cbb : pandas.DataFrame
        Full team table; only its YEAR column is used, to enumerate seasons.
    E8 : pandas.DataFrame
        Output of ``create_E8`` (needs Year, Team #0, Team #1, Winner).

    Returns
    -------
    pandas.DataFrame
        Two rows per season with columns Year, the Team #k / Seedk / ADJOEk /
        ADJDEk / Winsk / Turnover Ratek / Offensive Rebk fields for k = 0, 1,
        and Winner (1 when Team #0's POSTSEASON is 'F4', else 0).  There is
        no Region column.
    """
    semifinals = []
    for year in cbb['YEAR'].unique():
        regional_finals = E8[E8['Year'] == year]
        season = data_by_year[year]

        # Regional representatives, in Elite-8 row order.
        south, midwest, east, west = (
            regional_finals.iloc[pos][f"Team #{regional_finals.iloc[pos]['Winner']}"]
            for pos in range(4)
        )

        # Semifinal pairings as (game 1 side A, side B, game 2 side A, side B).
        if year in (2016, 2018):
            bracket = (south, west, midwest, east)
        elif year in (2013, 2014, 2015, 2023):
            bracket = (midwest, west, south, east)
        else:
            bracket = (south, midwest, east, west)

        entrants = [season[season["TEAM"] == name].iloc[0] for name in bracket]

        for first, second in (entrants[:2], entrants[2:]):
            game = {"Year": int(first["YEAR"])}
            for slot, team in enumerate((first, second)):
                game[f"Team #{slot}"] = team["TEAM"]
                game[f"Seed{slot}"] = int(team["SEED"])
                game[f"ADJOE{slot}"] = team["ADJOE"]
                game[f"ADJDE{slot}"] = team["ADJDE"]
                game[f"Wins{slot}"] = team["W"]
                game[f"Turnover Rate{slot}"] = team["TOR"]
                game[f"Offensive Reb{slot}"] = team["ORB"]
            game["Winner"] = 1 if first["POSTSEASON"] == 'F4' else 0
            semifinals.append(game)
    return pd.DataFrame(semifinals)

## Championship

Unnamed: 0,Team #1,Seed1,ADJOE1,ADJDE1,Wins1,Turnover Rate1,Offensive Reb1,Team #2,Seed2,ADJOE2,ADJDE2,Wins2,Turnover Rate2,Offensive Reb2,Winner
0,San Diego St.,5.0,110.8,89.5,32,17.3,31.6,Connecticut,4.0,121.5,91.2,31,18.9,38.5,2
