In [1]:
!pip install torch


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


## 📜 Credits

- [Hugging Face: Jensen-holm/Nigl Data](https://huggingface.co/Jensen-holm/Nigl/tree/main/data)
- [Kaggle: March Machine Learning Mania 2025](https://www.kaggle.com/competitions/march-machine-learning-mania-2025/)

### Features

- **ChalkSeedDiff**: Team2 seed − Team1 seed  
- **ScoreDiff mean reg**: Team1 average score difference - Team 2's  
- **FGMDiff mean reg**: Team1 average field goals made difference in regular season - Team2
- **FGM3Diff mean reg**: Team1 average 3-point FGM difference in regular season  - Team2
- **TODiff mean reg**: Team1 average turnover difference in regular season  - Team2
- **OppScore mean reg**: Team2 average points allowed in regular season  
- **OppFGM mean reg**: Team2 average field goals made allowed  
- **OppFGM3 mean reg**: Team2 average 3-point FGM allowed  
- **OppTO mean reg**: Team2 average turnovers forced  

## Check Data

In [3]:
import pandas as pd
import math

ex = pd.read_csv('ex_row.csv')
print(ex['ChalkSeedDiff'], ex['ScoreDiff mean reg'], ex['FGMDiff mean reg'], ex['FGM3Diff mean reg']
     , ex['TODiff mean reg'], ex['OppScore mean reg'], ex['OppFGM mean reg'], ex['OppFGM3 mean reg'],
     ex['OppTO mean reg'])
print(math.isclose(float(ex['OppChalkSeed'].iloc[0]) - float(ex['ChalkSeed'].iloc[0]), float(ex['ChalkSeedDiff'].iloc[0]), rel_tol=1e-6))
print(math.isclose(float(ex['TeamScore mean reg'].iloc[0]) - float(ex['OppScore mean reg'].iloc[0]), float(ex['ScoreDiff mean reg'].iloc[0]), rel_tol=1e-6))
print(math.isclose(float(ex['TeamFGM mean reg'].iloc[0]) - float(ex['OppFGM mean reg'].iloc[0]), float(ex['FGMDiff mean reg'].iloc[0]), rel_tol=1e-6))
print(math.isclose(float(ex['TeamFGM3 mean reg'].iloc[0]) - float(ex['OppFGM3 mean reg'].iloc[0]), float(ex['FGM3Diff mean reg'].iloc[0]), rel_tol=1e-6))
print(math.isclose(float(ex['TeamTO mean reg'].iloc[0]) - float(ex['OppTO mean reg'].iloc[0]), float(ex['TODiff mean reg'].iloc[0]), rel_tol=1e-6))

0   -2
Name: ChalkSeedDiff, dtype: int64 0    4.285714
Name: ScoreDiff mean reg, dtype: float64 0    0.785714
Name: FGMDiff mean reg, dtype: float64 0    0
Name: FGM3Diff mean reg, dtype: int64 0   -0.571429
Name: TODiff mean reg, dtype: float64 0    65
Name: OppScore mean reg, dtype: int64 0    23.25
Name: OppFGM mean reg, dtype: float64 0    6.357143
Name: OppFGM3 mean reg, dtype: float64 0    13.857143
Name: OppTO mean reg, dtype: float64
True
True
True
True
True


## Prep Data

In [4]:
import pandas as pd
from collections import Counter

ids = pd.read_csv('MTeams.csv', index_col=False)[['TeamID', 'TeamName']]

df = pd.read_csv('MRegularSeasonDetailedResults.csv')
df = df[df['Season'] == 2025]
name_to_id = dict(zip(ids['TeamName'], ids['TeamID']))

# Initialize an empty dictionary to store the summed statistics for each team
team_stats = {}
team_appearances = {}

# Iterate over each game in the dataframe
for _, row in df.iterrows():
    # Update stats for the winning team
    if row['WTeamID'] not in team_stats:
        team_stats[row['WTeamID']] = {'Score': 0, 'Score Diff': 0, 'FGM': 0, 'FGM Diff': 0,
            'FGM3': 0, 'FGM3 Diff': 0, 'TO': 0, 'TO Diff': 0}
    team_stats[row['WTeamID']]['Score'] += row['WScore']
    team_stats[row['WTeamID']]['Score Diff'] += (row['WScore'] - row['LScore'])
    team_stats[row['WTeamID']]['FGM'] += row['WFGM']
    team_stats[row['WTeamID']]['FGM Diff'] += (row['WFGM'] - row['LFGM'])
    team_stats[row['WTeamID']]['FGM3'] += row['WFGM3']
    team_stats[row['WTeamID']]['FGM3 Diff'] += (row['WFGM3'] - row['LFGM3'])
    team_stats[row['WTeamID']]['TO'] += row['WTO']
    team_stats[row['WTeamID']]['TO Diff'] += (row['WTO'] - row['LTO'])
    team_appearances[row['WTeamID']] = team_appearances.get(row['WTeamID'], 0) + 1
    
    # Update stats for the losing team
    if row['LTeamID'] not in team_stats:
        team_stats[row['LTeamID']] = {'Score': 0, 'Score Diff': 0, 'FGM': 0, 'FGM Diff': 0,
            'FGM3': 0, 'FGM3 Diff': 0, 'TO': 0, 'TO Diff': 0}
    team_stats[row['LTeamID']]['Score'] += row['LScore']
    team_stats[row['LTeamID']]['Score Diff'] += (row['LScore'] - row['WScore'])
    team_stats[row['LTeamID']]['FGM'] += row['LFGM']
    team_stats[row['LTeamID']]['FGM Diff'] += (row['LFGM'] - row['WFGM'])
    team_stats[row['LTeamID']]['FGM3'] += row['LFGM3']
    team_stats[row['LTeamID']]['FGM3 Diff'] += (row['LFGM3'] - row['WFGM3'])
    team_stats[row['LTeamID']]['TO'] += row['LTO']
    team_stats[row['LTeamID']]['TO Diff'] += (row['LTO'] - row['WTO'])
    team_appearances[row['LTeamID']] = team_appearances.get(row['LTeamID'], 0) + 1

In [5]:
# Convert the dictionary into a DataFrame
team_stats_df = pd.DataFrame.from_dict(team_stats, orient='index').reset_index()
team_stats_df.columns = ['TeamID', 'TotalScore', 'TotalScoreDiff', 'TotalFGM', 'TotalFGMDiff',
                         'TotalFGM3', 'TotalFGM3Diff', 'TotalTO', 'TotalTODiff']
team_stats_df['TotalAppearances'] = team_stats_df['TeamID'].map(team_appearances)
team_stats_df['AvgScore'] = team_stats_df['TotalScore'] / team_stats_df['TotalAppearances']
team_stats_df['AvgScoreDiff'] = team_stats_df['TotalScoreDiff'] / team_stats_df['TotalAppearances']
team_stats_df['AvgFGM'] = team_stats_df['TotalFGM'] / team_stats_df['TotalAppearances']
team_stats_df['AvgFGMDiff'] = team_stats_df['TotalFGMDiff'] / team_stats_df['TotalAppearances']
team_stats_df['AvgFGM3'] = team_stats_df['TotalFGM3'] / team_stats_df['TotalAppearances']
team_stats_df['AvgFGM3Diff'] = team_stats_df['TotalFGM3Diff'] / team_stats_df['TotalAppearances']
team_stats_df['AvgTO'] = team_stats_df['TotalTO'] / team_stats_df['TotalAppearances']
team_stats_df['AvgTODiff'] = team_stats_df['TotalTODiff'] / team_stats_df['TotalAppearances']

id_counts = Counter(team_stats.keys())
duplicates = [team_id for team_id, count in id_counts.items() if count > 1]

if duplicates:
    print(f"Duplicate TeamIDs found: {duplicates}")
else:
    print("All TeamIDs are unique.")

All TeamIDs are unique.


In [6]:
# Get the games that will be played
slots = pd.read_csv('MNCAATourneySeedRoundSlots.csv')
seeds = pd.read_csv('MNCAATourneySeeds.csv')
seeds_specific = seeds[seeds['Season'] == 2025][['Seed', 'TeamID']]
team_id_to_seed = dict(zip(seeds_specific['TeamID'], seeds_specific['Seed'].str.replace(r'[A-Za-z]', '', regex=True)))

# only include seeds that map to players
slots = slots[slots['Seed'].isin(seeds_specific['Seed'])]
slots = slots.merge(seeds_specific, on='Seed', how='left')
slots['Seed'] = slots['TeamID']
slots = slots.drop(columns='TeamID')

grouped_slots = slots.groupby(['GameRound', 'GameSlot'], as_index=False).agg({'Seed': lambda x: list(x)})
grouped_slots = grouped_slots.rename(columns={'Seed': 'SeedPossibilities'})

# Maryland vs Colorado St
team_1_name = 'Maryland'
team_2_name = 'Colorado St'
m_id = name_to_id[team_1_name]
cs_id = name_to_id[team_2_name] 
check_r = grouped_slots[grouped_slots['SeedPossibilities'].apply(lambda x: m_id in x and cs_id in x)]
check_r = check_r.loc[check_r['GameRound'].idxmin()]

m_r = team_stats_df[team_stats_df['TeamID'] == m_id]
cs_r = team_stats_df[team_stats_df['TeamID'] == cs_id]

seed_diff = int(team_id_to_seed[cs_id]) - int(team_id_to_seed[m_id])
score_diff = float(m_r['AvgScoreDiff'].iat[0] - cs_r['AvgScoreDiff'].iat[0])
fgm_diff = float(m_r['AvgFGMDiff'].iat[0] - cs_r['AvgFGMDiff'].iat[0])
fgm3_diff = float(m_r['AvgFGM3Diff'].iat[0] - cs_r['AvgFGM3Diff'].iat[0])
to_diff = float(m_r['AvgTODiff'].iat[0] - cs_r['AvgTODiff'].iat[0])

vals = [seed_diff, score_diff, fgm_diff, fgm3_diff, to_diff,
        float(cs_r['AvgScoreDiff'].iat[0]),
        float(cs_r['AvgFGMDiff'].iat[0]),
        float(cs_r['AvgFGM3Diff'].iat[0]),
        float(cs_r['AvgTODiff'].iat[0])]

print(f"Seed diff: {seed_diff} → {'Edge: ' + team_1_name if -seed_diff < 0 else 'Edge: ' + team_2_name}")
print(f"Avg Score Diff: {score_diff:.2f} → {'Edge: ' + team_1_name if score_diff > 0 else 'Edge: ' + team_2_name}")
print(f"Avg FGM Diff: {fgm_diff:.2f} → {'Edge: ' + team_1_name if fgm_diff > 0 else 'Edge: ' + team_2_name}")
print(f"Avg FGM3 Diff: {fgm3_diff:.2f} → {'Edge: ' + team_1_name if fgm3_diff > 0 else 'Edge: ' + team_2_name}")
print(f"Avg TO Diff: {to_diff:.2f} → {'Edge: ' + team_1_name if to_diff > 0 else 'Edge: ' + team_2_name}")

Seed diff: 8 → Edge: Maryland
Avg Score Diff: 6.52 → Edge: Maryland
Avg FGM Diff: 1.63 → Edge: Maryland
Avg FGM3 Diff: 0.02 → Edge: Maryland
Avg TO Diff: -3.48 → Edge: Colorado St


In [None]:
import torch
import torch.nn as nn

input_data = torch.tensor(vals, dtype=torch.float32).unsqueeze(0)

In [14]:
# Define model
num_features = 9
class NiglNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.activation_func = nn.Sigmoid()
        self.layer1 = nn.Linear(num_features, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 16)
        self.layer4 = nn.Linear(16, 8)
        self.layer5 = nn.Linear(8, 4)
        self.layer6 = nn.Linear(4, 1)

    def forward(self, x: torch.Tensor):
        x = self.layer1(x)
        x = self.activation_func(x)
        x = self.layer2(x)
        x = self.activation_func(x)
        x = self.layer3(x)
        x = self.activation_func(x)
        x = self.layer4(x)
        x = self.activation_func(x)
        x = self.layer5(x)
        x = self.activation_func(x)
        x = self.layer6(x)
        x = self.activation_func(x)
        return x

# Load the model
torch.serialization.add_safe_globals([NiglNN])
# Load the model
model = torch.load("Mnn10k.pth", map_location=torch.device('cpu'), weights_only=False)
model.eval()

# Make prediction
with torch.no_grad():
    pred = model(input_data)
    prediction = pred.item()
    if prediction > 0.5:
        print(f"The model predicts {team_1_name} will win with a confidence of {prediction:.4f}.")
    else:
        print(f"The model predicts {team_2_name} will win with a confidence of {1 - prediction:.4f}.")

The model predicts Colorado St will win with a confidence of 0.9999.
