# S (Safety) round regression (Ridge)

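Predict draft round 1–8 (8 = undrafted) for Safeties from combine measurements, RAS, PFF stats (Pass_Rush, Run_Defense, Pass_Coverage) and a P4-conference flag. Missing values are filled with KNN imputation and the model is a Ridge regression.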
- Train: 2015–2023 (s_training.csv; RAS and PFF already merged in data_cleaning).
- Test: s_testing.csv filtered to 2024/2025 (drafted only; actual rounds 1–7); 2026 from s_drafted_2026.csv.

In [None]:
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Safety model inputs: combine + RAS + PFF (pass rush, run, coverage) + p4_conference.
# The order of these lists is the column order of the model matrix.
FEATURES_WITH_COLLEGE_S = (
    # combine measurements and derived athletic scores
    ['Broad Jump', 'Vertical', '40yd', 'Height', 'Weight']
    + ['speed_score', 'explosive_score', 'RAS', 'arm_length_inches']
    # pass-rush columns
    + ['true_pass_set_pass_rush_win_rate', 'pass_rush_win_rate', 'snap_counts_pass_rush']
    # run-defense columns (plus forced_fumbles)
    + ['stop_percent', 'missed_tackle_rate', 'avg_depth_of_tackle', 'snap_counts_run', 'forced_fumbles']
    # coverage columns
    + ['yards_per_coverage_snap', 'forced_incompletion_rate', 'snap_counts_coverage', 'coverage_percent']
    + ['coverage_snaps_per_target', 'INT_rate', 'PBU_rate']
    + ['qb_rating_against', 'catch_rate', 'avg_depth_of_target']
    # college flag
    + ['p4_conference']
)

# One availability flag per feature above, listed in the same order.
CONTAINS_WITH_COLLEGE_S = (
    ['contains_broad_jump', 'contains_vertical', 'contains_40yd', 'contains_height', 'contains_weight']
    + ['contains_speed_score', 'contains_explosive_score', 'contains_ras', 'contains_arm_length_inches']
    + ['contains_true_pass_set_pass_rush_win_rate', 'contains_pass_rush_win_rate', 'contains_snap_counts_pass_rush']
    + ['contains_stop_percent', 'contains_missed_tackle_rate', 'contains_avg_depth_of_tackle',
       'contains_snap_counts_run', 'contains_forced_fumbles']
    + ['contains_yards_per_coverage_snap', 'contains_forced_incompletion_rate',
       'contains_snap_counts_coverage', 'contains_coverage_percent']
    + ['contains_coverage_snaps_per_target', 'contains_INT_rate', 'contains_PBU_rate']
    + ['contains_qb_rating_against', 'contains_catch_rate', 'contains_avg_depth_of_target']
    + ['contains_p4_conference']
)

# Full model column list: raw features first, then their availability flags.
FEATURES_WITH_COLLEGE_ALL = [*FEATURES_WITH_COLLEGE_S, *CONTAINS_WITH_COLLEGE_S]

In [115]:
# Training pool: Safeties with Year in 2015–2023 (inclusive). RAS and PFF columns are
# expected to already be in s_training.csv (merged by data_cleaning.py).
df = (
    pd.read_csv('../data/processed/s_training.csv')
    .loc[lambda frame: frame['Year'].between(2015, 2023)]
    .copy()
)
print('Train (2015–2023 Safeties):', len(df))

Train (2015–2023 Safeties): 191


In [116]:
# Report how many training rows have RAS and a few PFF columns populated
total_count = len(df)
ras_count = df['RAS'].notna().sum()
ras_share = ras_count / total_count * 100
print(f"Players with RAS score: {ras_count} out of {total_count} ({ras_share:.1f}%)")

for report_label, report_column in (
    ('True Pass Set Win Rate', 'true_pass_set_pass_rush_win_rate'),
    ('Run Defense Stop %', 'stop_percent'),
    ('missed_tackle_rate', 'missed_tackle_rate'),
    ('snap_counts_coverage', 'snap_counts_coverage'),
):
    print(f"Players with {report_label}: {df[report_column].notna().sum()} out of {total_count}")

Players with RAS score: 120 out of 191 (62.8%)
Players with True Pass Set Win Rate: 148 out of 191
Players with Run Defense Stop %: 177 out of 191
Players with missed_tackle_rate: 177 out of 191
Players with snap_counts_coverage: 178 out of 191


In [117]:
# Height to inches
def height_inches(h):
    """Convert a single height value to inches.

    Parameters
    ----------
    h : int, float, numpy scalar, str or None
        A number is taken to already be in inches. A string is parsed either as
        'feet-inches' (e.g. '6-2') or as a plain number (e.g. '74').

    Returns
    -------
    int or float
        Inches; ``np.nan`` when the value is missing or cannot be parsed.
    """
    if pd.isna(h):
        return np.nan
    # np.integer is listed explicitly: NumPy integer scalars are not `int` subclasses,
    # so they would otherwise fall through to the string branch and come back as NaN.
    if isinstance(h, (int, float, np.integer, np.floating)):
        return float(h)
    text = str(h).strip()
    if '-' in text:
        parts = text.split('-')
        try:
            return int(parts[0]) * 12 + int(parts[1])
        except ValueError:
            # Malformed entries such as '6-' or 'Jun-02' are treated as missing.
            return np.nan
    try:
        # Plain numeric strings are read as inches, like numeric values.
        return float(text)
    except ValueError:
        return np.nan
df['Height'] = df['Height'].apply(height_inches)

# Speed score: Weight * 200 / 40yd^4, kept only where a positive 40 time is recorded
df['speed_score'] = (df['Weight'] * 200 / (df['40yd'] ** 4)).where(
    df['40yd'].notna() & (df['40yd'] > 0)
)

# Explosive score: sum of the Vertical and Broad Jump z-scores against this pool.
# A missing jump contributes 0. mean_*/std_* stay module-level because prepare_s_df reads them.
mean_v, std_v = df['Vertical'].mean(), df['Vertical'].std()
mean_b, std_b = df['Broad Jump'].mean(), df['Broad Jump'].std()
# A zero or undefined spread would break the division, so fall back to 1.0
std_v = 1.0 if (std_v == 0 or np.isnan(std_v)) else std_v
std_b = 1.0 if (std_b == 0 or np.isnan(std_b)) else std_b
vertical_z = (df['Vertical'] - mean_v).fillna(0) / std_v
broad_jump_z = (df['Broad Jump'] - mean_b).fillna(0) / std_b
df['explosive_score'] = vertical_z + broad_jump_z

# P4 conference membership (hardcoded; the original note says these match the DT lists).
# school_alias maps spellings found in the data to the names used in the sets below.
school_alias = {
    'Ole Miss': 'Mississippi',
    'Miami (FL)': 'Miami',
    'Southern California': 'USC',
    'Central Florida': 'UCF',
    'Brigham Young': 'BYU',
    'Ohio St.': 'Ohio State',
    'Florida St.': 'Florida State',
    'Kansas St.': 'Kansas State',
    'Iowa St.': 'Iowa State',
    'Oklahoma St.': 'Oklahoma State',
    'Penn St.': 'Penn State',
    'San Diego St.': 'San Diego State',
    'San Jose St.': 'San Jose State',
    'Boston Col.': 'Boston College',
    'NC State': 'North Carolina State',
}

SEC_SCHOOLS = {
    'Alabama', 'Arkansas', 'Auburn', 'Florida', 'Georgia', 'Kentucky', 'LSU', 'Mississippi',
    'Mississippi State', 'Missouri', 'South Carolina', 'Tennessee', 'Texas A&M', 'Vanderbilt',
    'Oklahoma', 'Texas',
}
BIG_TEN_SCHOOLS = {
    'Illinois', 'Indiana', 'Iowa', 'Maryland', 'Michigan', 'Michigan State', 'Minnesota',
    'Nebraska', 'Northwestern', 'Ohio State', 'Penn State', 'Purdue', 'Rutgers', 'Wisconsin',
    'UCLA', 'USC', 'Oregon', 'Washington',
}
BIG_12_SCHOOLS = {
    'Baylor', 'Iowa State', 'Kansas', 'Kansas State', 'Oklahoma State', 'TCU', 'Texas Tech',
    'West Virginia', 'BYU', 'UCF', 'Cincinnati', 'Houston', 'Arizona', 'Arizona State',
    'Colorado', 'Utah',
}
ACC_SCHOOLS = {
    'Boston College', 'Clemson', 'Duke', 'Florida State', 'Georgia Tech', 'Louisville', 'Miami',
    'North Carolina', 'North Carolina State', 'NC State', 'Pittsburgh', 'Syracuse', 'Virginia',
    'Virginia Tech', 'Wake Forest', 'California', 'SMU', 'Stanford',
}
PAC12_SCHOOLS = {
    'Arizona', 'Arizona State', 'California', 'Colorado', 'Oregon', 'Oregon State', 'Stanford',
    'UCLA', 'USC', 'Utah', 'Washington', 'Washington State',
}

# Two lookup pools: with and without the Pac-12 list
P4_SCHOOLS_NO_PAC12 = set().union(SEC_SCHOOLS, BIG_TEN_SCHOOLS, BIG_12_SCHOOLS, ACC_SCHOOLS)
P4_SCHOOLS = set().union(SEC_SCHOOLS, BIG_TEN_SCHOOLS, BIG_12_SCHOOLS, ACC_SCHOOLS, PAC12_SCHOOLS)

def is_p4(row):
    """Return 1 if the row's School is in the hardcoded P4 pool, else 0.

    The school name is first passed through school_alias. Rows with Year <= 2023
    (Year defaults to 2023 when absent) are checked against P4_SCHOOLS, which
    includes the Pac-12 list; all other rows are checked against P4_SCHOOLS_NO_PAC12.
    A missing or empty School gives 0.
    """
    # NOTE(review): Year is presumably the draft year — confirm 2023 is the intended
    # last class to count the Pac-12 list.
    school = row.get('School')
    if pd.isna(school) or school == '':
        return 0
    canonical = school_alias.get(school, school)
    if row.get('Year', 2023) <= 2023:
        pool = P4_SCHOOLS
    else:
        pool = P4_SCHOOLS_NO_PAC12
    return int(canonical in pool)
df['p4_conference'] = df.apply(is_p4, axis=1)

# Contains flags (all features): 1 where the source value is present, 0 where it is missing.
# Each entry is (flag column, source column, whether a missing source column is tolerated).
# A source of None marks a flag that is always 1.
contains_specs = [
    ('contains_broad_jump', 'Broad Jump', False),
    ('contains_vertical', 'Vertical', False),
    ('contains_40yd', '40yd', False),
    ('contains_height', 'Height', False),
    ('contains_weight', 'Weight', False),
    ('contains_speed_score', 'speed_score', False),
    ('contains_explosive_score', None, False),
    ('contains_ras', 'RAS', False),
    ('contains_arm_length_inches', 'arm_length_inches', True),
    ('contains_true_pass_set_pass_rush_win_rate', 'true_pass_set_pass_rush_win_rate', False),
    ('contains_pass_rush_win_rate', 'pass_rush_win_rate', False),
    ('contains_snap_counts_pass_rush', 'snap_counts_pass_rush', False),
    ('contains_stop_percent', 'stop_percent', True),
    ('contains_missed_tackle_rate', 'missed_tackle_rate', True),
    ('contains_avg_depth_of_tackle', 'avg_depth_of_tackle', True),
    ('contains_snap_counts_run', 'snap_counts_run', True),
    ('contains_forced_fumbles', 'forced_fumbles', True),
    ('contains_yards_per_coverage_snap', 'yards_per_coverage_snap', True),
    ('contains_forced_incompletion_rate', 'forced_incompletion_rate', True),
    ('contains_snap_counts_coverage', 'snap_counts_coverage', True),
    ('contains_coverage_percent', 'coverage_percent', True),
    ('contains_coverage_snaps_per_target', 'coverage_snaps_per_target', True),
    ('contains_INT_rate', 'INT_rate', True),
    ('contains_PBU_rate', 'PBU_rate', True),
    ('contains_qb_rating_against', 'qb_rating_against', True),
    ('contains_catch_rate', 'catch_rate', True),
    ('contains_avg_depth_of_target', 'avg_depth_of_target', True),
    ('contains_p4_conference', 'School', False),
]
for flag_name, source_name, may_be_absent in contains_specs:
    if source_name is None:
        # explosive_score is never NaN (missing jumps are zero-filled), so its flag is constant
        df[flag_name] = 1
    elif may_be_absent and source_name not in df.columns:
        df[flag_name] = 0
    else:
        df[flag_name] = df[source_name].notna().astype(int)

In [118]:
# Target: drafted players keep their round clipped to 1–7; everyone else is labelled 8.
# NOTE(review): a drafted row with a missing Round falls back to round 1 — confirm intended.
was_drafted = df['Drafted'].astype(bool)
drafted_round = df['Round'].fillna(1).astype(int).clip(lower=1, upper=7)
y = np.where(was_drafted, drafted_round, 8)
X_raw = df.loc[:, FEATURES_WITH_COLLEGE_ALL].copy()

# Fill missing values with KNN imputation, then standardize.
# Both transformers are fitted here and reused for the later draft classes.
imputer = KNNImputer(n_neighbors=10)
X = imputer.fit_transform(X_raw)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Ridge regression on the scaled matrix
ridge = Ridge(alpha=1.0, random_state=42).fit(X_scaled, y)

# In-sample predictions, limited to the valid 1–8 range
y_pred_train = ridge.predict(X_scaled).clip(1, 8)
train_mae = mean_absolute_error(y, y_pred_train)
print('Train MAE (round 1–8):', round(train_mae, 4))
print('Train samples:', len(y))

Train MAE (round 1–8): 1.4624
Train samples: 191


In [119]:
def prepare_s_df(ldf, year):
    """Return a copy of a Safety dataframe with the engineered model columns added.

    Adds Height in inches, speed_score, explosive_score, p4_conference and every
    contains_* flag (RAS/PFF are expected to already be in the CSV). explosive_score
    is standardized with the module-level mean_v / std_v / mean_b / std_b values
    computed from the training pool.

    Parameters
    ----------
    ldf : pandas.DataFrame
        Must contain 'Height', 'Weight', '40yd', 'Vertical', 'Broad Jump', 'RAS'
        and 'School'. 'arm_length_inches' and the PFF columns are optional: an
        absent one is created as all-NaN with its contains_* flag set to 0.
    year : int
        Written to the 'Year' column of every row, replacing any existing value.

    Returns
    -------
    pandas.DataFrame
        The input is not modified.
    """
    ldf = ldf.copy()
    ldf['Year'] = year

    # Optional source columns; each flag is named 'contains_<column>'.
    optional_cols = (
        'arm_length_inches',
        'true_pass_set_pass_rush_win_rate', 'pass_rush_win_rate', 'snap_counts_pass_rush',
        'stop_percent', 'missed_tackle_rate', 'avg_depth_of_tackle', 'snap_counts_run', 'forced_fumbles',
        'yards_per_coverage_snap', 'forced_incompletion_rate', 'snap_counts_coverage', 'coverage_percent',
        'coverage_snaps_per_target', 'INT_rate', 'PBU_rate',
        'qb_rating_against', 'catch_rate', 'avg_depth_of_target',
    )
    # Create absent optional columns as NaN. Without this the flag would be set to 0
    # but the feature column itself would still be missing, and selecting
    # FEATURES_WITH_COLLEGE_ALL afterwards would raise KeyError.
    for col in optional_cols:
        if col not in ldf.columns:
            ldf[col] = np.nan

    # Height: parse 'feet-inches' strings when present, otherwise coerce to numeric
    if ldf['Height'].dtype == object or (ldf['Height'].astype(str).str.contains('-', na=False).any()):
        ldf['Height'] = ldf['Height'].apply(height_inches)
    else:
        ldf['Height'] = pd.to_numeric(ldf['Height'], errors='coerce')

    # Speed score is only defined for a recorded, positive 40 time
    ldf['speed_score'] = np.where(
        ldf['40yd'].notna() & (ldf['40yd'] > 0),
        ldf['Weight'] * 200 / (ldf['40yd'] ** 4),
        np.nan
    )
    # Missing jumps contribute 0 to the score
    ldf['explosive_score'] = (ldf['Vertical'] - mean_v).fillna(0) / std_v + (ldf['Broad Jump'] - mean_b).fillna(0) / std_b
    ldf['p4_conference'] = ldf.apply(is_p4, axis=1)

    # Availability flags, created in the same order as before
    required_flags = (
        ('Broad Jump', 'contains_broad_jump'),
        ('Vertical', 'contains_vertical'),
        ('40yd', 'contains_40yd'),
        ('Height', 'contains_height'),
        ('Weight', 'contains_weight'),
        ('speed_score', 'contains_speed_score'),
    )
    for source_col, flag_col in required_flags:
        ldf[flag_col] = ldf[source_col].notna().astype(int)
    # explosive_score is never NaN because of the zero-fill above
    ldf['contains_explosive_score'] = 1
    ldf['contains_ras'] = ldf['RAS'].notna().astype(int)
    for col in optional_cols:
        ldf[f'contains_{col}'] = ldf[col].notna().astype(int)
    ldf['contains_p4_conference'] = ldf['School'].notna().astype(int)
    return ldf

# Held-out classes: the 2024 and 2025 rows of s_testing.csv (PFF/RAS already merged in data_cleaning)
s_testing = pd.read_csv('../data/processed/s_testing.csv')
s_2024 = prepare_s_df(s_testing.loc[s_testing['Year'].eq(2024)], 2024)
s_2025 = prepare_s_df(s_testing.loc[s_testing['Year'].eq(2025)], 2025)

# 2024: reuse the imputer and scaler fitted on the training pool (transform only)
X_24_raw = s_2024.loc[:, FEATURES_WITH_COLLEGE_ALL].copy()
X_24 = imputer.transform(X_24_raw)
X_24_scaled = scaler.transform(X_24)
pred_24 = ridge.predict(X_24_scaled).clip(1, 8)
actual_24 = s_2024['Round'].astype(int).to_numpy()

# 2025: same pipeline
X_25_raw = s_2025.loc[:, FEATURES_WITH_COLLEGE_ALL].copy()
X_25 = imputer.transform(X_25_raw)
X_25_scaled = scaler.transform(X_25)
pred_25 = ridge.predict(X_25_scaled).clip(1, 8)
actual_25 = s_2025['Round'].astype(int).to_numpy()

def eval_metrics(actual, pred, label):
    """Print one line of regression and round-hit metrics for a set of predictions.

    Reports MAE, RMSE and R² on the raw predictions, plus the share of predictions
    that round to exactly the actual round and the share within one round of it.
    Returns nothing.
    """
    rounded = np.round(pred)
    mae = mean_absolute_error(actual, pred)
    rmse = np.sqrt(mean_squared_error(actual, pred))
    r2 = r2_score(actual, pred)
    exact = (rounded == actual).mean()
    within_1 = (np.abs(rounded - actual) <= 1).mean()
    print(f'{label} (n={len(actual)}): MAE={mae:.4f}, RMSE={rmse:.4f}, R²={r2:.4f}, Exact={exact:.2%}, Within-1={within_1:.2%}')

# Report held-out metrics for each draft class
for class_label, class_actual, class_pred in (
    ('2024', actual_24, pred_24),
    ('2025', actual_25, pred_25),
):
    print(f'{class_label} Safeties:')
    eval_metrics(class_actual, class_pred, class_label)

2024 Safeties:
2024 (n=21): MAE=1.8201, RMSE=2.0700, R²=-0.3674, Exact=14.29%, Within-1=42.86%
2025 Safeties:
2025 (n=17): MAE=1.3868, RMSE=1.9232, R²=-0.2314, Exact=23.53%, Within-1=70.59%


In [120]:
# Dataframes: players with actual round, model prediction, tier label, and interpretation
def pred_round_to_tier(p):
    """Map a continuous predicted round to a (tier_label, interpretation) pair.

    Each tier covers predictions strictly below its upper bound. Anything at or above
    6.75, or that compares below no bound (e.g. NaN), lands in the last tier.
    """
    tiers = (
        (1.75, ('Round 1 Tier', 'True 1st-round grade')),
        (2.75, ('Round 2 Tier', 'Early Day 2')),
        (3.75, ('Round 3 Tier', 'Late Day 2')),
        (4.75, ('Round 4 Tier', 'Early Day 3')),
        (5.75, ('Round 5 Tier', 'Mid Day 3')),
        (6.75, ('Round 6 Tier', 'Late Day 3')),
    )
    for upper_bound, tier in tiers:
        if p < upper_bound:
            return tier
    return ('Round 7 / UDFA Tier', 'Fringe draftable')

# Identity columns shown next to each prediction
display_columns = ['Round', 'Pick', 'Player', 'School', 'Year']

s_2024_display = s_2024[display_columns].assign(
    predicted_round=pred_24,
    tier_label=[pred_round_to_tier(value)[0] for value in pred_24],
    interpretation=[pred_round_to_tier(value)[1] for value in pred_24],
).astype({'Round': int})

s_2025_display = s_2025[display_columns].assign(
    predicted_round=pred_25,
    tier_label=[pred_round_to_tier(value)[0] for value in pred_25],
    interpretation=[pred_round_to_tier(value)[1] for value in pred_25],
).astype({'Round': int})

print('2024 drafted Safeties')
display(s_2024_display)
print('2025 drafted Safeties')
display(s_2025_display)

2024 drafted Safeties


Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
0,2,58.0,Javon Bullard,Georgia,2024,4.31921,Round 4 Tier,Early Day 3
1,5,133.0,Jaden Hicks,Washington State,2024,3.866917,Round 4 Tier,Early Day 3
2,2,47.0,Tyler Nubin,Minnesota,2024,5.297875,Round 5 Tier,Mid Day 3
3,2,60.0,Cole Bishop,Utah,2024,4.323064,Round 4 Tier,Early Day 3
4,4,124.0,Malik Mustapha,Wake Forest,2024,5.114507,Round 5 Tier,Mid Day 3
5,6,169.0,Kitan Oladapo,Oregon State,2024,4.617983,Round 4 Tier,Early Day 3
6,3,89.0,Tykee Smith,Georgia,2024,3.080684,Round 3 Tier,Late Day 2
7,3,78.0,Calen Bullock,USC,2024,6.31018,Round 6 Tier,Late Day 3
8,4,99.0,Kamren Kinchens,Miami,2024,6.517914,Round 6 Tier,Late Day 3
9,6,164.0,Jaylin Simpson,Auburn,2024,4.031516,Round 4 Tier,Early Day 3


2025 drafted Safeties


Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
21,2,35.0,Nick Emmanwori,South Carolina,2025,1.0,Round 1 Tier,True 1st-round grade
22,1,27.0,Malaki Starks,Georgia,2025,6.291403,Round 6 Tier,Late Day 3
23,3,96.0,Xavier Watts,Notre Dame,2025,3.822028,Round 4 Tier,Early Day 3
24,2,64.0,Andrew Mukuba,Texas,2025,1.834229,Round 2 Tier,Early Day 2
25,3,82.0,Kevin Winston Jr.,Penn State,2025,5.166035,Round 5 Tier,Mid Day 3
26,4,122.0,Lathan Ransom,Ohio State,2025,3.355321,Round 3 Tier,Late Day 2
27,4,118.0,Billy Bowman Jr.,Oklahoma,2025,4.614503,Round 4 Tier,Early Day 3
28,3,93.0,Jonas Sanker,Virginia,2025,4.349621,Round 4 Tier,Early Day 3
29,4,130.0,Malachi Moore,Alabama,2025,4.84375,Round 5 Tier,Mid Day 3
30,6,187.0,Jaylen Reed,Penn State,2025,4.311482,Round 4 Tier,Early Day 3


In [121]:
# 2026 evaluation (s_drafted_2026.csv has PFF/RAS from data_cleaning)
# NOTE(review): this path is relative to the notebook directory, unlike the
# ../data/processed/ inputs used for the other classes — confirm it is intended.
s_2026 = prepare_s_df(pd.read_csv('s_drafted_2026.csv'), 2026)

X_26_raw = s_2026[FEATURES_WITH_COLLEGE_ALL].copy()
X_26 = imputer.transform(X_26_raw)
X_26_scaled = scaler.transform(X_26)

pred_26 = np.clip(ridge.predict(X_26_scaled), 1, 8)

# Rows with a known actual round. The column may be absent, empty, or only partly filled.
if 'Round' in s_2026.columns:
    has_round_26 = s_2026['Round'].notna().to_numpy()
else:
    has_round_26 = np.zeros(len(s_2026), dtype=bool)

if has_round_26.any():
    # Score only the rows with an actual round; casting the whole column to int
    # would fail as soon as one round is missing.
    actual_26 = s_2026.loc[has_round_26, 'Round'].astype(int).values
    print('2026 Safeties:')
    eval_metrics(actual_26, pred_26[has_round_26], '2026')
else:
    print(f'2026 Safeties (n={len(pred_26)}): Predictions generated (no actual rounds available)')

# Display 2026 predictions. reindex fills absent identity columns (e.g. Round, Pick) with NaN
# instead of raising KeyError.
s_2026_display = s_2026.reindex(columns=['Round', 'Pick', 'Player', 'School', 'Year'])
s_2026_display['predicted_round'] = pred_26
s_2026_display['tier_label'] = [pred_round_to_tier(x)[0] for x in pred_26]
s_2026_display['interpretation'] = [pred_round_to_tier(x)[1] for x in pred_26]
if has_round_26.any():
    # Plain int when every round is known; nullable Int64 when some rows are still missing
    round_dtype = int if has_round_26.all() else 'Int64'
    s_2026_display['Round'] = s_2026_display['Round'].astype(round_dtype)

print('\n2026 Safeties (predictions)')
display(s_2026_display)

2026 Safeties (n=20): Predictions generated (no actual rounds available)

2026 Safeties (predictions)


Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
0,,,Caleb Downs,Alabama,2026,4.845016,Round 5 Tier,Mid Day 3
1,,,Dillon Thieneman,Purdue,2026,4.73248,Round 4 Tier,Early Day 3
2,,,Emmanuel McNeil-Warren,Toledo,2026,5.596287,Round 5 Tier,Mid Day 3
3,,,Michael Taaffe,Texas,2026,3.803967,Round 4 Tier,Early Day 3
4,,,A.J. Haulcy,New Mexico,2026,4.594728,Round 4 Tier,Early Day 3
5,,,Kamari Ramsey,UCLA,2026,7.835263,Round 7 / UDFA Tier,Fringe draftable
6,,,Zakee Wheatley,Penn State,2026,4.647068,Round 4 Tier,Early Day 3
7,,,Genesis Smith,Arizona,2026,5.649508,Round 5 Tier,Mid Day 3
8,,,Bishop Fitzgerald,North Carolina State,2026,4.277917,Round 4 Tier,Early Day 3
9,,,Jalon Kilgore,South Carolina,2026,3.257723,Round 3 Tier,Late Day 2


In [122]:
# In-sample results for the whole training pool (rows with Year 2015–2023), best predicted round first
train_display = (
    df[['Round', 'Pick', 'Player', 'School', 'Year']]
    .assign(
        predicted_round=y_pred_train,
        tier_label=[pred_round_to_tier(value)[0] for value in y_pred_train],
        interpretation=[pred_round_to_tier(value)[1] for value in y_pred_train],
    )
    .sort_values('predicted_round')
    .reset_index(drop=True)
)
train_display

Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
0,2.0,63.0,Juan Thornhill,Virginia,2019,1.198314,Round 1 Tier,True 1st-round grade
1,1.0,17.0,Derwin James,Florida State,2018,1.571344,Round 1 Tier,True 1st-round grade
2,5.0,163.0,Darrick Forrest,Cincinnati,2021,2.164559,Round 2 Tier,Early Day 2
3,1.0,21.0,Darnell Savage,Maryland,2019,2.424592,Round 2 Tier,Early Day 2
4,2.0,56.0,Obi Melifonwu,Connecticut,2017,2.478645,Round 2 Tier,Early Day 2
...,...,...,...,...,...,...,...,...
186,,,Lukas Denis,Boston Col.,2019,8.000000,Round 7 / UDFA Tier,Fringe draftable
187,,,Mike Bell,Fresno State,2019,8.000000,Round 7 / UDFA Tier,Fringe draftable
188,,,Van Smith,Clemson,2018,8.000000,Round 7 / UDFA Tier,Fringe draftable
189,,,Anthony Jefferson,UCLA,2015,8.000000,Round 7 / UDFA Tier,Fringe draftable


In [123]:
# Spot check: in-sample prediction for a single player
train_display.loc[train_display['Player'].eq('Kyle Hamilton')]

Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
38,1.0,14.0,Kyle Hamilton,Notre Dame,2022,3.789697,Round 4 Tier,Early Day 3
