# LB round regression (Ridge)

Predict draft round 1–8 (8 = undrafted) using combine + PFF (Pass_Rush, Run_Defense, Pass_Coverage) + RAS, KNN imputation, Ridge regression.
- Train: 2015–2023 (lb_training.csv; RAS and PFF already merged in data_cleaning).
- Test: lb_testing.csv filtered to 2024/2025 (drafted only; actual rounds 1–7); 2026 from lb_drafted_2026.csv.

In [74]:
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Same base as DT (combine + arm_length + RAS) + Run_Defense + Pass_Coverage metrics
FEATURES_WITH_COLLEGE = [
    'Broad Jump', 'Vertical', '40yd', 'Height', 'Weight',
    'speed_score', 'explosive_score', 'RAS', 'arm_length_inches',
    'true_pass_set_pass_rush_win_rate', 'pass_rush_win_rate', 'snap_counts_pass_rush', 'stop_percent',
    'missed_tackle_rate', 'avg_depth_of_tackle', 'snap_counts_run', 'forced_fumbles',
    'yards_per_coverage_snap', 'forced_incompletion_rate', 'snap_counts_coverage', 'coverage_percent',
    'interceptions', 'pass_break_ups', 'coverage_snaps_per_target', 'INT_rate', 'PBU_rate',
    'p4_conference'
]
CONTAINS_WITH_COLLEGE = [
    'contains_broad_jump', 'contains_vertical', 'contains_40yd', 'contains_height', 'contains_weight',
    'contains_speed_score', 'contains_explosive_score', 'contains_ras', 'contains_arm_length_inches',
    'contains_true_pass_set_pass_rush_win_rate', 'contains_pass_rush_win_rate', 'contains_snap_counts_pass_rush', 'contains_stop_percent',
    'contains_missed_tackle_rate', 'contains_avg_depth_of_tackle', 'contains_snap_counts_run', 'contains_forced_fumbles',
    'contains_yards_per_coverage_snap', 'contains_forced_incompletion_rate', 'contains_snap_counts_coverage', 'contains_coverage_percent',
    'contains_interceptions', 'contains_pass_break_ups', 'contains_coverage_snaps_per_target', 'contains_INT_rate', 'contains_PBU_rate',
    'contains_p4_conference'
]
FEATURES_WITH_COLLEGE_ALL = FEATURES_WITH_COLLEGE + CONTAINS_WITH_COLLEGE

In [75]:
# Load LB training (2015–2023); RAS and PFF already in lb_training.csv from data_cleaning.py
df = pd.read_csv('../data/processed/lb_training.csv')
df = df[df['Year'].between(2015, 2023)].copy()
print('Train (2015–2023 LBs):', len(df))

Train (2015–2023 LBs): 327


In [76]:
# Print RAS and PFF availability
total_count = len(df)
ras_count = df['RAS'].notna().sum()
print(f"Players with RAS score: {ras_count} out of {total_count} ({ras_count/total_count*100:.1f}%)")
print(f"Players with True Pass Set Win Rate: {df['true_pass_set_pass_rush_win_rate'].notna().sum()} out of {total_count}")
print(f"Players with Run Defense Stop %: {df['stop_percent'].notna().sum()} out of {total_count}")
print(f"Players with missed_tackle_rate: {df['missed_tackle_rate'].notna().sum()} out of {total_count}")
print(f"Players with snap_counts_coverage: {df['snap_counts_coverage'].notna().sum()} out of {total_count}")

Players with RAS score: 199 out of 327 (60.9%)
Players with True Pass Set Win Rate: 303 out of 327
Players with Run Defense Stop %: 306 out of 327
Players with missed_tackle_rate: 305 out of 327
Players with snap_counts_coverage: 306 out of 327


In [77]:
# Height to inches
def height_inches(h):
    if pd.isna(h): return np.nan
    if isinstance(h, (int, float)) and not (isinstance(h, float) and np.isnan(h)):
        return float(h)
    s = str(h).strip()
    if '-' in s:
        parts = s.split('-')
        return int(parts[0]) * 12 + int(parts[1])
    return np.nan
df['Height'] = df['Height'].apply(height_inches)

# Speed score
df['speed_score'] = np.where(
    df['40yd'].notna() & (df['40yd'] > 0),
    df['Weight'] * 200 / (df['40yd'] ** 4),
    np.nan
)

# Explosive score (z-scores from this pool)
mean_v = df['Vertical'].mean()
std_v = df['Vertical'].std()
mean_b = df['Broad Jump'].mean()
std_b = df['Broad Jump'].std()
if std_v == 0 or np.isnan(std_v): std_v = 1.0
if std_b == 0 or np.isnan(std_b): std_b = 1.0
df['explosive_score'] = (df['Vertical'] - mean_v).fillna(0) / std_v + (df['Broad Jump'] - mean_b).fillna(0) / std_b

# P4 conference (hardcoded, same as DT)
school_alias = {
    'Ole Miss': 'Mississippi', 'Miami (FL)': 'Miami', 'Southern California': 'USC',
    'Central Florida': 'UCF', 'Brigham Young': 'BYU', 'Ohio St.': 'Ohio State',
    'Florida St.': 'Florida State', 'Kansas St.': 'Kansas State', 'Iowa St.': 'Iowa State',
    'Oklahoma St.': 'Oklahoma State', 'Penn St.': 'Penn State', 'San Diego St.': 'San Diego State',
    'San Jose St.': 'San Jose State', 'Boston Col.': 'Boston College', 'NC State': 'North Carolina State',
}
SEC_SCHOOLS = {'Alabama', 'Arkansas', 'Auburn', 'Florida', 'Georgia', 'Kentucky', 'LSU', 'Mississippi', 'Mississippi State', 'Missouri', 'South Carolina', 'Tennessee', 'Texas A&M', 'Vanderbilt', 'Oklahoma', 'Texas'}
BIG_TEN_SCHOOLS = {'Illinois', 'Indiana', 'Iowa', 'Maryland', 'Michigan', 'Michigan State', 'Minnesota', 'Nebraska', 'Northwestern', 'Ohio State', 'Penn State', 'Purdue', 'Rutgers', 'Wisconsin', 'UCLA', 'USC', 'Oregon', 'Washington'}
BIG_12_SCHOOLS = {'Baylor', 'Iowa State', 'Kansas', 'Kansas State', 'Oklahoma State', 'TCU', 'Texas Tech', 'West Virginia', 'BYU', 'UCF', 'Cincinnati', 'Houston', 'Arizona', 'Arizona State', 'Colorado', 'Utah'}
ACC_SCHOOLS = {'Boston College', 'Clemson', 'Duke', 'Florida State', 'Georgia Tech', 'Louisville', 'Miami', 'North Carolina', 'North Carolina State', 'NC State', 'Pittsburgh', 'Syracuse', 'Virginia', 'Virginia Tech', 'Wake Forest', 'California', 'SMU', 'Stanford'}
PAC12_SCHOOLS = {'Arizona', 'Arizona State', 'California', 'Colorado', 'Oregon', 'Oregon State', 'Stanford', 'UCLA', 'USC', 'Utah', 'Washington', 'Washington State'}
P4_SCHOOLS = SEC_SCHOOLS | BIG_TEN_SCHOOLS | BIG_12_SCHOOLS | ACC_SCHOOLS | PAC12_SCHOOLS
P4_SCHOOLS_NO_PAC12 = SEC_SCHOOLS | BIG_TEN_SCHOOLS | BIG_12_SCHOOLS | ACC_SCHOOLS

def is_p4(row):
    s = row.get('School')
    if pd.isna(s) or s == '': return 0
    sn = school_alias.get(s, s)
    year = row.get('Year', 2023)
    schools = P4_SCHOOLS if year <= 2023 else P4_SCHOOLS_NO_PAC12
    return 1 if sn in schools else 0
df['p4_conference'] = df.apply(is_p4, axis=1)

# Contains flags (all features)
df['contains_broad_jump'] = df['Broad Jump'].notna().astype(int)
df['contains_vertical'] = df['Vertical'].notna().astype(int)
df['contains_40yd'] = df['40yd'].notna().astype(int)
df['contains_height'] = df['Height'].notna().astype(int)
df['contains_weight'] = df['Weight'].notna().astype(int)
df['contains_speed_score'] = df['speed_score'].notna().astype(int)
df['contains_explosive_score'] = 1
df['contains_ras'] = df['RAS'].notna().astype(int)
df['contains_arm_length_inches'] = df['arm_length_inches'].notna().astype(int) if 'arm_length_inches' in df.columns else 0
df['contains_true_pass_set_pass_rush_win_rate'] = df['true_pass_set_pass_rush_win_rate'].notna().astype(int)
df['contains_pass_rush_win_rate'] = df['pass_rush_win_rate'].notna().astype(int)
df['contains_snap_counts_pass_rush'] = df['snap_counts_pass_rush'].notna().astype(int)
df['contains_stop_percent'] = df['stop_percent'].notna().astype(int) if 'stop_percent' in df.columns else 0
df['contains_missed_tackle_rate'] = df['missed_tackle_rate'].notna().astype(int) if 'missed_tackle_rate' in df.columns else 0
df['contains_avg_depth_of_tackle'] = df['avg_depth_of_tackle'].notna().astype(int) if 'avg_depth_of_tackle' in df.columns else 0
df['contains_snap_counts_run'] = df['snap_counts_run'].notna().astype(int) if 'snap_counts_run' in df.columns else 0
df['contains_forced_fumbles'] = df['forced_fumbles'].notna().astype(int) if 'forced_fumbles' in df.columns else 0
df['contains_yards_per_coverage_snap'] = df['yards_per_coverage_snap'].notna().astype(int) if 'yards_per_coverage_snap' in df.columns else 0
df['contains_forced_incompletion_rate'] = df['forced_incompletion_rate'].notna().astype(int) if 'forced_incompletion_rate' in df.columns else 0
df['contains_snap_counts_coverage'] = df['snap_counts_coverage'].notna().astype(int) if 'snap_counts_coverage' in df.columns else 0
df['contains_coverage_percent'] = df['coverage_percent'].notna().astype(int) if 'coverage_percent' in df.columns else 0
df['contains_interceptions'] = df['interceptions'].notna().astype(int) if 'interceptions' in df.columns else 0
df['contains_pass_break_ups'] = df['pass_break_ups'].notna().astype(int) if 'pass_break_ups' in df.columns else 0
df['contains_coverage_snaps_per_target'] = df['coverage_snaps_per_target'].notna().astype(int) if 'coverage_snaps_per_target' in df.columns else 0
df['contains_INT_rate'] = df['INT_rate'].notna().astype(int) if 'INT_rate' in df.columns else 0
df['contains_PBU_rate'] = df['PBU_rate'].notna().astype(int) if 'PBU_rate' in df.columns else 0
df['contains_p4_conference'] = df['School'].notna().astype(int)

In [78]:
# Target: round 1–7 if drafted, 8 if undrafted
y = np.where(
    df['Drafted'].astype(bool),
    np.clip(df['Round'].fillna(1).astype(int), 1, 7),
    8
)
X_raw = df[FEATURES_WITH_COLLEGE_ALL].copy()

# KNN imputation + scale
imputer = KNNImputer(n_neighbors=10)
X = imputer.fit_transform(X_raw)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Ridge regression
ridge = Ridge(alpha=1.0, random_state=42)
ridge.fit(X_scaled, y)

y_pred_train = np.clip(ridge.predict(X_scaled), 1, 8)
print('Train MAE (round 1–8):', round(mean_absolute_error(y, y_pred_train), 4))
print('Train samples:', len(y))

Train MAE (round 1–8): 1.5415
Train samples: 327


In [79]:
def prepare_lb_df(ldf, year):
    """Add Height inches, speed_score, explosive_score, p4, contains_* to a LB dataframe (RAS/PFF already in CSV)."""
    ldf = ldf.copy()
    ldf['Year'] = year
    if ldf['Height'].dtype == object or (ldf['Height'].astype(str).str.contains('-', na=False).any()):
        ldf['Height'] = ldf['Height'].apply(height_inches)
    else:
        ldf['Height'] = pd.to_numeric(ldf['Height'], errors='coerce')
    ldf['speed_score'] = np.where(
        ldf['40yd'].notna() & (ldf['40yd'] > 0),
        ldf['Weight'] * 200 / (ldf['40yd'] ** 4),
        np.nan
    )
    ldf['explosive_score'] = (ldf['Vertical'] - mean_v).fillna(0) / std_v + (ldf['Broad Jump'] - mean_b).fillna(0) / std_b
    ldf['p4_conference'] = ldf.apply(is_p4, axis=1)
    ldf['contains_broad_jump'] = ldf['Broad Jump'].notna().astype(int)
    ldf['contains_vertical'] = ldf['Vertical'].notna().astype(int)
    ldf['contains_40yd'] = ldf['40yd'].notna().astype(int)
    ldf['contains_height'] = ldf['Height'].notna().astype(int)
    ldf['contains_weight'] = ldf['Weight'].notna().astype(int)
    ldf['contains_speed_score'] = ldf['speed_score'].notna().astype(int)
    ldf['contains_explosive_score'] = 1
    ldf['contains_ras'] = ldf['RAS'].notna().astype(int)
    ldf['contains_arm_length_inches'] = ldf['arm_length_inches'].notna().astype(int) if 'arm_length_inches' in ldf.columns else 0
    ldf['contains_true_pass_set_pass_rush_win_rate'] = ldf['true_pass_set_pass_rush_win_rate'].notna().astype(int) if 'true_pass_set_pass_rush_win_rate' in ldf.columns else 0
    ldf['contains_pass_rush_win_rate'] = ldf['pass_rush_win_rate'].notna().astype(int) if 'pass_rush_win_rate' in ldf.columns else 0
    ldf['contains_snap_counts_pass_rush'] = ldf['snap_counts_pass_rush'].notna().astype(int) if 'snap_counts_pass_rush' in ldf.columns else 0
    ldf['contains_stop_percent'] = ldf['stop_percent'].notna().astype(int) if 'stop_percent' in ldf.columns else 0
    ldf['contains_missed_tackle_rate'] = ldf['missed_tackle_rate'].notna().astype(int) if 'missed_tackle_rate' in ldf.columns else 0
    ldf['contains_avg_depth_of_tackle'] = ldf['avg_depth_of_tackle'].notna().astype(int) if 'avg_depth_of_tackle' in ldf.columns else 0
    ldf['contains_snap_counts_run'] = ldf['snap_counts_run'].notna().astype(int) if 'snap_counts_run' in ldf.columns else 0
    ldf['contains_forced_fumbles'] = ldf['forced_fumbles'].notna().astype(int) if 'forced_fumbles' in ldf.columns else 0
    ldf['contains_yards_per_coverage_snap'] = ldf['yards_per_coverage_snap'].notna().astype(int) if 'yards_per_coverage_snap' in ldf.columns else 0
    ldf['contains_forced_incompletion_rate'] = ldf['forced_incompletion_rate'].notna().astype(int) if 'forced_incompletion_rate' in ldf.columns else 0
    ldf['contains_snap_counts_coverage'] = ldf['snap_counts_coverage'].notna().astype(int) if 'snap_counts_coverage' in ldf.columns else 0
    ldf['contains_coverage_percent'] = ldf['coverage_percent'].notna().astype(int) if 'coverage_percent' in ldf.columns else 0
    ldf['contains_interceptions'] = ldf['interceptions'].notna().astype(int) if 'interceptions' in ldf.columns else 0
    ldf['contains_pass_break_ups'] = ldf['pass_break_ups'].notna().astype(int) if 'pass_break_ups' in ldf.columns else 0
    ldf['contains_coverage_snaps_per_target'] = ldf['coverage_snaps_per_target'].notna().astype(int) if 'coverage_snaps_per_target' in ldf.columns else 0
    ldf['contains_INT_rate'] = ldf['INT_rate'].notna().astype(int) if 'INT_rate' in ldf.columns else 0
    ldf['contains_PBU_rate'] = ldf['PBU_rate'].notna().astype(int) if 'PBU_rate' in ldf.columns else 0
    ldf['contains_p4_conference'] = ldf['School'].notna().astype(int)
    return ldf

# 2024 and 2025 from lb_testing.csv (PFF/RAS already merged in data_cleaning)
lb_testing = pd.read_csv('../data/processed/lb_testing.csv')
lb_2024 = prepare_lb_df(lb_testing[lb_testing['Year'] == 2024], 2024)
lb_2025 = prepare_lb_df(lb_testing[lb_testing['Year'] == 2025], 2025)

X_24_raw = lb_2024[FEATURES_WITH_COLLEGE_ALL].copy()
X_25_raw = lb_2025[FEATURES_WITH_COLLEGE_ALL].copy()
X_24 = imputer.transform(X_24_raw)
X_25 = imputer.transform(X_25_raw)
X_24_scaled = scaler.transform(X_24)
X_25_scaled = scaler.transform(X_25)

pred_24 = np.clip(ridge.predict(X_24_scaled), 1, 8)
pred_25 = np.clip(ridge.predict(X_25_scaled), 1, 8)

actual_24 = lb_2024['Round'].astype(int).values
actual_25 = lb_2025['Round'].astype(int).values

def eval_metrics(actual, pred, label):
    mae = mean_absolute_error(actual, pred)
    rmse = np.sqrt(mean_squared_error(actual, pred))
    r2 = r2_score(actual, pred)
    exact = (np.round(pred) == actual).mean()
    within_1 = (np.abs(np.round(pred) - actual) <= 1).mean()
    print(f'{label} (n={len(actual)}): MAE={mae:.4f}, RMSE={rmse:.4f}, R²={r2:.4f}, Exact={exact:.2%}, Within-1={within_1:.2%}')

print('2024 LBs:')
eval_metrics(actual_24, pred_24, '2024')
print('2025 LBs:')
eval_metrics(actual_25, pred_25, '2025')

2024 LBs:
2024 (n=14): MAE=1.5209, RMSE=1.7451, R²=0.1064, Exact=14.29%, Within-1=50.00%
2025 LBs:
2025 (n=21): MAE=1.6381, RMSE=1.9054, R²=-0.2568, Exact=14.29%, Within-1=42.86%


In [80]:
# Dataframes: players with actual round, model prediction, tier label, and interpretation
def pred_round_to_tier(p):
    if p < 1.75: return ('Round 1 Tier', 'True 1st-round grade')
    if p < 2.75: return ('Round 2 Tier', 'Early Day 2')
    if p < 3.75: return ('Round 3 Tier', 'Late Day 2')
    if p < 4.75: return ('Round 4 Tier', 'Early Day 3')
    if p < 5.75: return ('Round 5 Tier', 'Mid Day 3')
    if p < 6.75: return ('Round 6 Tier', 'Late Day 3')
    return ('Round 7 / UDFA Tier', 'Fringe draftable')

lb_2024_display = lb_2024[['Round', 'Pick', 'Player', 'School', 'Year']].copy()
lb_2024_display['predicted_round'] = pred_24
lb_2024_display['tier_label'] = [pred_round_to_tier(x)[0] for x in pred_24]
lb_2024_display['interpretation'] = [pred_round_to_tier(x)[1] for x in pred_24]
lb_2024_display['Round'] = lb_2024_display['Round'].astype(int)

lb_2025_display = lb_2025[['Round', 'Pick', 'Player', 'School', 'Year']].copy()
lb_2025_display['predicted_round'] = pred_25
lb_2025_display['tier_label'] = [pred_round_to_tier(x)[0] for x in pred_25]
lb_2025_display['interpretation'] = [pred_round_to_tier(x)[1] for x in pred_25]
lb_2025_display['Round'] = lb_2025_display['Round'].astype(int)

print('2024 drafted LBs')
display(lb_2024_display)
print('2025 drafted LBs')
display(lb_2025_display)

2024 drafted LBs


Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
0,1,17.0,Dallas Turner,Alabama,2024,2.126309,Round 2 Tier,Early Day 2
1,2,45.0,Edgerrin Cooper,Texas A&M,2024,3.155829,Round 3 Tier,Late Day 2
2,2,52.0,Junior Colson,Michigan,2024,4.154625,Round 4 Tier,Early Day 3
3,3,72.0,Trevin Wallace,Kentucky,2024,3.707813,Round 3 Tier,Late Day 2
4,3,98.0,Payton Wilson,NC State,2024,2.581091,Round 2 Tier,Early Day 2
5,3,87.0,Marist Liufau,Notre Dame,2024,5.954651,Round 6 Tier,Late Day 3
6,4,114.0,Jaylan Ford,Texas,2024,4.895393,Round 5 Tier,Mid Day 3
7,4,118.0,Tyrice Knight,UTEP,2024,5.994552,Round 6 Tier,Late Day 3
8,5,149.0,Edefuan Ulofoshio,Washington,2024,2.993436,Round 3 Tier,Late Day 2
9,5,160.0,Steele Chambers,Ohio State,2024,8.0,Round 7 / UDFA Tier,Fringe draftable


2025 drafted LBs


Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
14,1,15.0,Jalon Walker,Georgia,2025,5.400312,Round 5 Tier,Mid Day 3
15,1,31.0,Jihaad Campbell,Alabama,2025,4.447096,Round 4 Tier,Early Day 3
16,2,33.0,Carson Schwesinger,UCLA,2025,2.800813,Round 3 Tier,Late Day 2
17,2,49.0,Demetrius Knight Jr,South Carolina,2025,4.243448,Round 4 Tier,Early Day 3
18,3,75.0,Nick Martin,Oklahoma State,2025,5.22522,Round 5 Tier,Mid Day 3
19,4,107.0,Jack Kiser,Notre Dame,2025,5.990115,Round 6 Tier,Late Day 3
20,4,112.0,Danny Stutsman,Oklahoma,2025,3.893542,Round 4 Tier,Early Day 3
21,4,115.0,Cody Simon,Ohio State,2025,4.954811,Round 5 Tier,Mid Day 3
22,4,119.0,Barrett Carter,Clemson,2025,5.327638,Round 5 Tier,Mid Day 3
23,4,129.0,Teddye Buchanan,California,2025,4.956873,Round 5 Tier,Mid Day 3


In [81]:
# 2026 evaluation (lb_drafted_2026.csv has PFF/RAS from data_cleaning)
lb_2026 = prepare_lb_df(pd.read_csv('lb_drafted_2026.csv'), 2026)

X_26_raw = lb_2026[FEATURES_WITH_COLLEGE_ALL].copy()
X_26 = imputer.transform(X_26_raw)
X_26_scaled = scaler.transform(X_26)

pred_26 = np.clip(ridge.predict(X_26_scaled), 1, 8)

# Check if Round column exists and has valid values for evaluation
if 'Round' in lb_2026.columns and lb_2026['Round'].notna().any():
    actual_26 = lb_2026['Round'].astype(int).values
    print('2026 LBs:')
    eval_metrics(actual_26, pred_26, '2026')
else:
    print(f'2026 LBs (n={len(pred_26)}): Predictions generated (no actual rounds available)')

# Display 2026 predictions
lb_2026_display = lb_2026[['Round', 'Pick', 'Player', 'School', 'Year']].copy()
lb_2026_display['predicted_round'] = pred_26
lb_2026_display['tier_label'] = [pred_round_to_tier(x)[0] for x in pred_26]
lb_2026_display['interpretation'] = [pred_round_to_tier(x)[1] for x in pred_26]
if 'Round' in lb_2026_display.columns and lb_2026_display['Round'].notna().any():
    lb_2026_display['Round'] = lb_2026_display['Round'].astype(int)

print('\n2026 drafted LBs')
display(lb_2026_display)

2026 LBs (n=18): Predictions generated (no actual rounds available)

2026 drafted LBs


Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
0,,,Arvell Reese,Ohio State,2026,4.125178,Round 4 Tier,Early Day 3
1,,,Sonny Styles,Ohio State,2026,2.730182,Round 2 Tier,Early Day 2
2,,,CJ Allen,Georgia,2026,3.88644,Round 4 Tier,Early Day 3
3,,,Anthony Hill Jr,Texas,2026,3.89516,Round 4 Tier,Early Day 3
4,,,Deontae Lawson,Alabama,2026,4.961789,Round 5 Tier,Mid Day 3
5,,,Josiah Trotter,Missouri,2026,3.535056,Round 3 Tier,Late Day 2
6,,,Jake Golday,Cincinnati,2026,4.610039,Round 4 Tier,Early Day 3
7,,,Taurean York,Texas A&M,2026,4.9631,Round 5 Tier,Mid Day 3
8,,,Jacob Rodriguez,Texas Tech,2026,3.982548,Round 4 Tier,Early Day 3
9,,,Harold Perkins Jr,LSU,2026,4.983731,Round 5 Tier,Mid Day 3


In [82]:
# Model results on entire training set (2017–2023), ordered by predicted_round
train_display = df[['Round', 'Pick', 'Player', 'School', 'Year']].copy()
train_display['predicted_round'] = y_pred_train
train_display['tier_label'] = [pred_round_to_tier(x)[0] for x in y_pred_train]
train_display['interpretation'] = [pred_round_to_tier(x)[1] for x in y_pred_train]
train_display = train_display.sort_values('predicted_round').reset_index(drop=True)
train_display

Unnamed: 0,Round,Pick,Player,School,Year,predicted_round,tier_label,interpretation
0,1.0,12.0,Micah Parsons,Penn St.,2021,1.000000,Round 1 Tier,True 1st-round grade
1,1.0,19.0,Leighton Vander esch,Boise State,2018,1.096150,Round 1 Tier,True 1st-round grade
2,1.0,8.0,Isaiah Simmons,Clemson,2020,1.115848,Round 1 Tier,True 1st-round grade
3,1.0,5.0,Devin White,LSU,2019,1.771152,Round 2 Tier,Early Day 2
4,3.0,65.0,Logan Wilson,Wyoming,2020,2.244886,Round 2 Tier,Early Day 2
...,...,...,...,...,...,...,...,...
322,,,Blake Gallagher,Northwestern,2021,8.000000,Round 7 / UDFA Tier,Fringe draftable
323,,,Calvin Bundage,Oklahoma St.,2021,8.000000,Round 7 / UDFA Tier,Fringe draftable
324,5.0,176.0,K.J. Britt,Auburn,2021,8.000000,Round 7 / UDFA Tier,Fringe draftable
325,,,Trystan Slinker,Baylor,2021,8.000000,Round 7 / UDFA Tier,Fringe draftable
