In [14]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Load the quarterback dataset.
file_path = 'C:/Users/RaymondCarpenter/Documents/GitHub/14thstreetanalytics/nfl-draft/qb_sheet.csv'
df = pd.read_csv(file_path)

# Draft class whose players are matched against every other class.
# Defined once so both filters below cannot drift apart.
TARGET_DRAFT_CLASS = 2024

# Split the data into the target draft class and all remaining classes.
# NOTE: `draft_class_2023` keeps its original name so code that references it
# is unaffected, but it holds the TARGET_DRAFT_CLASS (2024) rows, not 2023.
draft_class_2023 = df[df['draft_class'] == TARGET_DRAFT_CLASS]
other_draft_classes = df[df['draft_class'] != TARGET_DRAFT_CLASS]

# Comparison features: every float64/int64 column of the target (2024) class
# except 'draft_class', which has the same value for all of its rows.
numeric_columns = draft_class_2023.select_dtypes(include=['float64', 'int64']).columns
numeric_features = numeric_columns.to_list()
numeric_features.remove('draft_class')

# Mean imputation of missing values. The column means are learned from the
# target class only and then reused unchanged for the other classes.
imputer = SimpleImputer(strategy='mean')
imputer.fit(draft_class_2023[numeric_features])
draft_class_2023_imputed = imputer.transform(draft_class_2023[numeric_features])
other_draft_classes_imputed = imputer.transform(other_draft_classes[numeric_features])

# Standardisation. As with the imputer, the scaling statistics come from the
# target class and are applied as-is to the other classes.
scaler = StandardScaler()
scaler.fit(draft_class_2023_imputed)
draft_class_2023_scaled = scaler.transform(draft_class_2023_imputed)
other_draft_classes_scaled = scaler.transform(other_draft_classes_imputed)

# Nearest-neighbour index built over the players from the other draft classes.
# n_neighbors=1: only the single closest match is returned per query row.
nbrs = NearestNeighbors(n_neighbors=1).fit(other_draft_classes_scaled)

# Query the index with every target-class (2024) player; `indices` refers to
# row positions within `other_draft_classes_scaled`.
distances, indices = nbrs.kneighbors(draft_class_2023_scaled)

# Map each target-class (2024) player to the most similar player from the
# other draft classes.
# `indices[:, 0]` is the single nearest neighbour per target player, expressed
# as a row position in `other_draft_classes`, so the names are looked up
# positionally on the 'name' column instead of materialising a row per player.
target_names = draft_class_2023['name'].to_numpy()
matched_names = other_draft_classes['name'].to_numpy()[indices[:, 0]]
similar_players_cross_class = dict(zip(target_names, matched_names))

# Display the mapping.
for player, similar_player in similar_players_cross_class.items():
    print(f"{player}: Most similar to {similar_player}")


Devin Leary: Most similar to Ryan Nassib
Michael Penix: Most similar to Derek Carr
Spencer Rattler: Most similar to Brock Purdy
J.J. McCarthy: Most similar to Zach Wilson
Joe Milton III: Most similar to Carson Wentz
Caleb Williams: Most similar to Sam Howell
Sam Hartman: Most similar to Clayton Tune
Michael Pratt: Most similar to Jevan Snead
Drake Maye: Most similar to Blake Bortles
Bo Nix: Most similar to Colt McCoy
Jordan Travis: Most similar to Dustin Crum
Jayden Daniels: Most similar to Jalen Hurts
