In [26]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

def add_per_game_columns(frame, fields, games_column='games_played'):
    """Add a ``<field>_per_game`` column to *frame* for every name in *fields*.

    Each new column is ``frame[field] / frame[games_column]``. Infinite
    quotients (a non-zero total divided by zero games) are stored as NaN so
    that a later ``dropna`` can remove those rows; ``dropna`` does not treat
    ``inf`` as missing and ``StandardScaler`` rejects infinite input.

    The frame is modified in place and also returned.
    """
    infinities = [float('inf'), float('-inf')]
    for field in fields:
        per_game = frame[field] / frame[games_column]
        frame[field + '_per_game'] = per_game.replace(infinities, float('nan'))
    return frame


def find_player_matches(mapping, player_name, column='2024_Player'):
    """Return the rows of *mapping* whose *column* contains *player_name*.

    The match is a case-insensitive literal substring test: the query is not
    interpreted as a regular expression, so names containing characters such
    as ``(`` or ``.`` cannot raise or over-match. Surrounding whitespace in the
    query is ignored, and rows with a missing name never match.
    """
    query = player_name.strip()
    matches = mapping[column].str.contains(query, case=False, regex=False, na=False)
    return mapping[matches]


# The guard keeps the helpers importable without touching the CSV or prompting
# for input. When run as a script or notebook cell (__name__ == "__main__"),
# every name below is still bound at module level exactly as before.
if __name__ == "__main__":
    file_path = 'C:/Users/RaymondCarpenter/Documents/GitHub/14thstreetanalytics/nfl-draft/rb_sheet.csv'
    data = pd.read_csv(file_path)

    # Percentile rank of every numeric column, appended as "<column>_percentile".
    numeric_columns = data.select_dtypes(include='number').columns
    percentiles = data[numeric_columns].apply(lambda x: x.rank(pct=True))
    data_with_percentiles = pd.concat([data, percentiles.add_suffix('_percentile')], axis=1)

    # Convert season totals into per-game rates.
    normalized_fields = ['rush_attempts', 'rush_yards', 'rush_td', 'receptions', 'rec_yards', 'rec_td']
    add_per_game_columns(data_with_percentiles, normalized_fields)

    # Only rows with a complete feature vector take part in the comparison.
    normalized_features = ['height_in', 'weight_lbs', '40_yard_dash'] + [f + '_per_game' for f in normalized_fields]
    players_2024 = data_with_percentiles[data_with_percentiles['draft_class'] == 2024].dropna(subset=normalized_features)
    players_not_2024 = data_with_percentiles[data_with_percentiles['draft_class'] != 2024].dropna(subset=normalized_features)

    if players_2024.empty or players_not_2024.empty:
        # Fail with a readable message instead of an error from deep inside scikit-learn.
        raise ValueError(
            "Need at least one 2024 player and one non-2024 player with complete "
            "feature data to compute similarities."
        )

    # NOTE(review): the scaler is fitted on the 2024 class only and then applied
    # to the other classes, so "average" means "average 2024 player". Confirm
    # this is intended rather than fitting on all players.
    scaler = StandardScaler()
    features_2024_normalized = scaler.fit_transform(players_2024[normalized_features])
    features_not_2024_normalized = scaler.transform(players_not_2024[normalized_features])

    # Rows are 2024 players, columns are non-2024 players; argmax over each row
    # picks the positional index of the best-matching non-2024 player.
    cosine_similarities_normalized = cosine_similarity(features_2024_normalized, features_not_2024_normalized)
    closest_indices_cosine_normalized = cosine_similarities_normalized.argmax(axis=1)
    closest_players_cosine_normalized = players_not_2024.iloc[closest_indices_cosine_normalized]['name'].values

    similar_players_mapping_cosine_normalized = pd.DataFrame({
        '2024_Player': players_2024['name'].values,
        'Most_Similar_Non_2024_Cosine_Normalized': closest_players_cosine_normalized
    })

    player_name = input("Enter a 2024 player name to find the most similar player from non-2024 classes: ")
    player_info = find_player_matches(similar_players_mapping_cosine_normalized, player_name)
    if player_info.empty:
        print("Player not found or no similar player available.")
    else:
        print(player_info)


    2024_Player Most_Similar_Non_2024_Cosine_Normalized
2  Bucky Irving                            Jerrion Ealy
