In [107]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Load the CSV file
data = pd.read_csv("2023_nba_player_stats.csv")

# Select relevant features for similarity computation (excluding 'Min')
features = ['PTS', 'FG%', '3P%', 'FT%', 'REB', 'AST', 'STL', 'BLK']

# Normalize the data
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data[features])

# Compute cosine similarity
similarity_matrix = cosine_similarity(data_normalized)

# Define function to get top N similar players with their data
def get_top_similar_players(player_index, similarity_matrix, data):
    # Get similarity scores for the player
    player_similarities = similarity_matrix[player_index]
    # Sort indices based on similarity scores
    similar_player_indices = player_similarities.argsort()[::-1] # Exclude the player itself
    # Get player names
    similar_players = data.iloc[similar_player_indices]['PName'].values
    return similar_players

# Get top 10 similar players for Jayson Tatum and Joel Embiid
jayson_top_similar_players = get_top_similar_players(jayson_index, similarity_matrix, data)
joel_top_similar_players = get_top_similar_players(joel_index, similarity_matrix, data)

# Display only the top 10 similar players
print("Top 10 similar players to Jayson Tatum:")
print(jayson_top_similar_players[:10])  # Slice to include only the first 10 elements
print("\nTop 10 similar players to Joel Embiid:")
print(joel_top_similar_players[:10]) 
similar_players_dict = {}

# Populate the dictionary
similar_players_dict['Jayson Tatum'] = list(jayson_top_similar_players)

# Display the dictionary
print("\nDictionary of similar players to Jayson Tatum:")
print(similar_players_dict)

Top 10 similar players to Jayson Tatum:
['Jayson Tatum' 'Paolo Banchero' 'Pascal Siakam' 'Jaylen Brown'
 'LeBron James' 'Julius Randle' 'Luka Doncic' 'Giannis Antetokounmpo'
 'Joel Embiid' 'Zach LaVine']

Top 10 similar players to Joel Embiid:
['Joel Embiid' 'Kristaps Porzingis' 'Anthony Davis' 'Jayson Tatum'
 'Paolo Banchero' 'Giannis Antetokounmpo' 'Evan Mobley' 'P.J. Washington'
 'Aaron Gordon' 'Pascal Siakam']

Dictionary of similar players to Jayson Tatum:
{'Jayson Tatum': ['Jayson Tatum', 'Paolo Banchero', 'Pascal Siakam', 'Jaylen Brown', 'LeBron James', 'Julius Randle', 'Luka Doncic', 'Giannis Antetokounmpo', 'Joel Embiid', 'Zach LaVine', 'DeMar DeRozan', 'Kyle Kuzma', 'Jalen Green', 'Mikal Bridges', 'CJ McCollum', 'Kyrie Irving', 'Shai Gilgeous-Alexander', 'Lauri Markkanen', 'Franz Wagner', "De'Aaron Fox", 'RJ Barrett', 'Bam Adebayo', 'Keldon Johnson', 'Anthony Edwards', 'Donovan Mitchell', 'Scottie Barnes', 'Stephen Curry', 'Ja Morant', 'Josh Giddey', 'Nikola Vucevic', 'Aaron 

In [95]:
# Function to get similarity scores for top N similar players
def get_similarity_scores(player_index, similar_player_indices, similarity_matrix):
    # Get similarity scores for the top similar players
    similarity_scores = similarity_matrix[player_index, similar_player_indices]
    return similarity_scores

# Find indices of top 10 similar players for Jayson Tatum and Joel Embiid
jayson_similar_indices = data[data['PName'].isin(jayson_similar_players)].index.values
joel_similar_indices = data[data['PName'].isin(joel_similar_players)].index.values

# Get similarity scores for top 10 similar players to Jayson Tatum
jayson_similarity_scores = get_similarity_scores(jayson_index, jayson_similar_indices, similarity_matrix)

# Get similarity scores for top 10 similar players to Joel Embiid
joel_similarity_scores = get_similarity_scores(joel_index, joel_similar_indices, similarity_matrix)

# Print similarity scores for Jayson Tatum
print("\nSimilarity scores for top 10 similar players to Jayson Tatum:")
print(jayson_similarity_scores)

# Print similarity scores for Joel Embiid
print("\nSimilarity scores for top 10 similar players to Joel Embiid:")
print(joel_similarity_scores)



Similarity scores for top 10 similar players to Jayson Tatum:
[0.98099206 0.98388431 0.98327169 0.98406603 0.98074956 0.97930784
 0.99072187 0.99106775 0.98409681 0.9927205 ]

Similarity scores for top 10 similar players to Joel Embiid:
[0.98099206 0.97497229 0.96567247 0.96555947 0.98765994 0.98266475
 0.97644833 0.97483501 0.97039368 0.96704438]


In [90]:
# Function to categorize similarity scores
def categorize_similarity_scores(similarity_scores):
    categories = []
    for score in similarity_scores:
        if score >= 0.90:
            categories.append("Very Similar")
        elif 0.80 <= score <= 0.89:
            categories.append("Moderately Similar")
        elif 0.7 <= score <= 0.79:
            categories.append("Less Similar")
    return categories

# Categorize similarity scores for Jayson Tatum
jayson_similarity_categories = categorize_similarity_scores(jayson_similarity_scores)

# Categorize similarity scores for Joel Embiid
joel_similarity_categories = categorize_similarity_scores(joel_similarity_scores)

# Print categories for Jayson Tatum
print("\nCategories for top 10 similar players to Jayson Tatum:")
print(jayson_similarity_categories)

# Print categories for Joel Embiid
print("\nCategories for top 10 similar players to Joel Embiid:")
print(joel_similarity_categories)



Categories for top 10 similar players to Jayson Tatum:
['Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar']

Categories for top 10 similar players to Joel Embiid:
['Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar', 'Very Similar']


In [99]:
def get_all_similar_players_with_scores(player_index, similarity_matrix, data):
    # Get similarity scores for the player
    player_similarities = similarity_matrix[player_index]
    # Sort indices based on similarity scores
    similar_player_indices = player_similarities.argsort()[::-1][1:] # Exclude the player itself
    # Get player names
    similar_players = data.iloc[similar_player_indices]['PName'].values
    # Get similarity scores
    similarity_scores = player_similarities[similar_player_indices]
    return similar_players, similarity_scores

# Find the index of Jayson Tatum in the data
jayson_index = data.index[data['PName'] == 'Jayson Tatum'].tolist()[0]

# Get all similar players for Jayson Tatum along with their similarity scores
jayson_similar_players, jayson_similarity_scores = get_all_similar_players_with_scores(jayson_index, similarity_matrix, data)

# Create a dictionary to store similar players and their similarity scores
similar_players_dict = {}

# Populate the dictionary with similar players and their similarity scores
for player, score in zip(jayson_similar_players, jayson_similarity_scores):
    similar_players_dict[player] = score

# Display the dictionary
print("Similarity scores for players similar to Jayson Tatum:")
for player, score in similar_players_dict.items():
    print(player, score)


Similarity scores for players similar to Jayson Tatum:
Paolo Banchero 0.9927205049819267
Pascal Siakam 0.9910677514947449
Jaylen Brown 0.9907218723481154
LeBron James 0.9840968108491313
Julius Randle 0.9840660311593127
Luka Doncic 0.9838843115420738
Giannis Antetokounmpo 0.9832716949276415
Joel Embiid 0.9809920586076716
Zach LaVine 0.9807495578806485
DeMar DeRozan 0.9793078379574752
Kyle Kuzma 0.9783934684394806
Jalen Green 0.9773317608518364
Mikal Bridges 0.9747468331224342
CJ McCollum 0.9729858514286286
Kyrie Irving 0.9719739601535338
Shai Gilgeous-Alexander 0.9710785685974892
Lauri Markkanen 0.9696479653771177
Franz Wagner 0.9696454241110796
De'Aaron Fox 0.969467571275337
RJ Barrett 0.9690689388582499
Bam Adebayo 0.9689780192583851
Keldon Johnson 0.9685086241208964
Anthony Edwards 0.9683709835127601
Donovan Mitchell 0.9681589612801281
Scottie Barnes 0.9674250678742424
Stephen Curry 0.9665951778845921
Ja Morant 0.965208388106513
Josh Giddey 0.9649152864212112
Nikola Vucevic 0.9640609

In [101]:
# Define threshold values
thresholds = [0.9, 0.8, 0.7]

# Initialize dictionaries to store clusters for each threshold
clusters = {threshold: {} for threshold in thresholds}

# Iterate through similarity scores and group players into clusters for each threshold
for player, score in similar_players_dict.items():
    for threshold in thresholds:
        if score >= threshold:
            if score in clusters[threshold]:
                clusters[threshold][score].append(player)
            else:
                clusters[threshold][score] = [player]

# Display clusters
print("Clusters of players similar to Jayson Tatum:")
for threshold in thresholds:
    print(f"\nClusters with similarity score threshold >= {threshold}:")
    for score, players in clusters[threshold].items():
        print(f"  Cluster with similarity score {score}: {players}")


Clusters of players similar to Jayson Tatum:

Clusters with similarity score threshold >= 0.9:
  Cluster with similarity score 0.9927205049819267: ['Paolo Banchero']
  Cluster with similarity score 0.9910677514947449: ['Pascal Siakam']
  Cluster with similarity score 0.9907218723481154: ['Jaylen Brown']
  Cluster with similarity score 0.9840968108491313: ['LeBron James']
  Cluster with similarity score 0.9840660311593127: ['Julius Randle']
  Cluster with similarity score 0.9838843115420738: ['Luka Doncic']
  Cluster with similarity score 0.9832716949276415: ['Giannis Antetokounmpo']
  Cluster with similarity score 0.9809920586076716: ['Joel Embiid']
  Cluster with similarity score 0.9807495578806485: ['Zach LaVine']
  Cluster with similarity score 0.9793078379574752: ['DeMar DeRozan']
  Cluster with similarity score 0.9783934684394806: ['Kyle Kuzma']
  Cluster with similarity score 0.9773317608518364: ['Jalen Green']
  Cluster with similarity score 0.9747468331224342: ['Mikal Bridges']

In [105]:
# Define threshold values
thresholds = [0.9, 0.8, 0.7]

# Initialize dictionaries to store clusters for each threshold
clusters = {threshold: {} for threshold in thresholds}

# Iterate through similarity scores and group players into clusters for each threshold
for player, score in similar_players_dict.items():
    for threshold in thresholds:
        if score >= threshold:
            if threshold in clusters and len(clusters[threshold]) < 10:  # Limit to top 10 players per cluster
                if score in clusters[threshold]:
                    if len(clusters[threshold][score]) < 10:
                        clusters[threshold][score].append(player)
                else:
                    clusters[threshold][score] = [player]

# Display clusters
print("Clusters of players similar to Jayson Tatum:")
for threshold in thresholds:
    print(f"\nTop 10 clusters with similarity score threshold >= {threshold}:")
    for score, players in clusters[threshold].items():
        print(f"  Cluster with similarity score {score}: {players}")


Clusters of players similar to Jayson Tatum:

Top 10 clusters with similarity score threshold >= 0.9:
  Cluster with similarity score 0.9927205049819267: ['Paolo Banchero']
  Cluster with similarity score 0.9910677514947449: ['Pascal Siakam']
  Cluster with similarity score 0.9907218723481154: ['Jaylen Brown']
  Cluster with similarity score 0.9840968108491313: ['LeBron James']
  Cluster with similarity score 0.9840660311593127: ['Julius Randle']
  Cluster with similarity score 0.9838843115420738: ['Luka Doncic']
  Cluster with similarity score 0.9832716949276415: ['Giannis Antetokounmpo']
  Cluster with similarity score 0.9809920586076716: ['Joel Embiid']
  Cluster with similarity score 0.9807495578806485: ['Zach LaVine']
  Cluster with similarity score 0.9793078379574752: ['DeMar DeRozan']

Top 10 clusters with similarity score threshold >= 0.8:
  Cluster with similarity score 0.9927205049819267: ['Paolo Banchero']
  Cluster with similarity score 0.9910677514947449: ['Pascal Siakam']