In [1]:
# Load player counts with unmatched names
import pandas as pd
df = pd.read_csv("NBA_INTL_FIBA_Clustering.csv", encoding='latin-1', index_col=0)

# Normalise every player name: keep only letters, spaces and hyphens, then upper-case,
# so that differently punctuated/cased spellings of one name collapse to the same key.
clean_names = [
    ''.join(ch for ch in raw_name if ch.isalpha() or ch in (' ', '-')).upper()
    for raw_name in df.index
]

# Re-index by the normalised name and merge duplicate rows by summing their columns.
# min_count=1 keeps a column NaN when none of the merged rows has a value for it.
df = (
    df.assign(CleanName=clean_names)
      .set_index('CleanName')
      .groupby('CleanName')
      .sum(min_count=1)
      .rename_axis('Player')
)

# Function to recalculate clusters from cluster centres using Euclidean distance
import numpy as np
# Transposed after loading so that each row of `cluster_centres` is one centre
# (get_cluster iterates over the rows of `.values`).
cluster_centres = pd.read_csv('cluster-centres.csv', index_col=0).T
def get_cluster(player_stats, centres=None):
    """Return the index of the cluster centre closest to ``player_stats``.

    Parameters
    ----------
    player_stats : array-like, shape (n_features,)
        One player's feature vector (e.g. a DataFrame row, list or array).
        Values are matched to the centre coordinates by position.
    centres : array-like, shape (n_clusters, n_features), optional
        One centre per row. Defaults to the rows of the module-level
        ``cluster_centres`` frame.

    Returns
    -------
    numpy integer or float
        Zero-based row index of the nearest centre by Euclidean distance
        (the lowest index wins on ties), or ``nan`` when ``player_stats``
        contains NaN/inf and therefore cannot be classified.

    Raises
    ------
    ValueError
        If ``centres`` is not 2-D or its width differs from the length of
        ``player_stats``.
    """
    if centres is None:
        centres = cluster_centres.values
    centres = np.asarray(centres, dtype=float)
    stats = np.asarray(player_stats, dtype=float)

    if centres.ndim != 2 or stats.ndim != 1 or centres.shape[1] != stats.shape[0]:
        raise ValueError(
            "player_stats of shape %s does not match cluster centres of shape %s"
            % (stats.shape, centres.shape)
        )

    # With a NaN/inf input every distance is NaN/inf and argmin would just
    # report index 0, silently labelling the player as cluster 0.
    if not np.isfinite(stats).all():
        return np.nan

    # Distance to every centre at once: broadcast the vector across the rows.
    dists = np.linalg.norm(centres - stats, axis=1)
    return np.argmin(dists)

# Reload cluster columns using new sums
leagues = ['FIBA', 'NBA', 'INTL']
dfs = []  # per-league frames of row-normalised play-type shares, in `leagues` order
for league in leagues:
    cluster_col = league + '_Cluster'

    # Select the play-type count columns by name and leave the stale cluster
    # column out *before* dropna(), so a missing old cluster value cannot
    # discard a player whose counts are complete.
    stat_cols = [c for c in df.columns if c.startswith(league) and c != cluster_col]
    counts = df[stat_cols].dropna()

    # A row whose counts sum to zero would normalise to 0/0 = NaN; there is no
    # meaningful nearest centre for it, so it is left without a cluster.
    totals = counts.sum(axis=1)
    counts = counts[totals != 0]

    shares = counts.div(counts.sum(axis=1), axis=0)  # Normalize by row
    dfs.append(shares)

    if shares.empty:
        # Nothing to classify for this league: the column is all-NaN.
        df[cluster_col] = float('nan')
    else:
        # Assignment aligns on the player index; players not in `shares` get NaN.
        df[cluster_col] = shares.apply(get_cluster, axis=1)

# Write new data file to CSV
# This file is read back with encoding='latin-1' at the top of the cell, so it
# must be written with the same encoding: the to_csv default (UTF-8) would turn
# every non-ASCII letter into two bytes that decode as garbage on the next run.
# NOTE: a character with no Latin-1 representation would raise UnicodeEncodeError here.
df.to_csv("NBA_INTL_FIBA_Clustering.csv", encoding='latin-1')
df

Unnamed: 0_level_0,FIBA_P&R Ball Handler,FIBA_Spot-Up,FIBA_Transition,FIBA_ISO,FIBA_Post-Up,FIBA_P&R Roll Man,FIBA_Cut,FIBA_Offensive Rebound,FIBA_Off Screen,FIBA_Hand Off,...,NBA_Spot-Up,NBA_Transition,NBA_ISO,NBA_Post-Up,NBA_P&R Roll Man,NBA_Cut,NBA_Offensive Rebound,NBA_Off Screen,NBA_Hand Off,NBA_Cluster
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AARON BEST,,,,,,,,,,,...,,,,,,,,,,
AARON BROOKS,,,,,,,,,,,...,474.0,377.0,498.0,0.0,2.0,23.0,134.0,64.0,172.0,2.0
AARON CEL,6.0,95.0,14.0,2.0,14.0,20.0,17.0,19.0,14.0,0.0,...,,,,,,,,,,
AARON CRAFT,,,,,,,,,,,...,,,,,,,,,,
AARON DOORNEKAMP,8.0,23.0,12.0,0.0,2.0,1.0,4.0,16.0,5.0,1.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZORAN PAUNOVIC,,,,,,,,,,,...,,,,,,,,,,
ZORAN PLANINIC,,,,,,,,,,,...,,,,,,,,,,
ZORAN VRKIC,,,,,,,,,,,...,,,,,,,,,,
ZVONKO BULJAN,,,,,,,,,,,...,,,,,,,,,,
