In [18]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.cluster import KMeans

# Data loading and processing

all_nba_data = pd.read_csv("2024_ALL_NBA_Player_Advanced.csv")
okc_data = pd.read_csv("2024 OKC Advanced.csv")

all_nba_data.fillna(0, inplace=True)
okc_data.fillna(0, inplace=True)


In [19]:
# Feature selection and Standardization

features = ['PER', 'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%', 'WS', 'BPM']

scaler = StandardScaler()
all_nba_scaled = scaler.fit_transform(all_nba_data[features])
okc_scaled = scaler.transform(okc_data[features])

In [20]:
# Naive Bayes Classifier

# Prepare labels for Naive Bayes (Thunder players as 1, others as 0)
y = (all_nba_data['Tm'] == 'OKC').astype(int)
X_train, X_test, y_train, y_test = train_test_split(all_nba_scaled, y, test_size=0.2, random_state=42)

nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)


In [21]:
# Support Vector Machine setup

svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
svm_predictions = svm.predict(all_nba_scaled)
best_fits_svm = all_nba_data.iloc[svm_predictions == 1]['Player']


In [22]:
# K-Means Clustering

kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(all_nba_scaled)
clusters = kmeans.labels_
okc_cluster = kmeans.predict(okc_scaled)
selected_players_cluster = all_nba_data.iloc[clusters == okc_cluster[0]]['Player']


In [23]:
# Output 

print("Best fits from SVM:", best_fits_svm.tolist())
print("Players similar to OKC style from Clustering:", selected_players_cluster.tolist())

Best fits from SVM: []
Players similar to OKC style from Clustering: ['Kyle Anderson', 'Giannis Antetokounmpo', 'Cole Anthony', 'Deni Avdija', 'LaMelo Ball', 'Paolo Banchero', 'Desmond Bane', 'Scottie Barnes', 'RJ Barrett', 'Bradley Beal', 'Devin Booker', 'Mikal Bridges', 'Malcolm Brogdon', 'Jaylen Brown', 'Jalen Brunson', 'Jimmy Butler', 'Mike Conley', 'Cade Cunningham', 'Stephen Curry', 'JD Davison', 'Dexter Dennis', 'DeMar DeRozan', 'Luka Dončić', 'Kevin Durant', 'Anthony Edwards', 'Joel Embiid', "De'Aaron Fox", 'Darius Garland', 'Paul George', 'Josh Giddey', 'Shai Gilgeous-Alexander', 'Aaron Gordon', 'Jerami Grant', 'RaiQuan Gray', 'Draymond Green', 'Jalen Green', 'Tyrese Haliburton', 'James Harden', 'Tobias Harris', 'Tyler Herro', 'Jrue Holiday', 'Brandon Ingram', 'Kyrie Irving', 'Jaren Jackson Jr.', 'LeBron James', 'Nikola Jokić', 'Tre Jones', 'Tyus Jones', 'Jonathan Kuminga', 'Kyle Kuzma', 'Kawhi Leonard', 'Caris LeVert', 'Damian Lillard', 'Lauri Markkanen', 'Tyrese Maxey', 'CJ 

In [31]:
svm = SVC(kernel='rbf', class_weight='balanced', C=10, gamma='scale')
svm.fit(X_train, y_train)

svm_predictions = svm.predict(all_nba_scaled)
best_fits_svm = all_nba_data.iloc[svm_predictions == 1]['Player']

okc_target_players = list(best_fits_svm) + list(selected_players_cluster)
print(okc_target_players)


['Grayson Allen', 'Harrison Barnes', 'Donte DiVincenzo', 'Luguentz Dort', 'Josh Giddey', 'Shai Gilgeous-Alexander', 'Sam Hauser', 'Chet Holmgren', 'Isaiah Joe', 'Keyontae Johnson', 'Brook Lopez', 'Trey Murphy III', 'Aaron Nesmith', 'Kristaps Porziņģis', 'Norman Powell', 'Olivier Sarr', 'Kyle Anderson', 'Giannis Antetokounmpo', 'Cole Anthony', 'Deni Avdija', 'LaMelo Ball', 'Paolo Banchero', 'Desmond Bane', 'Scottie Barnes', 'RJ Barrett', 'Bradley Beal', 'Devin Booker', 'Mikal Bridges', 'Malcolm Brogdon', 'Jaylen Brown', 'Jalen Brunson', 'Jimmy Butler', 'Mike Conley', 'Cade Cunningham', 'Stephen Curry', 'JD Davison', 'Dexter Dennis', 'DeMar DeRozan', 'Luka Dončić', 'Kevin Durant', 'Anthony Edwards', 'Joel Embiid', "De'Aaron Fox", 'Darius Garland', 'Paul George', 'Josh Giddey', 'Shai Gilgeous-Alexander', 'Aaron Gordon', 'Jerami Grant', 'RaiQuan Gray', 'Draymond Green', 'Jalen Green', 'Tyrese Haliburton', 'James Harden', 'Tobias Harris', 'Tyler Herro', 'Jrue Holiday', 'Brandon Ingram', 'Ky

In [35]:
salaries_data = pd.read_csv("nba_salaries.csv")
salaries_data.columns = salaries_data.iloc[0]
salaries_data = salaries_data[1:]

players_with_no_salary_2024_25 = salaries_data[salaries_data['2024-25'].isna()]['Player'].tolist()
players_with_no_salary_2024_25

intersection_players = list(set(players_with_no_salary_2024_25) & set(okc_target_players))
intersection_players

['Mike Conley',
 'Malik Monk',
 'Kyle Anderson',
 'Tyrese Maxey',
 'Immanuel Quickley',
 'Kelly Olynyk',
 'Pascal Siakam',
 'Grayson Allen',
 'Jrue Holiday',
 'Tobias Harris',
 'James Harden',
 'Tyus Jones',
 'DeMar DeRozan',
 'Dario Šarić']