In [115]:
# Dependencies
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

In [116]:
def clean_data(df):        
    # Dropping columns that are not needed
    df = df.drop(['Pos'], axis=1)

    # Scaling the data
    Scaler = MinMaxScaler()
    df = pd.DataFrame(Scaler.fit_transform(df), columns=df.columns)
    
    return df

In [117]:
# Creating the model
df_train = pd.read_csv('Data/Training/overall_training_data.csv',index_col=0)
df_train = df_train.dropna(axis=0)
df_train = df_train.drop(['Player'], axis=1)
df_train = clean_data(df_train)

training_cols = [x for x in df_train.columns if x != 'All Star']
X_train = df_train[training_cols]
y_train = df_train['All Star']

KNN = KNeighborsClassifier(n_neighbors=61)
KNN = KNN.fit(X_train, y_train)
y_prediction = KNN.predict(X_train)

In [118]:
# Accuracy of model on training data using sklearn.metrics
print(metrics.accuracy_score(y_train, y_prediction))

0.9743991640543365


In [119]:
# Processing test data
df_test = pd.read_csv('Data/Testing/Test_Data.csv', index_col=0)
df_test = df_test.dropna(axis=0)
player_list = df_test['Player']

df_test = df_test.drop(['Player'], axis=1)
df_test = clean_data(df_test)

# Predict using the KNN Model
y_prediction = KNN.predict(df_test)

In [120]:
result = pd.DataFrame(player_list)
result['Predicted All Star'] = y_prediction.tolist()

# Predicted NBA all stars
predicted_all_stars = []
for index, row in result.iterrows():
    if result.at[index, 'Predicted All Star'] == 1.0:
        predicted_all_stars.append(result.at[index, 'Player'])

# Actual NBA all stars
actual_all_stars = ['Bradley Beal', 'Kyrie Irving', 'Giannis Antetokounmpo', 'Joel Embiid', 'Jaylen Brown', 
 'James Harden', 'Zach LaVine', 'Ben Simmons', 'Julius Randle', 'Jayson Tatum', 'Nikola Vučević',
 'Stephen Curry', 'Luka Dončić', 'LeBron James', 'Kawhi Leonard', 'Nikola Jokić', 'Anthony Davis', 'Damian Lillard', 
 'Donovan Mitchell', 'Chris Paul', 'Paul George', 'Zion Williamson', 'Rudy Gobert', 'Kevin Durant','Domantas Sabonis',
                   'Jimmy Butler']

result['Actual All Star'] = 0.0
for index, row in result.iterrows():
    if result.at[index, 'Player'] in actual_all_stars:
        result.at[index, 'Actual All Star'] = 1.0

In [121]:
# Determining accuracy of prediction
true_positive = 0
false_positive = 0
true_negative = 0
false_negative = 0
total = len(result)

for index, row in result.iterrows():
    if result.at[index, 'Predicted All Star'] == 1.0 and result.at[index, 'Actual All Star'] == 1.0:
        true_positive += 1
    elif result.at[index, 'Predicted All Star'] == 1.0 and result.at[index, 'Actual All Star'] == 0.0:
        false_positive += 1
    elif result.at[index, 'Predicted All Star'] == 0.0 and result.at[index, 'Actual All Star'] == 0.0:
        true_negative += 1
    else:
        false_negative += 1

accuracy = str(((true_positive + true_negative)/total)*100) + ' %'
recall = str((true_positive/len(predicted_all_stars))*100) + ' %'
print("Accuracy: ", accuracy)
print("Recall: ", recall)

Accuracy:  97.42173112338858 %
Recall:  85.0 %
