In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
df = pd.read_csv('seasons.csv')
df = df[['Pos', 'FG%', 'FT%', '3P%','TRB', 'AST', 'BLK']]
target = df['Pos']
del df['Pos']
df['3P%'].fillna(df['3P%'].median(), inplace=True)
df['FT%'].fillna(df['FT%'].median(), inplace=True)
df['FG%'].fillna(df['FG%'].median(), inplace=True)
df

Unnamed: 0,FG%,FT%,3P%,TRB,AST,BLK
0,0.592,0.582,0.3330,9.3,2.3,1.1
1,0.557,0.691,0.1430,10.2,5.1,1.3
2,0.493,0.827,0.3890,7.4,2.4,1.6
3,0.500,0.769,0.3375,1.5,0.0,0.0
4,0.368,0.676,0.3460,1.8,1.9,0.2
...,...,...,...,...,...,...
495,0.624,0.465,0.3375,5.7,1.8,1.4
496,0.472,0.857,0.5000,2.0,0.4,0.4
497,0.600,1.000,0.5000,0.3,0.5,0.3
498,0.333,0.798,0.2790,2.1,2.4,0.2


In [3]:
X_train, X_test, y_train, y_test = train_test_split(df, target, random_state=0)
print("X_train shape: {}".format(X_train.shape))
print("y_train shape: {}".format(y_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("y_test shape: {}".format(y_test.shape))
knn = KNeighborsClassifier(n_neighbors=5)
rfc = RandomForestClassifier(max_depth=10, random_state=20)
dtc = DecisionTreeClassifier(random_state=5)
knn.fit(X_train, y_train)
rfc.fit(X_train, y_train)
dtc.fit(X_train, y_train)

X_train shape: (375, 6)
y_train shape: (375,)
X_test shape: (125, 6)
y_test shape: (125,)


DecisionTreeClassifier(random_state=5)

In [5]:
X_new = np.array([[0.477, 0.782, 0.335, 6.5, 1.0, 3.4]])
print("X_new.shape: {}".format(X_new.shape))
knn_prediction = knn.predict(X_new)
rfc_prediction = dtc.predict(X_new)
dtc_prediction = rfc.predict(X_new)
print("Prediction with KNeighbors Classifier: {}".format(knn_prediction))
print("Prediction with Random Forest Classifier: {}".format(rfc_prediction))
print("Prediction with Decision Tree Classifier: {}".format(dtc_prediction))

X_new.shape: (1, 6)
Prediction with KNeighbors Classifier: ['C']
Prediction with Random Forest Classifier: ['PF']
Prediction with Decision Tree Classifier: ['C']


In [6]:
y_pred = knn.predict(X_test)
print("Test set predictions of KNeighbors Classifier:\n {}".format(y_pred))
print("Test set score (np.mean): {:.2f}".format(np.mean(y_pred == y_test)))
y_pred = rfc.predict(X_test)
print("Test set predictions of Random Forest Classifier:\n {}".format(y_pred))
print("Test set score (np.mean): {:.2f}".format(np.mean(y_pred == y_test)))
y_pred = dtc.predict(X_test)
print("Test set predictions of Decision Tree Classifier:\n {}".format(y_pred))
print("Test set score (np.mean): {:.2f}".format(np.mean(y_pred == y_test)))

Test set predictions of KNeighbors Classifier:
 ['C' 'C' 'SG' 'SF' 'PG' 'PF' 'PG' 'PG' 'SG' 'PF' 'PG' 'PF' 'PG' 'PG' 'SF'
 'SF' 'PF' 'SG' 'PF' 'SF' 'SG' 'PF' 'C' 'SF' 'PF' 'PF' 'PG' 'PG' 'SG' 'PG'
 'PF' 'PF' 'PG' 'C' 'C' 'C' 'SG' 'PG' 'PF' 'PG' 'SG' 'PF' 'PG' 'SG' 'PF'
 'C' 'PF' 'SF' 'SF' 'C' 'PG' 'PF' 'PG' 'C' 'C' 'C' 'C' 'C' 'PF' 'C' 'PF'
 'C' 'SF' 'PG' 'SF' 'SG' 'SG' 'PF' 'PF' 'SG' 'C' 'C' 'PF' 'PF' 'PG' 'PF'
 'SG' 'C' 'PF' 'PF' 'PG' 'SF' 'SF' 'C' 'SF' 'SF' 'SF' 'SG' 'SF' 'C' 'PF'
 'PG' 'C' 'SG' 'SG' 'C' 'C' 'SG' 'SG' 'PF' 'C' 'PF' 'SG' 'PF' 'C' 'PG'
 'SF' 'SF' 'SG' 'PF' 'C' 'PG' 'C' 'PF' 'PG' 'SG' 'PG' 'SG' 'PG' 'PF' 'SG'
 'PF' 'SG' 'PF' 'SG']
Test set score (np.mean): 0.50
Test set predictions of Random Forest Classifier:
 ['C' 'C' 'SG' 'SG' 'SG' 'PF' 'PG' 'SG' 'SG' 'SF' 'SG' 'PF' 'SG' 'PG' 'SF'
 'C' 'SG' 'SF' 'PF' 'PF' 'PF' 'SF' 'SF' 'PF' 'C' 'PF' 'PG' 'SG' 'SF' 'PG'
 'SG' 'C' 'PG' 'C' 'SF' 'C' 'SG' 'PG' 'PF' 'SG' 'SG' 'C' 'PG' 'SG' 'C' 'C'
 'SG' 'SF' 'SF' 'C' 'PG' 'C' 'PG' 'C' '