In [1]:
# import dependencies
import pandas as pd
import joblib
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.svm import SVC 

In [2]:
# read in csv from S3 bucket
cleaned_df = pd.read_csv("https://uci-dataproject3.s3-us-west-1.amazonaws.com/AllTimeNbaSeason4Categories1990.csv")
cleaned_df.columns

Index(['Year', 'Player', 'Pos', 'Age', 'G', 'GS', 'MP', 'PER', 'USG%', 'WS',
       'BPM', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', 'FT', 'FTA',
       'FT%', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PPG', 'RPG',
       'APG', 'SPG', 'BPG', 'NBA_PER_Range'],
      dtype='object')

In [3]:
# assign X (data) and y (target)
X = cleaned_df[['PPG','APG','RPG','SPG','BPG','FG%','FT%','3P%']]

X_names = X.columns

y = cleaned_df['NBA_PER_Range'].astype('str')

y_names = ["End of the Bench", "Starter","MVP candidate", "All-Star"]

In [4]:
# split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [5]:
# create a standard scaler model and fit it to the training data
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [6]:
# support vector machine linear classifier
model = SVC(kernel='linear')
model.fit(X_train_scaled, y_train)
print(f"Training Data Score: {model.score(X_train_scaled, y_train)}")

Training Data Score: 0.8175861340097231


In [7]:
# model accuracy
print('Test Acc: %.3f' % model.score(X_test_scaled, y_test))

Test Acc: 0.824


In [8]:
# calculate classification report
predictions = model.predict(X_test_scaled)
print(classification_report(y_test, predictions,
                            target_names=y_names))

                  precision    recall  f1-score   support

End of the Bench       0.68      0.39      0.50       170
         Starter       0.89      0.92      0.90      2176
   MVP candidate       0.83      0.15      0.26        33
        All-Star       0.66      0.67      0.67       775

        accuracy                           0.82      3154
       macro avg       0.77      0.54      0.58      3154
    weighted avg       0.82      0.82      0.82      3154



In [9]:
# create the GridSearchCV model
param_grid = {'C': [1, 5, 10],
              'gamma': [0.0001, 0.001, 0.01]}
grid = GridSearchCV(model, param_grid, verbose=0)

In [10]:
# train the model 
grid.fit(X_train_scaled, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='linear',
                           max_iter=-1, probability=False, random_state=None,
                           shrinking=True, tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [1, 5, 10], 'gamma': [0.0001, 0.001, 0.01]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [11]:
# list the best parameters for this dataset
print(grid.best_params_)

{'C': 5, 'gamma': 0.0001}


In [12]:
# improved model accuracy
print('Test Acc: %.3f' % grid.score(X_test_scaled, y_test))

Test Acc: 0.832


In [13]:
# calculate classification report
predictions = grid.predict(X_test_scaled)
print(classification_report(y_test, predictions,
                            target_names=y_names))

                  precision    recall  f1-score   support

End of the Bench       0.76      0.51      0.61       170
         Starter       0.89      0.92      0.90      2176
   MVP candidate       0.94      0.48      0.64        33
        All-Star       0.67      0.67      0.67       775

        accuracy                           0.83      3154
       macro avg       0.82      0.65      0.71      3154
    weighted avg       0.83      0.83      0.83      3154



In [14]:
# save your model
filename = 'SVM_Model.sav'
joblib.dump(grid, filename)

['SVM_Model.sav']