# Importing of Dataset and Dependencies

In [3]:
#Import Dependencies
import numpy as np
import pandas as pd

In [9]:
#Read the percent_stats csv file into a pandas DataFrame
percent_stats_df = pd.read_csv('../Data/percent_stats.csv')
percent_stats_df = percent_stats_df[percent_stats_df.Year >= 2005]
percent_stats_df.head(10)

Unnamed: 0.1,Unnamed: 0,Year,Pos,TRB%,AST%,STL%,BLK%,TOV%,USG%,3P%,2P%,FT%
9782,9782,2005.0,PF,12.1,10.7,1.4,1.0,13.0,22.1,0.385,0.51,0.866
9783,9783,2005.0,SG,8.2,27.7,2.3,0.4,25.9,16.2,0.421,0.267,0.75
9784,9784,2005.0,PF,12.2,7.0,0.9,3.3,11.8,17.5,0.0,0.461,0.929
9785,9785,2005.0,PF,9.9,4.6,1.0,3.1,6.1,18.7,0.0,0.485,0.929
9786,9786,2005.0,SG,6.9,18.1,1.5,0.1,9.2,28.0,0.376,0.459,0.883
9787,9787,2005.0,SG,10.2,8.2,3.1,1.4,14.5,18.9,0.387,0.483,0.737
9788,9788,2005.0,PG,5.8,31.7,2.2,0.2,13.5,21.2,0.357,0.451,0.74
9789,9789,2005.0,C,17.0,9.1,0.5,5.6,12.7,16.4,0.0,0.538,0.689
9790,9790,2005.0,SG,6.0,18.9,1.6,0.2,14.0,18.5,0.384,0.392,0.805
9791,9791,2005.0,PG,6.7,21.8,2.4,0.0,18.2,15.8,0.462,0.424,0.73


# Definition of Selected Features and Test/Train Splitting

In [10]:
#Define the selected features for the percentage model
percent_selected_features = ['TRB%','AST%','BLK%','TOV%','3P%','USG%']

#Define X and y sets
X = percent_stats_df[percent_selected_features]
y = percent_stats_df.values[:, 2]

#Split data into training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

#Print the X_train dataframe
X_train.head()

Unnamed: 0,TRB%,AST%,BLK%,TOV%,3P%,USG%
15984,10.8,16.5,0.3,14.7,0.222,17.7
13308,23.2,9.2,6.9,18.9,0.0,16.5
11606,17.1,0.0,0.0,37.4,0.0,32.3
13682,14.4,6.3,4.4,5.1,0.0,16.4
13324,7.4,12.4,0.5,18.7,0.3,22.8


# Scaling of Dataset using StandardScaler

In [11]:
#Import the StandardScaler for scaling the dataset
from sklearn.preprocessing import StandardScaler

#Generate the scaling function for the features
X_scaler = StandardScaler().fit(X_train)

#Apply the scaling function to the features
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Creation of the SVC Model and GridSearch Estimator

In [12]:
# Create the SVC Model
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model

SVC(kernel='linear')

In [13]:
# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10],
              'gamma': [0.0001, 0.001, 0.01]}
grid = GridSearchCV(model, param_grid, verbose=3)

# Fitting of the Model Using GridSearch

In [14]:
# Fit the model using the grid search estimator. 
# This will take the SVC model and try each combination of parameters
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ................... C=1, gamma=0.0001, score=0.625, total=   0.8s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.604, total=   0.7s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.5s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.630, total=   1.7s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.618, total=   0.6s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.632, total=   0.6s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.625, total=   0.6s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.604, total=   0.9s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.630, total=   0.7s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.618, total=   0.5s
[CV] C=1, gamma=0.001 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:   47.1s finished


GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [1, 5, 10], 'gamma': [0.0001, 0.001, 0.01]},
             verbose=3)

# Model Summary

In [15]:
# List the best parameters for this dataset
print(grid.best_params_)

{'C': 10, 'gamma': 0.0001}


In [16]:
# List the best score
print(grid.best_score_)

0.6223021228389893


# Model Predictions

In [17]:
# Make predictions with the hypertuned model
predictions = grid.predict(X_test_scaled)

In [34]:
# Print the overall accuracy of the model
print('Test Accuracy: %.3f' % grid.score(X_test_scaled, y_test))

Test Accuracy: 0.616


In [32]:
# Count the support of each target in the y_test array
unique, counts = np.unique(y_test, return_counts=True)
dict(zip(unique, counts))

{'C': 354, 'PF': 357, 'PG': 308, 'SF': 332, 'SG': 362}

In [33]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["C", "PF", "PG", "SF", "SG"]))

              precision    recall  f1-score   support

           C       0.66      0.67      0.67       354
          PF       0.53      0.48      0.50       357
          PG       0.77      0.81      0.79       308
          SF       0.55      0.53      0.54       332
          SG       0.58      0.61      0.59       362

    accuracy                           0.62      1713
   macro avg       0.62      0.62      0.62      1713
weighted avg       0.61      0.62      0.61      1713

