# Importing of Dataset and Dependencies

In [3]:
#Import Dependencies
import numpy as np
import pandas as pd

In [4]:
#Read the percent_stats csv file into a pandas DataFrame
percent_stats_df = pd.read_csv('percent_stats.csv')
percent_stats_df.head(10)

Unnamed: 0.1,Unnamed: 0,Year,Pos,TRB%,AST%,STL%,BLK%,TOV%,USG%,3P%,2P%,FT%
0,0,1982.0,C,13.4,11.9,1.1,4.1,13.3,25.6,0.0,0.58,0.706
1,1,1982.0,C,13.6,22.1,2.3,1.9,14.8,22.8,0.0,0.494,0.781
2,2,1982.0,SF,9.7,18.6,1.2,0.9,12.7,29.8,0.352,0.475,0.68
3,3,1982.0,SG,5.5,19.7,3.1,0.3,17.5,21.5,0.294,0.363,0.862
4,4,1982.0,PG,2.9,31.9,1.1,0.1,18.4,17.9,0.375,0.475,0.747
5,5,1982.0,C,6.5,7.5,0.4,0.9,24.0,8.5,0.0,0.333,0.556
6,6,1982.0,C,0.0,0.0,0.0,0.0,66.7,17.6,0.0,1.0,0.0
7,7,1982.0,PF,14.5,10.2,0.8,2.1,21.4,20.5,0.0,0.477,0.364
8,8,1982.0,PF,15.0,5.7,1.4,3.5,18.4,20.3,0.0,0.523,0.624
9,9,1982.0,SF,11.7,13.1,2.2,0.4,7.7,22.0,0.409,0.476,0.83


# Definition of Selected Features and Test/Train Splitting

In [5]:
#Define the selected features for the percentage model
percent_selected_features = ['TRB%','AST%','BLK%','TOV%','3P%','USG%']

#Define X and y sets
X = percent_stats_df[percent_selected_features]
y = percent_stats_df.values[:, 2]

#Split data into training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

#Print the X_train dataframe
X_train.head()

Unnamed: 0,TRB%,AST%,BLK%,TOV%,3P%,USG%
11173,11.0,7.7,0.9,10.4,0.375,13.1
9174,5.7,15.1,0.3,16.6,0.154,22.0
3412,14.2,4.6,1.4,12.8,0.0,22.5
5939,2.7,22.3,0.0,63.6,0.5,23.4
4719,3.3,37.0,0.0,33.3,1.0,7.5


# Scaling of Dataset using StandardScaler

In [6]:
#Import the StandardScaler for scaling the dataset
from sklearn.preprocessing import StandardScaler

#Generate the scaling function for the features
X_scaler = StandardScaler().fit(X_train)

#Apply the scaling function to the features
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Creation of the SVC Model and GridSearch Estimator

In [7]:
# Create the SVC Model
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model

SVC(kernel='linear')

In [8]:
# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10],
              'gamma': [0.0001, 0.001, 0.01]}
grid = GridSearchCV(model, param_grid, verbose=3)

# Fitting of the Model Using GridSearch

In [9]:
# Fit the model using the grid search estimator. 
# This will take the SVC model and try each combination of parameters
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ................... C=1, gamma=0.0001, score=0.604, total=   3.8s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.8s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.616, total=   3.7s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    7.5s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.627, total=   3.4s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.616, total=   3.3s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.626, total=   3.4s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.604, total=   3.5s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.616, total=   3.8s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.627, total=   3.6s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.616, total=   3.6s
[CV] C=1, gamma=0.001 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:  5.7min finished


GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [1, 5, 10], 'gamma': [0.0001, 0.001, 0.01]},
             verbose=3)

# Model Summary

In [10]:
# List the best parameters for this dataset
print(grid.best_params_)

{'C': 5, 'gamma': 0.0001}


In [11]:
# List the best score
print(grid.best_score_)

0.6179559118236473


# Model Predictions

In [12]:
# Make predictions with the hypertuned model
predictions = grid.predict(X_test_scaled)

In [17]:
# Print the overall accuracy of the model
print('Test Accuracy: %.3f' % grid.score(X_test_scaled, y_test))

Test Accuracy: 0.624


In [15]:
# Count the support of each target in the y_test array
unique, counts = np.unique(y_test, return_counts=True)
dict(zip(unique, counts))

{'C': 840, 'PF': 900, 'PG': 849, 'SF': 775, 'SG': 795}

In [16]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["C", "PF", "PG", "SF", "SG"]))

              precision    recall  f1-score   support

           C       0.66      0.62      0.64       840
          PF       0.53      0.54      0.54       900
          PG       0.83      0.82      0.82       849
          SF       0.53      0.50      0.51       775
          SG       0.57      0.64      0.60       795

    accuracy                           0.62      4159
   macro avg       0.62      0.62      0.62      4159
weighted avg       0.63      0.62      0.62      4159

