In [None]:
# Imports
import pandas as pd
from pathlib import Path
from sklearn import svm
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

: 

In [147]:
# Upload cc_default.csv to Colab
# from google.colab import files

# csv_file = files.upload()

# Load data
df = pd.read_csv(Path('./NBA_20_yr.csv'))
df.head()

Unnamed: 0,Rk,MPG,PPG,RPG,APG
0,1,33.2,16.5,9.3,2.5
1,2,35.3,16.3,3.9,6.7
2,3,26.7,14.6,1.9,3.5
3,4,28.6,12.9,2.8,2.1
4,5,30.5,13.4,4.1,3.0


In [148]:
# Create a column to hold Group data
df.loc[:,'Group'] = 0
df.fillna(0, inplace = True)
df

Unnamed: 0,Rk,MPG,PPG,RPG,APG,Group
0,1,33.2,16.5,9.3,2.5,0
1,2,35.3,16.3,3.9,6.7,0
2,3,26.7,14.6,1.9,3.5,0
3,4,28.6,12.9,2.8,2.1,0
4,5,30.5,13.4,4.1,3.0,0
...,...,...,...,...,...,...
1863,56,3.5,0.5,0.0,0.5,0
1864,57,0.0,0.0,0.0,0.0,0
1865,58,6.5,1.1,1.5,0.1,0
1866,59,0.0,0.0,0.0,0.0,0


---
## Find top 15
### Binary Classification

In [149]:
# Create groups based on ranks
for index, row in df.iterrows():
    if row['Rk'] <= 15:
        df.at[index,'Group'] = 1
    else:
        df.at[index,'Group'] = 0
    
df

Unnamed: 0,Rk,MPG,PPG,RPG,APG,Group
0,1,33.2,16.5,9.3,2.5,1
1,2,35.3,16.3,3.9,6.7,1
2,3,26.7,14.6,1.9,3.5,1
3,4,28.6,12.9,2.8,2.1,1
4,5,30.5,13.4,4.1,3.0,1
...,...,...,...,...,...,...
1863,56,3.5,0.5,0.0,0.5,0
1864,57,0.0,0.0,0.0,0.0,0
1865,58,6.5,1.1,1.5,0.1,0
1866,59,0.0,0.0,0.0,0.0,0


In [150]:
# Set Feature and Target
X = df.drop(columns=['Rk','Group'])
y = df['Group']
y.value_counts()

0    1388
1     480
Name: Group, dtype: int64

In [151]:
# Split dataset into train, test datasets
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 1)
# Initiate the scaler
X_scaler = StandardScaler()
# Fit the scaler to the features dataset
X_scaler = X_scaler.fit(X)
# Scale train, test datasets
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [152]:
X_test_scaled[0]

array([ 0.59432799,  0.52259442, -0.25369002, -0.0878169 ])

In [160]:
import pickle

In [164]:
# save the scaler
Standard_Scaler = open("scaler.pkl","wb")
pickle.dump(X_scaler,Standard_Scaler) 
Standard_Scaler.close() 
# Load the scaler
model = open("scaler.pkl","rb")           
new_scaler = pickle.load(model)           
new_scaler

StandardScaler()

---
## Use the SVC classifier model

In [153]:
# From SVM, instantiate SVC classifier model instance
svm_model = svm.SVC(kernel = 'linear', random_state = 0)
 
# Fit the model to the data using the training data
svm_model = svm_model.fit(X_train_scaled, y_train)
 
# Use the testing data to make the model predictions
svm_pred = svm_model.predict(X_test_scaled)

# Review the model's predicted values
svm_pred

array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,

In [154]:
# Use a classification report to evaluate the model using the predictions and testing data
svm_testing_report = classification_report(y_test, svm_pred)

# Print the classification report
print(svm_testing_report)

              precision    recall  f1-score   support

           0       0.83      0.94      0.88       357
           1       0.65      0.38      0.48       110

    accuracy                           0.81       467
   macro avg       0.74      0.66      0.68       467
weighted avg       0.79      0.81      0.79       467



---
## Save, load and use this model

In [156]:
# save the model
Support_Vector_Machine = open("model.pkl","wb")
pickle.dump(svm_model,Support_Vector_Machine) 
Support_Vector_Machine.close() 

In [157]:
# Load the model
model = open("model.pkl","rb")           
new_model = pickle.load(model)           
new_model

SVC(kernel='linear', random_state=0)

In [158]:
import numpy as np
# Used the model for Prediction
data = np.array([[28.6, 12.9, 2.8, 2.1]])
data_scaled = X_scaler.transform(data)
new_model.predict(data_scaled)

  "X does not have valid feature names, but"


array([0])

In [159]:
if new_model.predict(data_scaled) == 1:
    print("The player you choose will be in the Top 15 picks")
else:
    print("The player you choose will rank 16 and below")

The player you choose will rank 16 and below
