In [1]:
# import dependencies
import numpy as np
import pandas as pd
import tensorflow
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# set the seed values for the notebook so the results are reproducible
from numpy.random import seed
seed(1)
# NumPy's seed alone does not cover Keras: layer weights are initialised from
# TensorFlow's own random number generator, so seed that as well. The compat.v1
# entry point is used because it is available in both TensorFlow 1.x and 2.x.
tensorflow.compat.v1.set_random_seed(1)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# load the CSV that is hosted in the S3 bucket and preview the first rows
DATA_URL = "https://uci-dataproject3.s3-us-west-1.amazonaws.com/AllTimeNbaSeason4Categories1990.csv"
cleaned_df = pd.read_csv(DATA_URL)
cleaned_df.head()

Unnamed: 0,Year,Player,Pos,Age,G,GS,MP,PER,USG%,WS,...,BLK,TOV,PF,PTS,PPG,RPG,APG,SPG,BPG,NBA_PER_Range
0,1990.0,Mark Acres,C,27.0,80.0,50.0,1691.0,8.3,9.4,1.6,...,25.0,70.0,248.0,362.0,4.525,5.3875,0.8375,0.45,0.3125,End of the Bench
1,1990.0,Michael Adams,PG,27.0,79.0,74.0,2690.0,15.4,18.5,6.9,...,3.0,141.0,133.0,1221.0,15.455696,2.848101,6.265823,1.531646,0.037975,Starter
2,1990.0,Mark Aguirre,SF,30.0,78.0,40.0,2005.0,15.8,24.3,5.7,...,19.0,121.0,201.0,1099.0,14.089744,3.910256,1.858974,0.435897,0.24359,Starter
3,1990.0,Danny Ainge,PG,30.0,75.0,68.0,2727.0,16.1,23.0,4.8,...,18.0,185.0,238.0,1342.0,17.893333,4.346667,6.04,1.506667,0.24,Starter
4,1990.0,Mark Alarie,PF,26.0,82.0,10.0,1893.0,14.1,20.4,3.1,...,39.0,101.0,219.0,860.0,10.487805,4.560976,1.731707,0.731707,0.47561,End of the Bench


In [3]:
# pick the feature columns (X) and the target column (y)
feature_columns = ['PPG', 'APG', 'RPG', 'SPG', 'BPG', 'FG%', 'FT%', '3P%']
X = cleaned_df[feature_columns]

# keep the feature names around alongside the data
X_names = X.columns

# the target is the PER range label, forced to string
y = cleaned_df['NBA_PER_Range'].astype('str')

In [4]:
# hold out part of the data for testing; the fixed random_state keeps the split repeatable
split_parts = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split_parts

In [5]:
# fit a min-max scaler on the training features only, then apply that same
# scaler to both the training and the testing features
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [6]:
# Label-encode the target (the encoder is fit on the training labels only)
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# convert encoded labels to one-hot encoding.
# The width is pinned to the number of classes the encoder knows about: left to
# itself, to_categorical infers the width from the largest label in the array it
# is given, so a test split that happens to lack the highest-indexed class would
# come out narrower than the training targets and the model's output layer.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [7]:
# create deep learning model and add layers
# The input width and the number of output units are read from the prepared
# arrays instead of being hard-coded, so the network stays in sync if the
# feature list or the set of target classes changes.
n_features = X_train_scaled.shape[1]
n_outputs = y_train_categorical.shape[1]

deep_model = Sequential()
# two hidden layers of 50 ReLU units each
deep_model.add(Dense(units=50, activation='relu', input_dim=n_features))
deep_model.add(Dense(units=50, activation='relu'))
# softmax output: one probability per target class
deep_model.add(Dense(units=n_outputs, activation='softmax'))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [8]:
# compile the model: Adam optimizer, categorical cross-entropy loss (the targets
# are one-hot encoded), and accuracy tracked as the metric.
# Note: this cell only compiles; the model is trained in a separate cell.
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [9]:
# print a layer-by-layer summary of the model (output shapes and parameter counts)
deep_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                450       
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 204       
Total params: 3,204
Trainable params: 3,204
Non-trainable params: 0
_________________________________________________________________


In [10]:
# train the model on the scaled training features and one-hot training labels;
# verbose=0 keeps the per-epoch log out of the notebook output
fit_options = dict(epochs=250, shuffle=True, verbose=0)
deep_model.fit(X_train_scaled, y_train_categorical, **fit_options)

<tensorflow.python.keras.callbacks.History at 0x7ff5d03e0f10>

In [11]:
# score the trained model on the held-out test data and report loss and accuracy
test_metrics = deep_model.evaluate(X_test_scaled, y_test_categorical, verbose=0)
model_loss, model_accuracy = test_metrics
print(f"Deep Learning Model - Model Loss: {model_loss}, Model Accuracy: {model_accuracy}")

Deep Learning Model - Model Loss: 0.38211523575652756, Model Accuracy: 0.8424223065376282


In [12]:
# show actual results versus the model's predictions for the first five test rows
# The predicted class index is the argmax over the softmax output. This replaces
# Sequential.predict_classes, which is deprecated and no longer available in
# newer TensorFlow releases; the resulting indices are the same.
deep_probabilities = deep_model.predict(X_test_scaled[:5])
deep_predictions = np.argmax(deep_probabilities, axis=-1)
# map the class indices back to their original string labels
deep_prediction_labels = label_encoder.inverse_transform(deep_predictions)
print(f"Predicted Labels: {deep_prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted Labels: ['End of the Bench' 'End of the Bench' 'End of the Bench'
 'End of the Bench' 'All-Star']
Actual Labels: ['End of the Bench', 'End of the Bench', 'End of the Bench', 'Starter', 'All-Star']


In [13]:
# joblib raised an error when saving this model, so the model is written with
# its own save method instead (the approach covered in the course for deep learning)
MODEL_PATH = "Deep_Learning_Model.h5"
deep_model.save(MODEL_PATH)