In [1]:
# import dependencies
import numpy as np
import pandas as pd
import tensorflow
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# set the seed values for the notebook so the results are reproducible
from numpy.random import seed
seed(1)
# Seeding NumPy alone is not enough: tf.keras draws the initial layer weights
# from TensorFlow's own random generator, so seed that as well. The compat.v1
# alias is used because it is available on both TF 1.x (>= 1.13) and TF 2.x.
# NOTE: this narrows run-to-run variation; multi-threaded / GPU kernels may
# still introduce small non-deterministic differences.
tensorflow.compat.v1.set_random_seed(1)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# load the cleaned season-stats CSV straight from the S3 bucket,
# then show the column labels so the available fields are visible
cleaned_df = pd.read_csv(
    filepath_or_buffer="https://uci-dataproject3.s3-us-west-1.amazonaws.com/AllTimeNbaSeason4Categories1990.csv"
)
cleaned_df.columns

Index(['Year', 'Player', 'Pos', 'Age', 'G', 'GS', 'MP', 'PER', 'USG%', 'WS',
       'BPM', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', 'FT', 'FTA',
       'FT%', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PPG', 'RPG',
       'APG', 'SPG', 'BPG', 'NBA_PER_Range'],
      dtype='object')

In [3]:
# target (y): the NBA_PER_Range column, cast to string
y = cleaned_df['NBA_PER_Range'].astype('str')

# data (X): the eight stat columns used as model inputs
X = cleaned_df[[
    'PPG', 'APG', 'RPG', 'SPG', 'BPG',
    'FG%', 'FT%', '3P%',
]]

# column labels of the feature frame
X_names = X.columns

In [4]:
# partition features and labels into train / test subsets using
# train_test_split's default proportions; random_state is fixed so the
# same rows land in each subset on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [5]:
# fit a min-max scaler on the training features only (so nothing from the
# test split leaks into the scaling), then apply it to both splits
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = map(X_scaler.transform, (X_train, X_test))

In [6]:
# Label-encode the targets: learn the class-name -> integer mapping from the
# training labels, then apply that same mapping to both splits
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# convert encoded labels to one-hot-encoding.
# Without num_classes, to_categorical sizes its output from the largest value
# present in the array it is given, so a split that happens to lack the
# highest-indexed class would come out one column short and no longer match
# the model's output layer. Pin the width to the number of classes the encoder
# learned so both splits always have identical shape.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [7]:
# build the deep network in one go: two 100-unit ReLU hidden layers fed by
# the 8 input features, followed by a 4-way softmax output layer
deep_model = Sequential([
    Dense(units=100, activation='relu', input_dim=8),
    Dense(units=100, activation='relu'),
    Dense(units=4, activation='softmax'),
])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [8]:
# compile the model (training happens in a later cell): Adam optimizer,
# categorical cross-entropy loss to match the one-hot targets, accuracy metric
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [9]:
# print the model's layer-by-layer summary: layer types, output shapes and
# parameter counts, plus the trainable / non-trainable totals
deep_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               900       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 404       
Total params: 11,404
Trainable params: 11,404
Non-trainable params: 0
_________________________________________________________________


In [10]:
# train the network on the scaled training features and one-hot labels for
# 250 epochs, reshuffling each epoch and suppressing per-epoch logging;
# left as a bare expression so the returned History object is displayed
deep_model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=250,
    verbose=0,
    shuffle=True,
)

<tensorflow.python.keras.callbacks.History at 0x7fa953f31810>

In [11]:
# score the trained network on the held-out test split and report the
# resulting loss and accuracy
model_loss, model_accuracy = deep_model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=0,
)
print(f"Deep Learning Model - Model Loss: {model_loss}, Model Accuracy: {model_accuracy}")

Deep Learning Model - Model Loss: 0.3989728803338321, Model Accuracy: 0.8357641100883484


In [12]:
# joblib raised an error when used to persist this model, so the model is
# written out with Keras' own save() method instead (single .h5 file)
deep_model.save(filepath="Deep_Learning_Model.h5")