In [1]:
# Dependencies
import pandas as pd

In [2]:
# Read the raw game-level data set from disk
df = pd.read_csv('../data/machineLearningDataSet.csv')

# Clean-up pipeline, applied in this order:
#   1. drop columns in which every value is null
#   2. drop rows containing any null value
#   3. drop the 'Unnamed: 0' column
# (the original author noted that steps 1 and 2 removed nothing on this data)
df = (
    df
    .dropna(axis='columns', how='all')
    .dropna()
    .drop(columns=['Unnamed: 0'])
)

df

Unnamed: 0,gameID,homeTeamID,visitorTeamID,homeTeamHeightAverage,homeTeamWeightAverage,homeTeamAgeAverage,visitorTeamHeightAverage,visitorTeamWeightAverage,visitorTeamAgeAverage,homeTeamWin
0,1.0,2.0,23.0,199.390000,102.625190,26.416667,200.269231,96.440638,25.153846,1.0
1,2.0,10.0,21.0,201.441538,97.906089,28.000000,199.878462,97.696738,25.692308,1.0
2,3.0,4.0,17.0,201.050769,99.301756,26.846154,201.050769,100.941666,26.692308,0.0
3,4.0,9.0,3.0,198.966667,100.243832,27.466667,201.718333,98.656260,25.166667,1.0
4,5.0,12.0,15.0,199.683077,97.208255,26.461538,201.832308,99.511106,27.692308,1.0
...,...,...,...,...,...,...,...,...,...,...
6553,62481.0,28.0,10.0,198.901538,97.626955,27.923077,201.832308,99.476215,28.615385,0.0
6554,62482.0,10.0,28.0,202.418462,100.348507,28.692308,198.315385,97.975872,27.923077,0.0
6555,62483.0,10.0,28.0,201.832308,99.476215,28.615385,198.901538,98.952839,28.000000,0.0
6556,62484.0,28.0,10.0,198.901538,97.626955,27.923077,202.418462,100.522966,29.230769,0.0


In [3]:
# Build the feature matrix X and the target vector y
# Identifier columns and the target itself are excluded from the features
drop_col = ['gameID', 'homeTeamID', 'visitorTeamID', 'homeTeamWin']
y = df['homeTeamWin']
X = df.drop(columns=drop_col)

# Human-readable names for the two target values (0.0 -> 'Lose', 1.0 -> 'Win')
y_names = ['Lose', 'Win']

print(X.shape, y.shape)

(6558, 6) (6558,)


In [4]:
# Partition the samples into a training set and a held-out test set
from sklearn.model_selection import train_test_split

# No test_size is given, so scikit-learn's default split proportion applies;
# random_state fixes the shuffle so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [5]:
# Min-max scale the features
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical

# The scaler learns its per-feature range from the training partition only;
# the same fitted scaler is then applied to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(partition) for partition in (X_train, X_test)
)

In [6]:
# Integer-encode the target labels
# The encoder is fitted on the training labels and reused for the test labels
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_test = map(label_encoder.transform, (y_train, y_test))

In [7]:
# Convert encoded labels to one-hot-encoding
# Pin the one-hot width to the number of classes the encoder learned from
# y_train. Without num_classes, to_categorical infers the width from the
# largest label present in each array, so the train and test matrices could
# end up with different column counts if one partition lacked the highest class.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [8]:
# Set up for deep learning model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [13]:
# Create a Neural Network model
# The input and output widths are read from the prepared arrays instead of
# being hard-coded, so the network keeps matching the data if the feature
# columns or the set of classes change.
n_features = X_train_scaled.shape[1]       # 6 for the current feature set
n_outputs = y_train_categorical.shape[1]   # 2 for the current one-hot target

model = Sequential()

# Two hidden layers of 20 ReLU units each
model.add(Dense(units=20, activation='relu', input_dim=n_features))
model.add(Dense(units=20, activation='relu'))
# Softmax output layer: one unit per one-hot target column
model.add(Dense(units=n_outputs, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 20)                140       
_________________________________________________________________
dense_4 (Dense)              (None, 20)                420       
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 42        
Total params: 602
Trainable params: 602
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Configure the training setup: Adam optimizer, categorical cross-entropy loss
# (the targets are one-hot encoded), and accuracy reported per epoch
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train for 100 epochs on the scaled training data; verbose=2 prints one
# summary line per epoch. fit() is left as the last expression, so the
# returned History object is what the cell displays.
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=100, shuffle=True, verbose=2)

Train on 4918 samples
Epoch 1/100
4918/4918 - 1s - loss: 0.6730 - accuracy: 0.5866
Epoch 2/100
4918/4918 - 0s - loss: 0.6639 - accuracy: 0.5990
Epoch 3/100
4918/4918 - 0s - loss: 0.6604 - accuracy: 0.6009
Epoch 4/100
4918/4918 - 0s - loss: 0.6595 - accuracy: 0.6015
Epoch 5/100
4918/4918 - 0s - loss: 0.6586 - accuracy: 0.6070
Epoch 6/100
4918/4918 - 0s - loss: 0.6580 - accuracy: 0.6065
Epoch 7/100
4918/4918 - 0s - loss: 0.6578 - accuracy: 0.6049
Epoch 8/100
4918/4918 - 0s - loss: 0.6585 - accuracy: 0.6031
Epoch 9/100
4918/4918 - 0s - loss: 0.6579 - accuracy: 0.6043
Epoch 10/100
4918/4918 - 0s - loss: 0.6583 - accuracy: 0.6047
Epoch 11/100
4918/4918 - 0s - loss: 0.6580 - accuracy: 0.6082
Epoch 12/100
4918/4918 - 0s - loss: 0.6580 - accuracy: 0.6015
Epoch 13/100
4918/4918 - 0s - loss: 0.6583 - accuracy: 0.6057
Epoch 14/100
4918/4918 - 0s - loss: 0.6579 - accuracy: 0.6061
Epoch 15/100
4918/4918 - 0s - loss: 0.6580 - accuracy: 0.6037
Epoch 16/100
4918/4918 - 0s - loss: 0.6582 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x1ece27275c0>

In [15]:
# Evaluate the trained network on the held-out test partition;
# evaluate() yields the loss followed by the compiled metric (accuracy)
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f'Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}')

1640/1640 - 0s - loss: 0.6744 - accuracy: 0.5933
Deep Neural Network - Loss: 0.6743560471185823, Accuracy: 0.5932926535606384


In [16]:
# Persist the trained network to disk, next to the notebook
# (the '.h5' suffix suggests an HDF5 file -- confirm against the installed Keras version)
model.save(filepath='deep_neural_model.h5')