In [1]:
# Dependencies
import pandas as pd

In [2]:
# Read the game data set and clean it in one chained expression
df = (
    pd.read_csv('../data/machineLearningDataSetTime.csv')
    # Remove columns in which every value is null (original note: nothing to drop)
    .dropna(axis='columns', how='all')
    # Remove rows containing any null value (original note: nothing to drop)
    .dropna()
    # Discard the 'Unnamed: 0' column
    .drop(columns=['Unnamed: 0'])
)

df

Unnamed: 0,gameID,homeTeamID,visitorTeamID,homeTeamHeightAverage,homeTeamWeightAverage,homeTeamAgeAverage,visitorTeamHeightAverage,visitorTeamWeightAverage,visitorTeamAgeAverage,homeTeamWin
0,1.0,2.0,23.0,181.976007,91.059822,24.269028,202.525136,99.393064,26.046181,1.0
1,2.0,10.0,21.0,201.248610,98.947063,28.704028,201.397482,96.940202,25.975764,1.0
2,3.0,4.0,17.0,197.830369,94.730579,27.620556,201.563640,102.968376,27.153889,0.0
3,4.0,9.0,3.0,197.233822,100.863899,27.403889,201.088801,99.225833,25.976597,1.0
4,5.0,12.0,15.0,199.576267,97.996063,27.405972,201.274186,98.656229,28.807361,1.0
...,...,...,...,...,...,...,...,...,...,...
6107,48782.0,7.0,16.0,202.166008,98.410501,27.240903,201.209275,102.920497,27.350417,0.0
6108,48783.0,8.0,26.0,199.630418,97.061349,26.263403,200.720325,97.861088,25.140556,1.0
6109,48784.0,25.0,10.0,201.403832,97.838030,26.258819,200.810813,98.499487,28.190069,1.0
6110,48785.0,13.0,24.0,199.021524,93.473719,26.467361,201.239437,95.521120,24.004375,1.0


In [3]:
# Separate the feature matrix X from the target vector y.
# The identifier columns and the target itself are excluded from the features.
drop_col = ['gameID', 'homeTeamID', 'visitorTeamID', 'homeTeamWin']
y = df['homeTeamWin']
X = df.drop(columns=drop_col)

# Human-readable class labels (presumably for homeTeamWin values 0 and 1)
y_names = ['Lose', 'Win']

print(X.shape, y.shape)

(6112, 6) (6112,)


In [4]:
# Hold out a test split; random_state=42 pins the shuffle so the split is repeatable
from sklearn.model_selection import train_test_split

(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42)

In [5]:
# Rescale the features with a MinMaxScaler that is fitted on the training split only,
# then apply that same fitted scaler to both splits.
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical

X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [6]:
# Integer-encode the target; the encoder learns its classes from the training labels
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_test = map(label_encoder.transform, (y_train, y_test))

In [7]:
# One-hot encode the integer labels of each split
y_train_categorical, y_test_categorical = (
    to_categorical(labels) for labels in (encoded_y_train, encoded_y_test)
)

In [8]:
# Set up for deep learning model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [9]:
# Create a Neural Network model
# The input width is read from the scaled training matrix instead of being
# hard-coded, so the network stays in step with the feature columns kept in X.
n_features = X_train_scaled.shape[1]

model = Sequential()

# Two hidden ReLU layers of 20 units each, followed by a 2-unit softmax output
# (one unit per class of the one-hot encoded target).
model.add(Dense(units=20, activation='relu', input_dim=n_features))
model.add(Dense(units=20, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                140       
_________________________________________________________________
dense_1 (Dense)              (None, 20)                420       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 42        
Total params: 602
Trainable params: 602
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Configure the optimiser, loss and reported metric, then train for 100 epochs
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# verbose=2 prints one summary line per epoch
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=100, shuffle=True, verbose=2)

Train on 4584 samples
Epoch 1/100
4584/4584 - 0s - loss: 0.6734 - accuracy: 0.5862
Epoch 2/100
4584/4584 - 0s - loss: 0.6680 - accuracy: 0.5918
Epoch 3/100
4584/4584 - 0s - loss: 0.6637 - accuracy: 0.6006
Epoch 4/100
4584/4584 - 0s - loss: 0.6608 - accuracy: 0.6130
Epoch 5/100
4584/4584 - 0s - loss: 0.6596 - accuracy: 0.6108
Epoch 6/100
4584/4584 - 0s - loss: 0.6596 - accuracy: 0.6073
Epoch 7/100
4584/4584 - 0s - loss: 0.6585 - accuracy: 0.6091
Epoch 8/100
4584/4584 - 0s - loss: 0.6573 - accuracy: 0.6182
Epoch 9/100
4584/4584 - 0s - loss: 0.6582 - accuracy: 0.6174
Epoch 10/100
4584/4584 - 0s - loss: 0.6567 - accuracy: 0.6156
Epoch 11/100
4584/4584 - 0s - loss: 0.6566 - accuracy: 0.6141
Epoch 12/100
4584/4584 - 0s - loss: 0.6565 - accuracy: 0.6178
Epoch 13/100
4584/4584 - 0s - loss: 0.6572 - accuracy: 0.6084
Epoch 14/100
4584/4584 - 0s - loss: 0.6558 - accuracy: 0.6171
Epoch 15/100
4584/4584 - 0s - loss: 0.6565 - accuracy: 0.6165
Epoch 16/100
4584/4584 - 0s - loss: 0.6558 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x21bae6ebac8>

In [11]:
# Inspect the first row of the scaled training features
X_train_scaled[0]

array([0.55606409, 0.57096436, 0.36772729, 0.54736005, 0.62150006,
       0.48209551])

In [12]:
# Display the unscaled training features
X_train

Unnamed: 0,homeTeamHeightAverage,homeTeamWeightAverage,homeTeamAgeAverage,visitorTeamHeightAverage,visitorTeamWeightAverage,visitorTeamAgeAverage
1615,222.090015,110.783704,27.172431,223.945803,112.713076,28.855486
3749,203.743807,99.169732,24.460000,198.563971,92.933441,24.184375
2860,223.387356,109.056652,27.550278,220.974179,110.377172,34.058333
4844,200.721207,98.395949,27.962083,201.505608,97.831605,27.261528
4761,200.553461,98.038965,25.656319,200.154646,98.988957,26.763542
...,...,...,...,...,...,...
3772,202.053825,99.426736,26.551667,202.908429,97.365161,24.549444
5191,200.163994,102.011297,27.015694,202.044300,99.699522,27.223889
5226,202.518257,98.606554,25.769444,200.990200,101.475082,25.422708
5390,203.835176,100.911652,26.242222,200.393300,99.574154,26.222778


In [13]:
# Evaluate the trained network on the held-out test split and report loss and accuracy
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f'Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}')

1640/1640 - 0s - loss: 0.6736 - accuracy: 0.5860
Deep Neural Network - Loss: 0.6736034716047892, Accuracy: 0.5859755873680115


In [14]:
# Save the model
# Writes the trained network to an .h5 file; the path is relative, so it lands
# in the notebook's current working directory.
model.save('saved_deep_neural_game.h5')

In [15]:
# Save the scaler
from pickle import dump

# Persist the fitted scaler to scaler.pkl.
# A context manager guarantees the file handle is flushed and closed as soon as
# the dump finishes, instead of leaving it open until garbage collection.
with open('scaler.pkl', 'wb') as scaler_file:
    dump(X_scaler, scaler_file)