In [126]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Location of the pre-filtered games table, relative to the notebook's working directory.
filename = './_data/filtered_data.csv'
# Read through `filename` so the path is defined in exactly one place.
data = pd.read_csv(filename)

In [127]:
# Remove the 'Unnamed: 0' column (presumably a row index saved into the CSV -- TODO confirm).
# errors='ignore' keeps this cell re-runnable: on a second execution the column
# is already gone and a plain drop would raise KeyError.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')
data.head()

Unnamed: 0,Date,opptAbbr,teamAbbr,Spread,Total,Open_Line_OU,O/U,teamDayOff,teamPTS,teamAST,...,opptFIC40,opptOrtg,opptDrtg,opptEDiff,opptPlay%,opptAR,opptAST/TO,opptSTL/TO,opptPoss,opptPace
0,2017-10-17,CLE,BOS,-3.0,201.0,216.5,0.0,0.0,99.0,24.0,...,55.52,102.7,99.7,3.0,0.42,17.59,1.12,0.18,99.3,99.3
1,2017-10-17,GSW,HOU,1.0,243.0,235.0,1.0,0.0,122.0,28.0,...,86.56,118.6,119.6,-1.0,0.47,27.92,2.0,0.29,102.0,102.0
2,2017-10-18,ORL,MIA,-7.0,225.0,207.0,1.0,0.0,109.0,27.0,...,78.23,110.3,103.6,6.7,0.46,19.43,1.57,0.57,105.2,105.2
3,2017-10-18,WAS,PHI,-5.0,235.0,215.0,1.0,0.0,115.0,25.0,...,80.0,115.1,110.3,4.8,0.47,19.04,2.33,0.89,104.3,104.3
4,2017-10-18,BOS,MIL,8.0,208.0,209.5,0.0,0.0,108.0,19.0,...,58.65,102.2,110.4,-8.2,0.42,19.7,1.92,1.0,97.9,97.9


In [128]:
from sklearn.model_selection import train_test_split

def model_preprocessing(dataset,date_range,test_size):
    """
    Takes filtered dataset and returns train_test_split dataset for training model.

    Rows whose 'Date' lies strictly between the two bounds are kept, columns of
    dtype == "object" are removed, the 'O/U' column is split off as the label and
    the remaining columns are cast to float and normalised.

    :dataset: input dataset (pd.DataFrame type); must contain 'Date' and 'O/U' columns
    :date_range: (start, end) bounds, eg. ("2016-01-01","2019-01-01"); both are exclusive
    :test_size: validation set size (forwarded to train_test_split)
    :returns: X_train, X_test, y_train, y_test as produced by train_test_split
    :raises ValueError: if no row falls inside date_range
    """

    # Select rows with a boolean mask. Feeding np.where() positions into .loc only
    # works while the index labels happen to equal the row positions.
    # 'Date' is compared with the bounds as-is; for string dates this is a
    # lexicographic comparison (assumes ISO "YYYY-MM-DD" text -- TODO confirm).
    in_range = (dataset['Date'] > date_range[0]) & (dataset['Date'] < date_range[1])
    seasonal_data = dataset.loc[in_range]

    if seasonal_data.empty:
        raise ValueError(
            "No rows with 'Date' between {!r} and {!r}".format(date_range[0], date_range[1])
        )

    # Keep every column that is not of dtype "object" (dates, team abbreviations, ...).
    seasonal_data = seasonal_data.select_dtypes(exclude=['object'])

    OU_classification = np.asarray(seasonal_data['O/U'])

    features = np.asarray(seasonal_data.drop(columns=['O/U']), dtype=float)
    # NOTE(review): axis=1 rescales each row (one sample) to unit norm, not each
    # feature column -- confirm that per-sample scaling is what is intended.
    features = keras.utils.normalize(features, axis=1)

    return train_test_split(features, OU_classification, random_state=42, test_size=test_size)

# 'Total' is removed from the inputs before preprocessing (presumably because the
# O/U label is derived from it -- TODO confirm); 10% of the rows are held out.
X_train, X_test, y_train, y_test = model_preprocessing(
    dataset=data.drop(columns=['Total']),
    date_range=("2015-01-01", "2029-01-01"),
    test_size=0.1,
)
    

In [129]:
# Stop training once the training loss has failed to improve for 3 epochs in a row.
# The callback only takes effect when handed to `model.fit(..., callbacks=[callback])`.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu6))
# Softmax over 3 output units; the sparse categorical loss below expects integer class ids.
model.add(tf.keras.layers.Dense(3, activation=tf.nn.softmax))

# `compile` takes no `callbacks` argument: recent Keras versions reject the unknown
# keyword and older ones silently ignore it, so it is not passed here.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [130]:
# Callbacks are consumed by `fit`; passing the EarlyStopping instance here lets
# training end before the 50-epoch cap once the monitored loss stops improving.
model.fit(X_train, y_train, epochs=50, validation_split=0.1, batch_size=32, callbacks=[callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x17266b910>