## Package import

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import pickle
from tensorflow.keras import layers
import csv
# Show NumPy floats with 3 digits of precision and suppress scientific
# notation for small values when arrays are printed.
np.set_printoptions(precision=3, suppress=True)


## Cleaning

# Prepare data for Neural Network

In [35]:
#Evaluate for profitability
def get_profit(profit_arr):
    """
    Calculates the profitability of a model
    Args:
        profit_arr: Sequence of per-game rows. Only positions 0, 1 and 2 of
            each row are read and they are compared as
            [actual result, predicted result, spread].
    Returns: The profitability, in dollars, of a given model. Every game is
        one bet: a winning bet adds 0.9 and a losing bet subtracts 1. An
        empty input returns 0.
    """
    profit = 0
    for game in profit_arr:
        actual, predicted, spread = game[0], game[1], game[2]

        # The side of the spread the prediction falls on decides the bet;
        # the bet wins only if the actual result is strictly on that side.
        if predicted < spread:
            won = actual < spread
        else:
            won = actual > spread

        # NOTE(review): an actual result exactly equal to the spread is
        # scored as a loss here; confirm that ties should not be refunded.
        if won:
            profit = profit + .9
        else:
            profit = profit - 1
    return profit

def train_NN(feature_df, train_size=350):
    """
    Fits and trains a sequential Neural Network based on the features in the feature_df
    Args:
        feature_df: DataFrame holding the six team-statistic columns listed
            in ``feature_cols`` below plus 'spread' and 'margin'. Rows in
            which any of these columns is missing or non-numeric are dropped.
        train_size: Number of shuffled rows used for training; the remaining
            rows form the test set. Defaults to 350.
    Returns: Tuple ``(profit, mean, sd)``: the total profit reported by
        get_profit on the test set, and the mean and standard deviation of
        the difference between actual score and predicted score (both in
        the 0.01-scaled units the network is trained on).
    Raises:
        ValueError: If the cleaned data cannot be split into a non-empty
            training set and a non-empty test set.
    """
    feature_cols = [
        'away_past_average_yards/play',
        'home_past_average_yards/play',
        'away_past_starting_yard_line',
        'home_past_starting_yard_line',
        'away_schedule_strength',
        'home_schedule_strength',
        'spread',
    ]
    label_col = 'margin'
    spread_idx = feature_cols.index('spread')
    n_features = len(feature_cols)

    # Convert every used column to numbers and drop rows where any of them is
    # missing or unparseable. The label is included so NaN margins cannot
    # reach the loss function.
    numeric = feature_df[feature_cols + [label_col]].apply(
        pd.to_numeric, errors='coerce')
    numeric = numeric.dropna()

    # np.array copies, so the in-place negation and shuffle below never touch
    # the caller's DataFrame.
    data = np.array(numeric, dtype='float32')

    # The spread sign is flipped before use.
    # NOTE(review): presumably so it shares the sign convention of 'margin';
    # confirm against the data source.
    data[:, spread_idx] *= -1

    if not 0 < train_size < len(data):
        raise ValueError(
            "train_size must leave at least one training row and one test "
            "row; got train_size=%d for %d usable rows"
            % (train_size, len(data)))

    np.random.shuffle(data)

    training, test = data[:train_size], data[train_size:]
    x_train = training[:, :n_features]
    x_test = test[:, :n_features]

    # Labels are scaled by 0.01 for training; the spread is scaled the same
    # way further down before it is compared with them.
    y_train = training[:, n_features] * .01
    y_test = test[:, n_features] * .01

    #Normalize features
    normalize = layers.Normalization()
    normalize.adapt(x_train)

    #Build Neural Network
    # Stop once the training loss has not improved for 8 epochs and roll back
    # to the best weights seen.
    earlystopping = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                                     mode="min", patience=8,
                                                     restore_best_weights=True)
    epoch = 430

    #model architecture - No activation function included intentionally
    #as it led to a decrease in predictive power
    model = tf.keras.Sequential([
        normalize,
        layers.Dense(6),
        layers.Dense(1)
    ])
    model.compile(loss=tf.keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam())

    model.fit(x_train, y_train, epochs=epoch, verbose=0,
              callbacks=[earlystopping])
    prediction = model.predict(x_test)

    # Column vectors so they line up with the prediction output when
    # concatenated and subtracted.
    spread = x_test[:, [spread_idx]] * .01
    actual_scores = y_test.reshape(-1, 1)
    print()

    # Row layout consumed by get_profit: [actual, predicted, spread].
    profit_arr = np.concatenate((actual_scores, prediction, spread), axis=1)

    correct_win = (np.sign(actual_scores) == np.sign(prediction))
    difference = actual_scores - prediction
    print("% of correct winning team")
    print(correct_win.sum()/correct_win.size)
    print()
    print("Mean")
    mean = difference.mean()
    print(mean)
    print()
    print("Standard Deviation")
    sd = difference.std()
    print(sd)
    print("")
    print("Total Profit")
    profit = get_profit(profit_arr)
    print(profit)

    return profit, mean, sd
