## Functions and Run Model 

#### Alvee Hoque 

I recommend running this on a GPU because training takes a while. If you do not have access to one, use Google Colab with a GPU or TPU, run with fewer epochs, and be more selective about which players you include. The following notebook compares the results.

In [None]:
import pandas as pd
import numpy as np 
import warnings
import timeit

# ML Imports 
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler 

from tensorflow import keras 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

In [None]:
# Install a blanket 'ignore' filter (no message/category/module restriction),
# so warnings raised after this point are not displayed.
warnings.filterwarnings('ignore')

In [None]:
# Load one fantasy-points table per position (QB, RB, WR, TE), in that order.
qb, rb, wr, te = (
    pd.read_csv(csv_path)
    for csv_path in (
        'fantasy_data/qb_fantasy.csv',
        'fantasy_data/rb_fantasy.csv',
        'fantasy_data/wr_fantasy.csv',
        'fantasy_data/te_fantasy.csv',
    )
)

In [None]:
# The player-name column is loaded as 'Unnamed: 0'; relabel it 'name' in
# every position table.
qb, rb, wr, te = (
    frame.rename(columns={'Unnamed: 0': 'name'})
    for frame in (qb, rb, wr, te)
)

In [None]:
# All Functions Required

def series_to_supervised(data, lag=1):
    """Frame a univariate series as a supervised-learning table.

    Each output row pairs the value(s) observed in the previous ``lag`` steps
    (the inputs) with the value at the current step (the target).

    Args:
        data: One-dimensional sequence of observations, oldest first.
        lag: Number of previous steps to expose as input columns (>= 1).

    Returns:
        A DataFrame with ``lag`` input columns followed by the target column
        ``y``. For ``lag == 1`` the single input column is named ``X``; for
        larger lags the inputs are named ``X1`` (one step back) through
        ``X<lag>``. Cells with no history available are filled with 0.

    Raises:
        ValueError: If ``lag`` is less than 1.
    """
    if lag < 1:
        raise ValueError(f'lag must be at least 1, got {lag}')

    df = pd.DataFrame(data)

    # Input values are the previous game(s); the output value is the current game.
    shifted = [df.shift(step) for step in range(1, lag + 1)]
    shifted.append(df)
    df = pd.concat(shifted, axis=1)

    # Shifting leaves the first `lag` rows without history; fill those with 0.
    df.fillna(0, inplace=True)

    # Keep the historical 'X'/'y' names for the default lag, and generate one
    # name per shifted column otherwise so any lag is supported.
    if lag == 1:
        input_names = ['X']
    else:
        input_names = [f'X{step}' for step in range(1, lag + 1)]
    df.columns = input_names + ['y']
    return df

def ttsplit(dataframe, test_size=16):
    """Split a time-ordered frame into train and test arrays.

    The last ``test_size`` rows become the test set. The training set is
    everything before that except the very first row, which is always
    discarded (presumably because a lag-1 framing has no history for it —
    confirm against the caller).

    Args:
        dataframe: pandas DataFrame (or Series) ordered oldest to newest.
        test_size: Number of trailing rows to hold out. Defaults to 16.

    Returns:
        Tuple ``(train, test)`` of NumPy arrays taken from ``.values``.

    Raises:
        ValueError: If ``test_size`` is negative.
    """
    if test_size < 0:
        raise ValueError(f'test_size must be non-negative, got {test_size}')

    # Use an explicit split position instead of a negative slice so that
    # test_size == 0 works; clamp at 0 so a frame shorter than test_size
    # lands entirely in the test set.
    split = max(len(dataframe) - test_size, 0)
    train = dataframe.iloc[1:split].values
    test = dataframe.iloc[split:].values
    return train, test

def scale(train,test):
    """Min-max scale the train and test arrays into the range [-1, 1].

    The scaler is fitted on ``train`` only and then applied to both arrays.

    Args:
        train: Array of training rows; must expose ``shape[0]`` and ``shape[1]``.
        test: Array of test rows with the same column layout as ``train``.

    Returns:
        Tuple ``(scaler, train_scaled, test_scaled)``; the fitted scaler is
        returned so predictions can later be mapped back to original units.
    """
    fitted = MinMaxScaler(feature_range=(-1,1)).fit(train)

    # Force an explicit (rows, columns) layout before each transform.
    n_train_rows, n_train_cols = train.shape[0], train.shape[1]
    train_scaled = fitted.transform(train.reshape(n_train_rows, n_train_cols))

    n_test_rows, n_test_cols = test.shape[0], test.shape[1]
    test_scaled = fitted.transform(test.reshape(n_test_rows, n_test_cols))

    return fitted, train_scaled, test_scaled

def fit_lstm(train, batch_size, num_epoch, neurons):
    """Build and train a single-layer stateful LSTM regressor.

    Args:
        train: 2-D array whose last column is the target and whose remaining
            columns are the input features.
        batch_size: Batch size used for both the fixed input shape and fitting.
        num_epoch: Number of passes over the training data.
        neurons: Number of units in the LSTM layer.

    Returns:
        The fitted Keras ``Sequential`` model (LSTM followed by ``Dense(1)``),
        compiled with mean-squared-error loss and the Adam optimizer.
    """
    features = train[:, 0:-1]
    targets = train[:, -1]

    # Reshape to (samples, 1, n_features): every sample is a single timestep.
    features = features.reshape(features.shape[0], 1, features.shape[1])
    input_shape = (batch_size, features.shape[1], features.shape[2])

    model = Sequential([
        LSTM(neurons, batch_input_shape=input_shape, stateful=True),
        Dense(1),
    ])
    model.compile(loss='mean_squared_error', optimizer='adam')

    # Fit one epoch at a time, without shuffling, so the layer state can be
    # cleared explicitly after every pass over the data.
    for _ in range(num_epoch):
        model.fit(features, targets, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model

def forecast_lstm(model, batch_size, X): 
    """Return the model's scalar prediction for one feature vector.

    Args:
        model: Object exposing ``predict(array, batch_size=...)``.
        batch_size: Batch size forwarded to ``model.predict``.
        X: 1-D array of input features for a single sample.

    Returns:
        The element at position ``[0, 0]`` of the prediction array.
    """
    n_features = len(X)
    # Shape the sample as (1 sample, 1 timestep, n_features).
    sample = X.reshape(1, 1, n_features)
    prediction = model.predict(sample, batch_size=batch_size)
    return prediction[0, 0]


def invert_scale(scaler, X, value): 
    """Map a scaled prediction back through the scaler's inverse transform.

    The scaler works on whole rows, so the prediction is appended to its
    input features to rebuild a row, the row is inverse-transformed, and
    only the last entry is returned.

    Args:
        scaler: Object exposing ``inverse_transform`` on a 2-D array.
        X: Iterable of scaled input features for the sample.
        value: Scaled predicted value for the sample.

    Returns:
        The last element of the inverse-transformed row.
    """
    row = np.array(list(X) + [value])
    row = row.reshape(1, len(row))
    return scaler.inverse_transform(row)[0, -1]

In [None]:
def run_model(dataframe,position):
    """Train one LSTM per player and save actual vs. predicted test-set points.

    For every row of ``dataframe`` with more than 40 non-null cells, the
    player's non-null values (everything after the first column) are turned
    into a ``(game index, points)`` table, split into train/test sets, scaled,
    fitted with an LSTM, and predicted for each test game.

    Two CSV files are written to the working directory:
    ``{position}_actual_last16.csv`` and ``{position}_prediction_last16.csv``.
    Each has a ``name`` column followed by one column per test game,
    numbered from 1.

    Args:
        dataframe: Table with the player name in the first column (``name``)
            and one column per game after it.
        position: Prefix used in the output file names (e.g. ``'qb'``).

    Raises:
        ValueError: If no row of ``dataframe`` has more than 40 non-null cells.
    """
    # Keep only rows with more than 40 non-null cells.
    df_subset = dataframe[dataframe.count(axis=1) > 40]
    if df_subset.empty:
        raise ValueError(
            f'no rows with more than 40 non-null values for position {position!r}'
        )

    # Per-player result frames are collected here and concatenated once at
    # the end, instead of re-concatenating the running result on every pass.
    actual_frames = []
    pred_frames = []

    for _, player_row in df_subset.iterrows():
        player_name = player_row.iloc[0]

        # Take the stats from this very row rather than looking the player up
        # by name, so duplicate names cannot pull in another row's data.
        # Null entries are dropped.
        player_stats = player_row.iloc[1:].dropna().values

        # Turn into supervised learning problem with time lag
        df_lag = series_to_supervised(player_stats)

        # Build the table actually used for modelling: game index -> points.
        fantasy_points = list(df_lag.y.values)
        game = list(range(len(df_lag)))
        df_nolag = pd.DataFrame({'game': game, 'y': fantasy_points})

        train, test = ttsplit(df_nolag)
        scaler, train_scaled, test_scaled = scale(train, test)

        # batch_size=1, 400 epochs, 8 LSTM units
        rnn = fit_lstm(train_scaled, 1, 400, 8)

        # Predict each test game and map it back through the scaler.
        preds = []
        for scaled_row in test_scaled:
            X = scaled_row[0:-1]
            ypred = forecast_lstm(rnn, 1, X)
            preds.append(invert_scale(scaler, X, ypred))

        actual = test[:, 1].tolist()
        # Number the game columns from the real test length so the labels
        # always match the data.
        games = list(range(1, len(actual) + 1))

        # Column labels must be a list: pandas 2.x rejects a set here.
        tmp_name_df = pd.DataFrame(data=[player_name], columns=['name'])
        tmp_actual_df = pd.DataFrame(data=[actual], columns=games)
        tmp_pred_df = pd.DataFrame(data=[preds], columns=games)

        actual_frames.append(pd.concat([tmp_name_df, tmp_actual_df], axis=1))
        pred_frames.append(pd.concat([tmp_name_df, tmp_pred_df], axis=1))

    final_actuals = pd.concat(actual_frames)
    final_preds = pd.concat(pred_frames)

    final_actuals.to_csv(f'{position}_actual_last16.csv',index=False, header=True)
    final_preds.to_csv(f'{position}_prediction_last16.csv',index=False, header=True)

In [None]:
# Measure the wall-clock time of a full quarterback run.
start = timeit.default_timer()
run_model(dataframe=qb, position='qb')
stop = timeit.default_timer()
elapsed_seconds = stop - start
print('Time to Run (seconds): ', elapsed_seconds)