# Neural Network Training
The purpose of this notebook is for training the LSTM RNN for price prediction of Modern Tournament Magic: The Gathering cards. This is intended as a supplement to the Main notebook, which highlights the predictions in a more intuitive way. 

In [59]:
#import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import mpld3
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, TimeDistributed, Conv3D, MaxPooling1D, Flatten
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from MTGDeckScraper import get_price_history

## Useful Functions
Many useful functions for converting the time series data to supervised learning format, including: 
- Differencing and MinMaxScaling the price and tournament play data
- Creating lag_time and target prediction sequence columns
- Creating and fitting the lstm model
- Inverse transforming the data for comparing predictions


In [216]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    #Creates dataframe of supervised data X for n_in days and n_out days forward
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def prepare_data(series, n_test, n_lag, n_seq):
    # extract raw values
    raw_values = series.values.reshape(-1,1)
    # transform data to be stationary
    diff_series = difference(raw_values, 1)
    diff_values = diff_series.values
    diff_values = diff_values.reshape(len(diff_values), 1)
    # rescale values to -1, 1
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_values = scaler.fit_transform(diff_values)
    scaled_values = scaled_values.reshape(len(scaled_values), 1)
    # transform into supervised learning problem X, y
    supervised = series_to_supervised(scaled_values, n_lag, n_seq)
    supervised_values = supervised.values
    #Output non_diff_scaled y_vals
    y_raw = series_to_supervised(raw_values,n_lag,n_seq)
    y_raw_values = y_raw.values
    # split into train and test sets
    y_train_raw, y_test_raw = y_raw_values[0:-n_test], y_raw_values[-n_test:]
    train, test = supervised_values[0:-n_test], supervised_values[-n_test:]
    return scaler, train, test,y_train_raw,y_test_raw


# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return pd.Series(diff)

# invert differenced value
def inverse_difference(history, yhat, interval=1):
    for j in range(1,len(yhat)+1):
        yhat[0][j] += yhat[0][j-1]
    ans = yhat+history
    return ans

# fit an LSTM network to training data
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons,test):
    # reshape training into [samples, timesteps, features]
    X, y = train[:, 0:n_lag*2], train[:, n_lag*2:]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    X_val, y_val = test[:, 0:n_lag*2], test[:, n_lag*2:]
    X_val = X_val.reshape(X_val.shape[0], 1, X_val.shape[1])
    # design network
    model = Sequential()
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]),
                   kernel_initializer='random_uniform', return_sequences=True, stateful=True))
    #model.add(Conv1D(filters = 32, kernel_size = [5], padding='same'))
    model.add(LSTM(n_neurons, stateful=True, return_sequences = True))
    model.add(LSTM(n_neurons))
    #model.add(Dropout(0.2))
    model.add(Dense(y.shape[1]))
    model.compile(loss=['mean_squared_error'], optimizer='adam')
    # fit network
    histories = []
    for i in range(nb_epoch):
        h_m = model.fit(X, y, epochs=1, batch_size=n_batch, validation_data = (X_val,y_val), verbose=True, shuffle=False)
        histories.append(h_m)
        model.reset_states()
    return model, histories

#Forecast to test values
def forecast_lstm(model, X, n_batch):
    # reshape input pattern to [samples, timesteps, features]
    X = X.reshape(1, 1, len(X))
    # make forecast
    forecast = model.predict(X, batch_size=1)
    # convert to array
    return [x for x in forecast[0, :]]

def make_forecasts(model, n_batch, train, test, n_lag, n_seq):
    forecasts = list()
    for i in range(len(test)):
        X, y = test[i, 0:n_lag*2], test[i, n_lag*2:]
        # make forecast
        forecast = forecast_lstm(model, X, n_batch)
        # store the forecast
        forecasts.append(forecast)
    return forecasts

def inverse_transform(series, forecasts, scaler):
    inverted = list()
    for i in range(len(forecasts)):
        # create array from forecast
        forecast = np.array(forecasts[i])
        forecast = forecast.reshape(1, len(forecast))
        # invert scaling
        inv_scale = scaler.inverse_transform(forecast)
        # store
        undiffed = inverse_difference(series[i],inv_scale)
        inverted.append(undiffed)
    return inverted

## Loading Data
Loads the price and tournament play data from previously generated csv for all cards in the dataset. From this data, tournament play frequencies are generated and converted to supervised learning data. The most recent 400 days of data are taken off and used as validation data for fitting the model. 

In [186]:
#Load Data from CSV's
# cards = [['Shadowmoor','Fulminator Mage'], ['New Phyrexia', 'Surgical Extraction'], 
#          ['Future Sight', 'Tarmogoyf'], ['Zendikar','Scalding Tarn'], 
#          ['Innistrad', 'Snapcaster Mage'], ['Time Spiral','Ancestral Vision'], 
#          ['Mirrodin','Chalice of the Void'],['Ninth Edition', 'Blood Moon'],
#          ['Khans of Tarkir', 'Polluted Delta'], ['Mirrodin Besieged', 'Inkmoth Nexus'],
#          ['Eighth Edition', 'Ensnaring Bridge'], ['New Phyrexia', 'Karn Liberated'],
#          ['Ravnica City of Guilds', 'Sacred Foundry'], ['Magic 2011', 'Leyline of the Void'],
#          ['Avacyn Restored', 'Cavern of Souls'], ['Ravnica City of Guilds', 'Chord of Calling']]
CopiesPlayed = pd.read_csv('Copies_Played_Culled.csv')
CopiesPlayed['Date'] = pd.to_datetime(CopiesPlayed['Date'])
CopiesPlayed = CopiesPlayed.set_index('Date')
Resamp = CopiesPlayed.resample('D').sum()

Prices = pd.read_csv('All_Prices.csv')
Prices['Date'] = pd.to_datetime(Prices['Date'])

cards = list(Prices.keys()[1:])
bad_cards = ['Urza\'s Tower']
for baddie in bad_cards:
    cards.remove(baddie)

All_Data = []
del(FullTrain)

n_lag = 30
n_seq = 7
n_test = 400
val_card = 2

for card in cards:
    Resamp['Meta_Share'] = Resamp[card]/Resamp['Total']
    Price = Prices[['Date',card]]
    Price = Price.rename(columns = {card:'Price'})
    Price = Price.fillna(method='ffill')
    Price = Price.dropna()
    Resamp['Date'] = Resamp.index
    df_test = Price.merge(Resamp[[card,'Meta_Share']],how='outer', left_on = 'Date', right_on = 'Date')
    df_test = df_test.sort_values(by='Date')
    df_test = df_test.fillna(method='ffill')
    df_test.dropna(inplace=True)
    df_test['Meta_Share'] = df_test['Meta_Share']
    df_test = df_test.fillna(method='ffill')
    df_test['Meta_Share'] = df_test['Meta_Share'].replace(0,method='ffill')
    df_test['Rolling_Meta'] = df_test['Meta_Share'].rolling(window = 7).mean()

    dataset_p = df_test['Price']
    dataset_m = df_test['Meta_Share']
    scaler_p,train_p, test_p, train_p_raw, test_p_raw = prepare_data(dataset_p,n_test, n_lag, n_seq)
    scaler_m,train_m, test_m,train_m_raw,test_m_raw = prepare_data(dataset_m,n_test,n_lag,n_seq)

    train = np.concatenate((train_p[:,:n_lag],train_m[:,:n_lag],train_p[:,n_lag:]),axis=1)
    test = np.concatenate((test_p[:,:n_lag],test_m[:,:n_lag],test_p[:,n_lag:]),axis=1)
    
    info_dict = {'card':card, 'train':train, 'test':test, 
                 'dataset_p':dataset_p, 'dataset_m':dataset_m, 
                 'scaler_p':scaler_p, 'scaler_m':scaler_m,
                'test_p_raw':test_p_raw, 'test_m_raw':test_m_raw}
    All_Data.append(info_dict)
    try:
        FullTrain = np.concatenate((FullTrain,train), axis=0)
        FullTest = np.concatenate((FullTest,test), axis=0)
    except:
        FullTrain = train
        FullTest = test



## Fitting the Model

In [None]:
n_batch = 200
n_epochs = 50
n_neurons = 60


FullTrain = FullTrain[-(len(FullTrain) - len(FullTrain)%n_batch):] #Stateful networks need samples to be divisible by batch size
FullTest = FullTest[-(len(FullTest) - len(FullTest)%n_batch):] #Stateful networks need samples to be divisible by batch size

model,histories = fit_lstm(FullTrain,n_lag,n_seq,n_batch, n_epochs,n_neurons,FullTest)

In [378]:
#Only Metashare/price
n_lag = 30
n_seq = 7
n_test = int(len(dataset_p)/6)
n_batch = 1
n_epochs = 50
n_neurons = 4

#Generate Model
scaler_p,train_p, test_p, train_p_raw,test_p_raw = prepare_data(dataset_p,n_test, n_lag, n_seq)
scaler_m,train_m, test_m,train_m_raw,test_m_raw = prepare_data(dataset_m,n_test,n_lag,n_seq)
#forecasts = make_forecasts(model,n_batch,train,test,n_lag,n_seq)
train = np.concatenate((train_m[:,:n_lag],train_p[:,n_lag:]),axis=1)
test = np.concatenate((test_m[:,:n_lag],test_p[:,n_lag:]),axis=1)
#model,histories = fit_lstm(train,n_lag//2,n_seq,n_batch, n_epochs,n_neurons,test)



# Plotting Losses
Plots the training and validation dataset losses for each training epoch for diagnosing possible overfitting. 

In [208]:
v_l = [j.history['val_loss'][0]/2 for j in histories]
t_l = [j.history['loss'][0]*5 for j in histories]

plt.plot(t_l)
plt.plot(v_l)
print(v_l.index(min(v_l)))
print(min(t_l))

## Saving Model Weights

In [278]:
model.save_weights('Train_on_All.h5')

## Reload Model
Here, the model is re-initialized from the saved weights as a work around issues with batching. By default, LSTM can only predict in batches that are identical to the training batch size. Here the model is reloaded with a "training" batch size of 1, but with the same weights that were saved in the previous cell. This allows us to predict one entry at a time.

In [212]:
model = Sequential()
model.add(LSTM(n_neurons, batch_input_shape=(1, 1, n_lag*2),
               kernel_initializer='random_uniform', return_sequences=True, stateful=True))
#model.add(Conv1D(filters = 32, kernel_size = [5], padding='same'))
model.add(LSTM(n_neurons-1, stateful=True))
#model.add(LSTM(n_neurons))
#model.add(Dropout(0.4))
model.add(Dense(n_seq))
model.compile(loss=['mean_squared_error'], optimizer='adam')
model.load_weights('Train_on_16.h5')

## RMSE Comparisons
Choose a card in the data set. These cells will output the RMSE for that particular card over the validation set.

In [None]:
val_card = cards.index('Tarmogoyf')

In [277]:
test_forecasts = make_forecasts(model,n_batch,train,All_Data[val_card]['test'],n_lag,n_seq)
#train_forecasts = make_forecasts(model,n_batch,train,train,n_lag,n_seq)

test_predictions = inverse_transform(np.array(All_Data[val_card]['test_p_raw'][:,-n_seq-1]),test_forecasts,scaler_m)

predictions = test_predictions
t_p = All_Data[val_card]['test_p_raw'][:,-n_seq:]
for i in range(0,len(predictions[0][0])):
    p = np.array([j[0][i] for j in predictions])
    t = np.array([j[i] for j in t_p])
    print('Days Ahead: ' + str(i+1) + ', RMSE (USD): ' + str(np.sqrt(np.mean((p-t)**2))))

Days Ahead: 1, RMSE (USD): 0.9966643178047749
Days Ahead: 2, RMSE (USD): 1.1575291615746464
Days Ahead: 3, RMSE (USD): 1.3063169534740924
Days Ahead: 4, RMSE (USD): 1.4314239537328344
Days Ahead: 5, RMSE (USD): 1.5460610916649307
Days Ahead: 6, RMSE (USD): 1.6317380887932937
Days Ahead: 7, RMSE (USD): 1.7302835850286142


## RMSE Comparison
The model that I seemed to perform best in terms of RMSE was a linear ridge regression on the pricing data. Here we train a ridge regressor and can compare the RMSE of the two models. For most cards, the LSTM NN provides 5-12% reduction in the RMSE for most prediction windows 

Occasionally there is little difference bewteen the two models. This may be due to modern tournament play not being a primary driver of demand in the validation window for that particular card. 

In [275]:
from sklearn.linear_model import Ridge

#Training a Ridge regressor and pr
def prepare_lin_data(series, n_test, n_lag, n_seq):
    # extract raw values
    raw_values = series.values.reshape(-1,1)
    # transform into supervised learning problem X, y
    supervised = series_to_supervised(raw_values, n_lag, n_seq)
    supervised_values = supervised.values
    # split into train and test sets
    train, test = supervised_values[0:-n_test], supervised_values[-n_test:]
    return train, test

train_r, test_r = prepare_lin_data(All_Data[val_card]['dataset_p'],n_test, n_lag, n_seq)

# reshape training into [samples, timesteps, features]
X, y = train_r[:, 0:n_lag], train_r[:, n_lag:]
X_val, y_val = test_r[:, 0:n_lag], test_r[:, n_lag:]
X = X.reshape(X.shape[0], X.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
# 
Ridgemodel = Ridge(alpha=.1)
Ridgemodel.fit(X,y.reshape(-1,n_seq))
RidgetestPredict = np.array([i for i in Ridgemodel.predict(test_r[:,:n_lag])])
RidgetrainPredict = np.array([i for i in Ridgemodel.predict(train_r[:,:n_lag])])

predictions = RidgetestPredict
t_p = test_r[:,-n_seq:]
for i in range(0,len(predictions[0])):
    p = np.array([j[i] for j in predictions])
    t = np.array([j[i] for j in t_p])
    print('Days Ahead: ' + str(i+1) + ', RMSE (USD): ' + str(np.sqrt(np.mean((p-t)**2))))

Days Ahead: 1, RMSE (USD): 0.9717002946090008
Days Ahead: 2, RMSE (USD): 1.15925736970947
Days Ahead: 3, RMSE (USD): 1.3325487732983992
Days Ahead: 4, RMSE (USD): 1.4801430471722685
Days Ahead: 5, RMSE (USD): 1.6137028092512138
Days Ahead: 6, RMSE (USD): 1.7075421005911933
Days Ahead: 7, RMSE (USD): 1.8024147733603206
