## Project Overview and Import Libraries

In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
pd.options.mode.chained_assignment = None
import seaborn as sns
from matplotlib.pylab import rcParams
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

%matplotlib inline

# Seed the NumPy and TensorFlow random number generators.
np.random.seed(1)
tf.random.set_seed(1)

# Plot styling: apply the seaborn theme first, then set the default figure
# size afterwards so the theme call cannot overwrite it.
sns.set(style='whitegrid', palette='muted')
rcParams['figure.figsize'] = (14, 8)

print('Tensorflow version:', tf.__version__)

## Load and Inspect the S&P 500 Index Data

[Data Source](https://www.kaggle.com/pdquant/sp500-daily-19862018): S&P500 Daily Prices 1986 - 2018

In [None]:
# Read the index data, converting the `date` column to datetimes on load.
df = pd.read_csv(
    'S&P_500_Index_Data.csv',
    parse_dates=['date'],
)
df.head()

In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

In [None]:
# Interactive line chart of the `close` column against `date`.
close_trace = go.Scatter(x=df['date'], y=df['close'], mode='lines', name='close')
fig = go.Figure(data=[close_trace])
fig.update_layout(showlegend=True)
fig.show()

## Data Preprocessing

In [None]:
# Chronological split: the first 80% of rows are used for training and the
# remaining rows for testing (no shuffling, so row order is preserved).
train_size = int(len(df) * 0.8)
test_size = len(df) - train_size

# Take explicit copies: later cells assign new columns on `train` and `test`,
# and writing into a bare `iloc` slice of `df` is a chained assignment whose
# outcome depends on whether pandas returned a view or a copy.
train = df.iloc[:train_size].copy()
test = df.iloc[train_size:].copy()
print(train.shape, test.shape)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both the training and the test `close` columns.
scaler = StandardScaler().fit(train[['close']])

for frame in (train, test):
    frame['close'] = scaler.transform(frame[['close']])

## Create Training and Test Splits

In [None]:
def create_sequences(X, y, time_step=30):
    """Slice a series into overlapping fixed-length windows and their next-step targets.

    For every start offset ``i`` a window ``X.iloc[i : i + time_step]`` is
    collected together with the value ``y.iloc[i + time_step]`` that directly
    follows it.

    Parameters
    ----------
    X : pandas.DataFrame or pandas.Series
        Source of the windows; accessed positionally through ``.iloc``.
    y : pandas.Series
        Source of the target values; accessed positionally through ``.iloc``.
    time_step : int, default 30
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Xs, ys)`` where ``Xs`` stacks the ``len(X) - time_step`` windows and
        ``ys`` holds the matching targets. Both are empty when ``X`` has no
        more than ``time_step`` rows.
    """
    Xs, ys = [], []
    # Use the `time_step` argument itself: the window length must come from the
    # caller, not from a global that merely happens to share a similar name.
    for i in range(len(X) - time_step):
        Xs.append(X.iloc[i:(i + time_step)].values)
        ys.append(y.iloc[i + time_step])
    return np.array(Xs), np.array(ys)

In [None]:
# Window length handed to create_sequences for both splits.
time_steps = 30

# Inputs are the single-column `close` frame; targets are taken from the
# `close` series of the same split.
X_train, y_train = create_sequences(train[['close']], train['close'], time_steps)
X_test, y_test = create_sequences(test[['close']], test['close'], time_steps)
print(X_train.shape)

## Build an LSTM Autoencoder

In [None]:
# Read the model's input dimensions from the training array: the size of its
# second axis and of its third axis. These are used as the LSTM input shape.
timesteps = X_train.shape[1]
num_features = X_train.shape[2]

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, RepeatVector, TimeDistributed

# Encoder -> repeated bottleneck -> decoder, emitting one `num_features`-wide
# vector per timestep.
model = Sequential([
    # Encoder: condenses each input window into a single 128-unit vector.
    LSTM(128, input_shape=(timesteps, num_features)),
    Dropout(0.5),
    # Repeat the encoded vector once per timestep to feed the decoder.
    RepeatVector(timesteps),
    # Decoder: returns the full output sequence rather than only the last step.
    LSTM(128, return_sequences=True),
    Dropout(0.5),
    # Apply the same Dense projection independently at every timestep.
    TimeDistributed(Dense(num_features)),
])

model.compile(loss='mae', optimizer='adam')

model.summary()

## Train the Autoencoder

In [None]:
# Stop training once the validation loss has gone 3 epochs without improving.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode='min')

# An autoencoder learns to reproduce its own input. The network emits a
# (timesteps, num_features) sequence per sample, so the target must be X_train
# itself; the 1-D next-step vector y_train does not match that output shape.
# shuffle=False keeps the windows in chronological order, and validation_split
# holds out the last 15% of the training windows for validation.
history = model.fit(
    X_train,
    X_train,
    epochs=100,
    batch_size=128,
    validation_split=0.15,
    callbacks=[es],
    shuffle=False,
)

## Plot Metrics and Evaluate the Model

## Detect Anomalies in the S&P 500 Index Data