In [None]:
import pandas as pd
# Read the NFLX price history from the CSV next to the notebook and use its
# 'Date' column as the row index (later cells read the dates from the index).
stock_data = pd.read_csv('./NFLX.csv',index_col='Date')
# Preview the first rows to confirm the columns loaded as expected.
stock_data.head()

In [None]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import datetime as dt

def plot_curve(dates, data1, lab1, data2, lab2, xlab, ylab,
               date_format='%d-%b-%y', tick_interval_days=60):
    """Plot two series against a shared date axis and show the figure.

    Parameters
    ----------
    dates : iterable
        One entry per data point. Strings are parsed with ``date_format``;
        ``datetime.date`` / ``datetime.datetime`` objects are used as-is.
    data1, data2 : array-like
        The two series to draw; each must have one value per entry in ``dates``.
    lab1, lab2 : str
        Legend labels for ``data1`` and ``data2``.
    xlab, ylab : str
        Axis labels.
    date_format : str, optional
        ``strptime`` pattern used to parse string dates. Defaults to
        ``'%d-%b-%y'``.
    tick_interval_days : int, optional
        Spacing, in days, between major x-axis ticks. Defaults to 60.

    Raises
    ------
    ValueError
        If a string in ``dates`` does not match ``date_format``.
    """
    # Parse the dates first so a malformed value fails before a figure is
    # created, instead of leaving an empty figure behind.
    x_dates = [
        d if isinstance(d, dt.date) else dt.datetime.strptime(d, date_format).date()
        for d in dates
    ]

    fig, ax = plt.subplots(figsize=(15, 10))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=tick_interval_days))
    ax.plot(x_dates, data1, label=lab1)
    ax.plot(x_dates, data2, label=lab2)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.legend()
    # Slant the tick labels so neighbouring dates do not overlap.
    fig.autofmt_xdate()
    plt.show()

In [None]:
# Plot the raw daily High and Low columns over the full history.
# No scaling has been applied to `stock_data` at this point, so the y-axis is
# labelled in plain USD rather than "Scaled USD".
plot_curve(dates=stock_data.index.values,
           data1=stock_data['High'], lab1='High',
           data2=stock_data['Low'], lab2='Low',
           xlab='Time Scale', ylab='USD')

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

In [None]:
target_y = stock_data['Close']    # target is close
X_feat = stock_data.iloc[:,0:3]   # input parameters are open, high, low

# The target is appended as a new column below, so it must not already be
# among the positional feature columns.
assert target_y.name not in X_feat.columns, "target column is already among the features"

# Feature Scaling
sc = StandardScaler()
stock_data_ft = sc.fit_transform(X_feat.values)
stock_data_ft = pd.DataFrame(columns=X_feat.columns, data=stock_data_ft, index=X_feat.index)

# lstm_split() treats the LAST column of the array it receives as the target
# and every other column as a feature. Previously `target_y` was never added,
# so the last feature column was silently used as the target. Append the
# standardized Close as the final column so the models predict it.
# A separate scaler is used so `sc` remains a features-only scaler and
# `target_sc.inverse_transform` can map predictions back to prices.
target_sc = StandardScaler()
stock_data_ft[target_y.name] = target_sc.fit_transform(
    target_y.values.reshape(-1, 1)
).ravel()

In [None]:
def lstm_split(data, n_steps):
    """Cut a 2-D array into overlapping windows for sequence models.

    Every window spans ``n_steps`` consecutive rows. All columns except the
    last are the window's inputs; the last column of the window's final row
    is its target.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array, rows ordered as in the source series.
    n_steps : int
        Number of rows per window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)`` with one entry per window. When ``n_steps`` leaves room
        for at least one window, ``x`` has shape
        ``(n_windows, n_steps, n_columns - 1)`` and ``y`` has shape
        ``(n_windows,)``.
    """
    n_windows = len(data) - n_steps + 1
    windows = [data[start:start + n_steps, :-1] for start in range(n_windows)]
    # The target sits on the same row as the window's last input row.
    targets = [data[start + n_steps - 1, -1] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Build 2-row windows from the scaled frame.
x1, y1 = lstm_split(stock_data_ft.values, n_steps=2)

# NOTE: despite its name, `train_split` is the fraction of windows held out
# for testing here: the first 20% of windows become the test set and the
# remaining 80% the training set.
train_split = 0.2
split_idx = int(np.ceil(len(x1)*train_split))
date_index = stock_data_ft.index
# lstm_split() reads each target from the LAST row of its window, so window i
# belongs to date_index[i + n_steps - 1]. Drop the leading dates that have no
# window so the dates line up one-to-one with x1 / y1 (slicing the full index
# directly labelled every sample n_steps - 1 rows too early).
window_dates = date_index[len(date_index) - len(x1):]
assert len(window_dates) == len(y1)
X_train, X_test = x1[split_idx:], x1[:split_idx]
y_train, y_test = y1[split_idx:], y1[:split_idx]
X_train_date, X_test_date = window_dates[split_idx:], window_dates[:split_idx]
print(x1.shape, X_train.shape, X_test.shape, y_test.shape)

In [None]:
# Two stacked 32-unit LSTM layers followed by a single-output dense layer.
# The first layer returns the full sequence so the second LSTM receives one
# vector per time step.
lstm = Sequential([
    LSTM(32, input_shape=(X_train.shape[1], X_train.shape[2]),
         activation='relu', return_sequences=True),
    LSTM(32, activation='relu'),
    Dense(1),
])
lstm.compile(optimizer='adam', loss='mean_squared_error')
lstm.summary()

In [None]:
# Train in the stored sample order (no shuffling) and keep the loss history.
history = lstm.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=4,
    verbose=2,
    shuffle=False,
)

In [None]:
# Predict the scaled target for every held-out window in X_test.
y_pred = lstm.predict(X_test)

In [None]:
# Sanity check: sizes and first entries of the true and predicted targets.
print(y_test.shape, y_test[0], len(y_pred), y_pred[0], sep='\n')

# Size and end points of the date labels used for the test plot.
print(X_test_date.shape)
first_date, last_date = X_test_date[0], X_test_date[-1]
print(first_date, last_date)

In [None]:
# Overlay the true and predicted (scaled) targets for the test windows.
plot_curve(
    dates=X_test_date,
    data1=y_test,
    lab1='True Value',
    data2=y_pred,
    lab2='LSTM Value',
    xlab='Time Scale',
    ylab='Scaled USD',
)

In [None]:
# RMSE is computed as sqrt(MSE): the `squared=False` flag of
# mean_squared_error is deprecated in scikit-learn 1.4 and removed in later
# releases, whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# NOTE(review): y_test holds standardized values, so entries close to zero can
# inflate MAPE; consider scoring on inverse-transformed prices instead.
mape = mean_absolute_percentage_error(y_test, y_pred)
rsme = rmse  # legacy misspelled name, kept in case another cell reads it

print("RMSE: ", rmse)
print("MAPE: ", mape)

### Testing an LSTM with more parameters (64 units per layer instead of 32)

In [None]:
# Same two-layer architecture as before, widened to 64 units per LSTM layer.
lstm = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]),
         activation='relu', return_sequences=True),
    LSTM(64, activation='relu'),
    Dense(1),
])
lstm.compile(optimizer='adam', loss='mean_squared_error')
lstm.summary()

In [None]:
# Train the wider model with the same settings (100 epochs, batch size 4, no shuffling).
history = lstm.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=4,
    verbose=2,
    shuffle=False,
)

In [None]:
# Predict the scaled target for every held-out window with the 64-unit model.
y_pred = lstm.predict(X_test)

In [None]:
# Overlay the true and predicted (scaled) targets for the 64-unit model.
plot_curve(
    dates=X_test_date,
    data1=y_test,
    lab1='True Value',
    data2=y_pred,
    lab2='LSTM Value',
    xlab='Time Scale',
    ylab='Scaled USD',
)

In [None]:
# RMSE is computed as sqrt(MSE): the `squared=False` flag of
# mean_squared_error is deprecated in scikit-learn 1.4 and removed in later
# releases, whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# NOTE(review): y_test holds standardized values, so entries close to zero can
# inflate MAPE; consider scoring on inverse-transformed prices instead.
mape = mean_absolute_percentage_error(y_test, y_pred)
rsme = rmse  # legacy misspelled name, kept in case another cell reads it

print("RMSE: ", rmse)
print("MAPE: ", mape)

### Testing with the window size (history length) increased to 10

In [None]:
# Build 10-row windows from the scaled frame.
x1, y1 = lstm_split(stock_data_ft.values, n_steps=10)

# NOTE: despite its name, `train_split` is the fraction of windows held out
# for testing here: the first 20% of windows become the test set and the
# remaining 80% the training set.
train_split = 0.2
split_idx = int(np.ceil(len(x1)*train_split))
date_index = stock_data_ft.index
# lstm_split() reads each target from the LAST row of its window, so window i
# belongs to date_index[i + n_steps - 1]. Drop the leading dates that have no
# window so the dates line up one-to-one with x1 / y1 (slicing the full index
# directly labelled every sample n_steps - 1 = 9 rows too early).
window_dates = date_index[len(date_index) - len(x1):]
assert len(window_dates) == len(y1)
X_train, X_test = x1[split_idx:], x1[:split_idx]
y_train, y_test = y1[split_idx:], y1[:split_idx]
X_train_date, X_test_date = window_dates[split_idx:], window_dates[:split_idx]
print(x1.shape, X_train.shape, X_test.shape, y_test.shape)

In [None]:
# Two stacked 32-unit LSTM layers and a single-output dense layer, sized from
# the current X_train window shape.
lstm = Sequential([
    LSTM(32, input_shape=(X_train.shape[1], X_train.shape[2]),
         activation='relu', return_sequences=True),
    LSTM(32, activation='relu'),
    Dense(1),
])
lstm.compile(optimizer='adam', loss='mean_squared_error')
lstm.summary()

In [None]:
# Train in the stored sample order (no shuffling) and keep the loss history.
history = lstm.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=4,
    verbose=2,
    shuffle=False,
)

In [None]:
# Predict the scaled target for every held-out window in X_test.
y_pred = lstm.predict(X_test)

In [None]:
# Sanity check: sizes and first entries of the true and predicted targets.
print(y_test.shape, y_test[0], len(y_pred), y_pred[0], sep='\n')

# Size and end points of the date labels used for the test plot.
print(X_test_date.shape)
first_date, last_date = X_test_date[0], X_test_date[-1]
print(first_date, last_date)

In [None]:
# Overlay the true and predicted (scaled) targets for the test windows.
plot_curve(
    dates=X_test_date,
    data1=y_test,
    lab1='True Value',
    data2=y_pred,
    lab2='LSTM Value',
    xlab='Time Scale',
    ylab='Scaled USD',
)

In [None]:
# RMSE is computed as sqrt(MSE): the `squared=False` flag of
# mean_squared_error is deprecated in scikit-learn 1.4 and removed in later
# releases, whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# NOTE(review): y_test holds standardized values, so entries close to zero can
# inflate MAPE; consider scoring on inverse-transformed prices instead.
mape = mean_absolute_percentage_error(y_test, y_pred)
rsme = rmse  # legacy misspelled name, kept in case another cell reads it

print("RMSE: ", rmse)
print("MAPE: ", mape)

### Testing an LSTM with more parameters (64 units per layer, window size 10)

In [None]:
# Same two-layer architecture, widened to 64 units per LSTM layer and sized
# from the current X_train window shape.
lstm = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]),
         activation='relu', return_sequences=True),
    LSTM(64, activation='relu'),
    Dense(1),
])
lstm.compile(optimizer='adam', loss='mean_squared_error')
lstm.summary()

In [None]:
# Train the wider model with the same settings (100 epochs, batch size 4, no shuffling).
history = lstm.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=4,
    verbose=2,
    shuffle=False,
)

In [None]:
# Predict the scaled target for every held-out window with the 64-unit model.
y_pred = lstm.predict(X_test)

In [None]:
# Overlay the true and predicted (scaled) targets for the 64-unit model.
plot_curve(
    dates=X_test_date,
    data1=y_test,
    lab1='True Value',
    data2=y_pred,
    lab2='LSTM Value',
    xlab='Time Scale',
    ylab='Scaled USD',
)

In [None]:
# RMSE is computed as sqrt(MSE): the `squared=False` flag of
# mean_squared_error is deprecated in scikit-learn 1.4 and removed in later
# releases, whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# NOTE(review): y_test holds standardized values, so entries close to zero can
# inflate MAPE; consider scoring on inverse-transformed prices instead.
mape = mean_absolute_percentage_error(y_test, y_pred)
rsme = rmse  # legacy misspelled name, kept in case another cell reads it

print("RMSE: ", rmse)
print("MAPE: ", mape)

In [None]:
# Naive baseline: forecast every test row with the mean of the last 10
# training closes.
train_split = 0.8
split_idx = int(np.ceil(len(stock_data)*train_split))
train = stock_data[['Close']].iloc[:split_idx]
test = stock_data[['Close']].iloc[split_idx:]
# The final value of the 10-row rolling mean is the mean of the last 10
# training rows; repeat it once per test row as an (n, 1) column.
last_window_mean = train['Close'].rolling(10).mean().iloc[-1]
test_pred = np.full((len(test), 1), last_window_mean)

# sqrt(MSE) replaces `squared=False`, which is deprecated in scikit-learn 1.4
# and removed in later releases.
print('Test RMSE: %.3f' % np.sqrt(mean_squared_error(test, test_pred)))
print('Test MAPE: %.3f' % mean_absolute_percentage_error(test, test_pred))

plt.figure(figsize=(10,5))
# Plot both series against the same index so the curves are explicitly aligned.
plt.plot(test.index, test['Close'], label='Actual close')
plt.plot(test.index, test_pred.ravel(), label='Mean of last 10 training closes')
plt.xlabel('Date')
plt.ylabel('Close')
plt.legend()
plt.show()

In [None]:
from statsmodels.tsa.api import SimpleExpSmoothing

# Simple exponential smoothing baseline on the Close column.
X = stock_data[['Close']].values
train_split = 0.8
split_idx = int(np.ceil(len(X)*train_split))
train = X[:split_idx]
test = X[split_idx:]
# Forecasts collected so far, kept as an (n, 1) column like `test`.
test_concat = np.array([]).reshape((0,1))

# NOTE(review): each refit sees the training data plus the model's OWN earlier
# forecasts, never the actual test values. If walk-forward validation on
# actuals was intended, the history should be extended with test[i] instead.
# Confirm the intent before changing this.
for _ in range(len(test)):
    train_fit = np.concatenate((train, test_concat))
    fit = SimpleExpSmoothing(train_fit).fit(smoothing_level=0.2, optimized=False)
    test_pred = fit.forecast(1)
    test_concat = np.concatenate((test_concat, test_pred.reshape((-1,1))))

# sqrt(MSE) replaces `squared=False`, which is deprecated in scikit-learn 1.4
# and removed in later releases.
print('Test RMSE: %.3f' % np.sqrt(mean_squared_error(test, test_concat)))
print('Test MAPE: %.3f' % mean_absolute_percentage_error(test, test_concat))

plt.figure(figsize=(10,5))
plt.plot(test, label='Actual close')
# `test_pred` holds only the final one-step forecast (a single point that draws
# no visible line), so only the full forecast series is plotted.
plt.plot(test_concat, label='Exponential smoothing forecast')
plt.xlabel('Test step')
plt.ylabel('Close')
plt.legend()
plt.show()