<a href="https://colab.research.google.com/github/dt2229/sandp500/blob/main/PFE_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('white')

import statsmodels.api as sm
import statsmodels.tsa.stattools as ts

from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time

In [2]:
!pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.1.87-py2.py3-none-any.whl (29 kB)
Collecting requests>=2.26
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 409 kB/s 
Installing collected packages: requests, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
Successfully installed requests-2.28.1 yfinance-0.1.87


In [3]:
import yfinance

PFE = yfinance.download (tickers = "PFE", start = "2012-01-03", 
                              end = "2022-10-15", interval = "1d")

[*********************100%***********************]  1 of 1 completed


In [4]:
PFE.reset_index(inplace = True)
PFE['Date'] = PFE['Date'].apply(lambda x: str(x)[:10])
PFE.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2012-01-03,20.740038,20.872866,20.721064,20.844402,14.024997,53124340
1,2012-01-04,20.796965,20.825426,20.635674,20.65465,13.897316,31912485
2,2012-01-05,20.60721,20.60721,20.294119,20.493359,13.788795,52764821
3,2012-01-06,20.60721,20.664137,20.455408,20.464895,13.769644,31613571
4,2012-01-09,20.455408,20.721064,20.40797,20.702087,13.929235,41786041


In [5]:
PFE.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
PFE['ret'] = PFE['Adj Close'].pct_change()
data = PFE[['Date', 'Adj Close']]
data.head()

Unnamed: 0,Date,Adj Close
0,2012-01-03,14.024997
1,2012-01-04,13.897316
2,2012-01-05,13.788795
3,2012-01-06,13.769644
4,2012-01-09,13.929235


In [6]:
data.set_index('Date', inplace = True)
import math

In [7]:
close_prices = data['Adj Close']
values = close_prices.values
training_data_len = math.ceil(len(values)* 0.8)

scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(values.reshape(-1,1))
train_data = scaled_data[0: training_data_len, :]

x_train = []
y_train = []

for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i, 0])
    y_train.append(train_data[i, 0])
    
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

In [8]:

test_data = scaled_data[training_data_len-60: , : ]
x_test = []
y_test = values[training_data_len:]

for i in range(60, len(test_data)):
  x_test.append(test_data[i-60:i, 0])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [9]:
from sklearn.model_selection import TimeSeriesSplit
def mape(actual, pred): 
    actual, pred = np.array(actual), np.array(pred)
    return np.mean(np.abs((actual - pred) / actual)) * 100

In [10]:
tscv = TimeSeriesSplit(max_train_size=None, n_splits=3)
print(tscv)

mape_lst = []

for train_index, valid_index in tscv.split(x_train):
  X_train, X_valid = x_train[train_index], x_train[valid_index]
  Y_train, Y_valid = y_train[train_index], y_train[valid_index]
  model = keras.Sequential()
  model.add(layers.LSTM(100, return_sequences=True, input_shape=(x_train.shape[1], 1)))
  model.add(layers.LSTM(100, return_sequences=False))
  model.add(layers.Dense(25))
  model.add(layers.Dense(1))
  model.summary()
  model.compile(optimizer='adam', loss='mean_squared_error')
  model.fit(X_train, Y_train, batch_size= 64, epochs=3)
  predictions = model.predict(X_valid)
  #predictions = scaler.inverse_transform(predictions)
  metrics = mape(predictions, Y_valid)
  print(metrics)
  mape_lst.append(metrics)

TimeSeriesSplit(gap=0, max_train_size=None, n_splits=3, test_size=None)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 60, 100)           40800     
                                                                 
 lstm_1 (LSTM)               (None, 100)               80400     
                                                                 
 dense (Dense)               (None, 25)                2525      
                                                                 
 dense_1 (Dense)             (None, 1)                 26        
                                                                 
Total params: 123,751
Trainable params: 123,751
Non-trainable params: 0
_________________________________________________________________
Epoch 1/3
Epoch 2/3
Epoch 3/3
23.191254834069518
Model: "sequential_1"
_______________________________________

In [11]:
model = keras.Sequential()
model.add(layers.LSTM(100, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(layers.LSTM(100, return_sequences=False))
model.add(layers.Dense(25))
model.add(layers.Dense(1))
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 60, 100)           40800     
                                                                 
 lstm_7 (LSTM)               (None, 100)               80400     
                                                                 
 dense_6 (Dense)             (None, 25)                2525      
                                                                 
 dense_7 (Dense)             (None, 1)                 26        
                                                                 
Total params: 123,751
Trainable params: 123,751
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size= 1, epochs=3)

Epoch 1/3
Epoch 2/3

In [15]:
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)



In [16]:
mape(predictions, y_test)

21.174946984069194