<a href="https://colab.research.google.com/github/graphtrek/stockforecast/blob/main/graphtrek_06.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MAE
from tensorflow.keras.layers import Dense, Dropout
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from keras.preprocessing.sequence import TimeseriesGenerator

In [28]:
# --- Tunable settings for the whole notebook ---

# Symbol whose daily history is downloaded.
ticker = "QQQ"
#features = ['Close','High','Low','Open']

# Fraction of the rows used for training; the remainder is held out for testing.
split_percent = 0.90

# Upper bound on the number of most recent daily rows kept after download.
# This counts rows (trading days), not calendar days.
max_data_size = 730

# Number of consecutive past rows fed to the model to predict the next row.
look_back = 5

In [29]:
# Download the full daily adjusted history for `ticker` and turn it into two
# parallel row lists: `data` (for the DataFrame) and `lstm_data` (model features).
# NOTE(review): the API key is hardcoded in the URL and echoed by print(url);
# consider reading it from an environment variable and not printing it.
url = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol='+ticker+'&outputsize=full&apikey=3F4URDEKOPLFH25T'
print(url)
stock_api_response = pd.read_json(url)
stock_api_data = stock_api_response['Time Series (Daily)']
#print(stock_api_data)
# Remove the index entries that are not dated price rows.
stock_api_data = stock_api_data.drop(index=['1. Information','2. Symbol','3. Last Refreshed','4. Output Size','5. Time Zone'])
#print(list(stock_api_data.items()))
data = []       # rows: [date, close, volume, high, low, open, dividend]
lstm_data = []  # rows: [close, high - low, volume]
split_coefficient = 1  # cumulative split factor seen so far while walking back in time

# Rows are visited newest first (data[0] is used as the latest date below).
for key, value in stock_api_data.items():
  date = key
  # Named open_price so the builtin open() is not overwritten in the kernel.
  open_price = float(value.get('1. open'))
  high = float(value.get('2. high'))
  low = float(value.get('3. low'))
  close = float(value.get('4. close'))
  adjusted_close = float(value.get('5. adjusted close'))
  volume = int(value.get('6. volume'))
  divident = float(value.get('7. dividend amount'))

  # Every split met so far affects all older rows, so the factors must
  # compound instead of the newest one replacing the previous one.
  # NOTE(review): the row that carries the coefficient is itself divided, as in
  # the original code; confirm whether that day's quote is already post-split.
  day_split = float(value.get('8. split coefficient'))
  if day_split > 1:
    split_coefficient *= day_split

  open_price = open_price / split_coefficient
  high = high / split_coefficient
  close = close / split_coefficient
  low = low / split_coefficient

  # 3 features
  lstm_data.append([
    close,
    high - low,
    volume
  ])

  data.append([
      date,
      close,
      volume,
      high,
      low,
      open_price,
      divident
      ])

# Most recent date in the download (first row before the order is reversed).
last_data =  str(data[0][0])

# Keep at most max_data_size of the newest rows, then reverse to oldest-first.
# Slicing past the end of a list is harmless, so no length check is needed.
data = np.flip(data[:max_data_size],axis=0)
lstm_data = np.flip(lstm_data[:max_data_size],axis=0)

https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=QQQ&outputsize=full&apikey=3F4URDEKOPLFH25T


In [30]:
# Print floats with two decimals (and a leading space for the sign slot).
np.set_printoptions(formatter={'float': '{: 0.2f}'.format})

# Report size and shape of both arrays, followed by their contents.
for _label, _array in (('data ===>', data), ('lstm_data ===>', lstm_data)):
  print(_label, 'size:', len(_array), 'max_data_size:', max_data_size, 'shape:', _array.shape)
  print(_array)

data ===> size: 730 max_data_size: 730 shape: (730, 7)
[['2018-12-04' '165.72' '70594743' ... '165.52' '171.43' '0.0']
 ['2018-12-06' '166.89' '71715526' ... '161.77' '162.46' '0.0']
 ['2018-12-07' '161.38' '80432176' ... '160.86' '166.16' '0.0']
 ...
 ['2021-10-25' '377.93' '32597804' ... '373.56' '375.56' '0.0']
 ['2021-10-26' '379.12' '47191279' ... '377.905' '380.23' '0.0']
 ['2021-10-27' '380.0' '45760496' ... '379.31' '379.58' '0.0']]
lstm_data ===> size: 730 max_data_size: 730 shape: (730, 3)
[[ 165.72  6.39  70594743.00]
 [ 166.89  5.14  71715526.00]
 [ 161.38  6.26  80432176.00]
 ...
 [ 377.93  5.31  32597804.00]
 [ 379.12  4.81  47191279.00]
 [ 380.00  3.84  45760496.00]]


In [31]:
# Build the price table from the parsed rows (oldest row first).
df = pd.DataFrame(data,columns=['Date','Close','Volume','High','Low','Open','Divident'])

# `data` is a string-typed array (its printed values are quoted), so every
# column arrives as text. Convert the numeric ones before doing arithmetic.
numeric_columns = ['Close','Volume','High','Low','Open','Divident']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric)

# Simple moving averages of the close; the first window-1 rows are NaN.
df['50MA'] = df['Close'].rolling(50).mean()
df['100MA'] = df['Close'].rolling(100).mean()
df['200MA'] = df['Close'].rolling(200).mean()

# Train/test boundary. Each partition is later windowed with length=look_back,
# which needs more than look_back rows, so keep the boundary at least
# look_back + 1 rows away from both ends whenever the data is long enough.
min_rows = look_back + 1
split = int(split_percent*len(data))
split = max(min_rows, min(split, len(data) - min_rows))
#df.head()

df_train = df.iloc[:split]
df_test = df.iloc[split:]

# The feature matrix is split at the same row so it stays aligned with df.
lstm_train_data = lstm_data[:split]
lstm_test_data = lstm_data[split:]

In [38]:
# DataFrame partitions: size, shape and the first rows of each.
for _label, _frame in (('df_test ===>', df_test), ('df_train ===>', df_train)):
  print(_label, 'size:', len(_frame), 'shape:', _frame.shape)
  print(_frame.head())
  print()

# Feature-array partitions: size, shape and enough leading rows to cover
# two full look-back windows plus their targets.
_preview_rows = 2*(look_back + 1)
for _label, _rows in (('lstm_test_data ===>', lstm_test_data), ('lstm_train_data ===>', lstm_train_data)):
  print(_label, 'size:', len(_rows), 'shape:', _rows.shape)
  print(_rows[:_preview_rows])
  print()

df_test ===> size: 73 shape: (73, 10)
           Date   Close    Volume  ...      50MA     100MA      200MA
657  2021-07-16   357.6  47261961  ...  341.4524  333.2582  318.57755
658  2021-07-19  354.67  64404625  ...  341.9156  333.5636  318.96170
659  2021-07-20  358.79  42183702  ...  342.4074  334.0232  319.34440
660  2021-07-21  361.56  30259860  ...  343.1234  334.4974  319.78065
661  2021-07-22  363.95  30252240  ...  343.8962  334.9010  320.19960

[5 rows x 10 columns]
df_train ===> size: 657 shape: (657, 10)
         Date   Close    Volume    High  ... Divident 50MA 100MA  200MA
0  2018-12-04  165.72  70594743  171.91  ...      0.0  NaN   NaN    NaN
1  2018-12-06  166.89  71715526  166.91  ...      0.0  NaN   NaN    NaN
2  2018-12-07  161.38  80432176  167.12  ...      0.0  NaN   NaN    NaN
3  2018-12-10  163.07  73960758  163.78  ...      0.0  NaN   NaN    NaN
4  2018-12-11  163.61  59058296  165.77  ...      0.0  NaN   NaN    NaN

[5 rows x 10 columns]
lstm_test_data ===> siz

In [40]:
# Sliding-window samples over the training rows: each sample is `look_back`
# consecutive rows, and its target is the row that follows them. The same
# array is passed as both inputs and targets.
train_generator = TimeseriesGenerator(
  lstm_train_data,
  lstm_train_data,
  length=look_back,
  batch_size=1,
)

print('Samples: %d' % len(train_generator))

# Show the first five (window => next row) pairs as a sanity check.
for i in range(5):
  x, y = train_generator[i]
  print('%s => %s' % (x, y))

Samples: 652
[[[ 165.72  6.39  70594743.00]
  [ 166.89  5.14  71715526.00]
  [ 161.38  6.26  80432176.00]
  [ 163.07  4.37  73960758.00]
  [ 163.61  3.54  59058296.00]]] => [[ 165.05  2.64  53779992.00]]
[[[ 166.89  5.14  71715526.00]
  [ 161.38  6.26  80432176.00]
  [ 163.07  4.37  73960758.00]
  [ 163.61  3.54  59058296.00]
  [ 165.05  2.64  53779992.00]]] => [[ 165.10  2.79  46377864.00]]
[[[ 161.38  6.26  80432176.00]
  [ 163.07  4.37  73960758.00]
  [ 163.61  3.54  59058296.00]
  [ 165.05  2.64  53779992.00]
  [ 165.10  2.79  46377864.00]]] => [[ 161.08  3.11  56547419.00]]
[[[ 163.07  4.37  73960758.00]
  [ 163.61  3.54  59058296.00]
  [ 165.05  2.64  53779992.00]
  [ 165.10  2.79  46377864.00]
  [ 161.08  3.11  56547419.00]]] => [[ 157.43  5.50  74834076.00]]
[[[ 163.61  3.54  59058296.00]
  [ 165.05  2.64  53779992.00]
  [ 165.10  2.79  46377864.00]
  [ 161.08  3.11  56547419.00]
  [ 157.43  5.50  74834076.00]]] => [[ 158.42  2.91  63642120.00]]


In [41]:
# Sliding-window samples over the held-out rows, built the same way as for
# training: `look_back` consecutive rows in, the following row as the target.
test_generator = TimeseriesGenerator(
  lstm_test_data,
  lstm_test_data,
  length=look_back,
  batch_size=1,
)

print('Samples: %d' % len(test_generator))

# Show the first five (window => next row) pairs as a sanity check.
for i in range(5):
  x, y = test_generator[i]
  print('%s => %s' % (x, y))

Samples: 68
[[[ 357.60  5.17  47261961.00]
  [ 354.67  3.14  64404625.00]
  [ 358.79  6.48  42183702.00]
  [ 361.56  3.44  30259860.00]
  [ 363.95  2.19  30252240.00]]] => [[ 368.20  4.57  31960799.00]]
[[[ 354.67  3.14  64404625.00]
  [ 358.79  6.48  42183702.00]
  [ 361.56  3.44  30259860.00]
  [ 363.95  2.19  30252240.00]
  [ 368.20  4.57  31960799.00]]] => [[ 368.49  2.29  28438126.00]]
[[[ 358.79  6.48  42183702.00]
  [ 361.56  3.44  30259860.00]
  [ 363.95  2.19  30252240.00]
  [ 368.20  4.57  31960799.00]
  [ 368.49  2.29  28438126.00]]] => [[ 364.43  8.06  57933148.00]]
[[[ 361.56  3.44  30259860.00]
  [ 363.95  2.19  30252240.00]
  [ 368.20  4.57  31960799.00]
  [ 368.49  2.29  28438126.00]
  [ 364.43  8.06  57933148.00]]] => [[ 365.83  4.21  42066195.00]]
[[[ 363.95  2.19  30252240.00]
  [ 368.20  4.57  31960799.00]
  [ 368.49  2.29  28438126.00]
  [ 364.43  8.06  57933148.00]
  [ 365.83  4.21  42066195.00]]] => [[ 366.48  2.43  23166574.00]]
