In [1]:
import pandas as pd
import numpy as np
import requests
# from api_key import key
from datetime import date
from dateutil.relativedelta import relativedelta

In [3]:
def get_stock_data(ticker, multiplier, timespan, from_date, to_date):
    # Make the API request
    api_url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{from_date}/{to_date}"
    response = requests.get(api_url, params={"apiKey": "wQ5FjyMjpTSO2j5vBxbLuIp72hwYd5E5"})

    # Check for errors
    if response.status_code != 200:
        raise Exception(f"Error {response.status_code}: {response.text}")

    # Extract the data from the response
    data = response.json()["results"]
    stock_data = []
    for d in data:
        stock_data.append({
            "Date": pd.to_datetime(d["t"], unit='ms').date(),
            "Open": d["o"],
            "High": d["h"],
            "Low": d["l"],
            "Close": d["c"],
            "Adj Close": d["c"],
            "Volume": d["v"],
        })
     
     # Convert the data to a dataframe
    df = pd.DataFrame(stock_data)
    df = df.reset_index(drop=True)
    
    return df

In [4]:
data = get_stock_data('SBUX', 1, 'day', '2021-02-28', '2023-02-28')

data.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-03-26,107.25,110.0,107.175,109.9,109.9,6167570.0
1,2021-03-29,109.01,110.25,106.59,109.13,109.13,5167195.0
2,2021-03-30,108.42,110.45,108.28,110.27,110.27,4552862.0
3,2021-03-31,110.33,111.6199,109.19,109.27,109.27,6478401.0
4,2021-04-01,110.06,110.06,108.2,109.38,109.38,5792950.0
5,2021-04-05,109.91,111.35,109.87,111.02,111.02,6913105.0
6,2021-04-06,111.38,113.82,111.095,113.15,113.15,6745242.0
7,2021-04-07,112.91,113.76,112.65,113.19,113.19,5629622.0
8,2021-04-08,113.65,113.96,112.6799,113.04,113.04,5011290.0
9,2021-04-09,112.91,113.27,111.97,113.18,113.18,4114258.0


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 485 entries, 0 to 484
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       485 non-null    object 
 1   Open       485 non-null    float64
 2   High       485 non-null    float64
 3   Low        485 non-null    float64
 4   Close      485 non-null    float64
 5   Adj Close  485 non-null    float64
 6   Volume     485 non-null    float64
dtypes: float64(6), object(1)
memory usage: 26.6+ KB


In [6]:
split = int(round(data.size*0.7,0))
train_size = split
test_size = data.size - split

data_train = data[:split]
data_test = data[split:]

In [17]:
def subsample_sequence(sequence, length, horizon):

    # Bounds of sampling
    last_possible = len(sequence) - length - horizon
    random_start = np.random.randint(0, last_possible)

    # Sample
    X = sequence[random_start:random_start +length].drop(columns=['Date', 'Adj Close']) # Remove target closing price & date
    y = sequence.loc[random_start+length+horizon-1]['Close'] # Target is closing price

    return X, y


subsample_sequence(data, 3, 1)

(        Open    High     Low   Close     Volume
 39  111.6300  112.22  110.57  110.92  5146620.0
 40  111.7980  113.03  111.52  112.52  5436295.0
 41  112.9152  113.15  112.18  112.63  5390837.0,
 112.85)

In [18]:
def get_X_y(sequence, length, horizon, number_of_samples):
    
    X, y = [], []
    
    # Do as many samples as specified
    for sample in range(1,number_of_samples + 1):
        
        # Record sample X & y
        xi, yi = subsample_sequence(sequence, length, horizon)
        X.append(np.array(xi.values.T.tolist()).T) # Getting the right shape (sequences, observations,features)
        y.append(yi)

    return np.array(X), np.array(y)

In [19]:
X_train, y_train = get_X_y(sequence=data_train,
               length=100,
               horizon=1,
               number_of_samples=10000)

In [21]:
# 10000 subsamples, each of lenght 100 , each containg 5 features
X_train.shape

(10000, 100, 5)

In [22]:
y_train = y_train.reshape(10000,1)

In [23]:
from tensorflow.keras import models
from tensorflow.keras import layers

def init_model():

    model = models.Sequential()

    model.add(layers.LSTM(15, return_sequences=True, activation='tanh', input_shape=(100,5)))
    model.add(layers.LSTM(20, return_sequences=True, activation='tanh'))
    model.add(layers.LSTM(15, return_sequences=True, activation='tanh'))
    model.add(layers.Dense(10, activation='relu'))
    model.add(layers.Dense(1, activation='linear'))

    model.compile(loss='mse',
                  optimizer='rmsprop',
                  metrics=['mae'])


    return model

init_model().summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_17 (LSTM)              (None, 100, 15)           1260      
                                                                 
 lstm_18 (LSTM)              (None, 100, 20)           2880      
                                                                 
 lstm_19 (LSTM)              (None, 100, 15)           2160      
                                                                 
 dense_10 (Dense)            (None, 100, 10)           160       
                                                                 
 dense_11 (Dense)            (None, 100, 1)            11        
                                                                 
Total params: 6,471
Trainable params: 6,471
Non-trainable params: 0
_________________________________________________________________


In [24]:
from tensorflow.keras.callbacks import EarlyStopping
model = init_model()

es = EarlyStopping(patience=20)

model.fit(X_train, y_train, 
          epochs=8, 
          batch_size=16, 
          verbose=1, 
          callbacks = [es],
          validation_split=0.2)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x29a4b7940>