In [4]:
import pandas as pd
import numpy as np
import requests
# from api_key import key
from datetime import date
from dateutil.relativedelta import relativedelta

In [5]:
def get_stock_data(ticker, multiplier, timespan, from_date, to_date, api_key=None, timeout=30):
    """Download aggregate (OHLCV) bars from the Polygon.io aggregates endpoint.

    Args:
        ticker: Symbol to query, e.g. ``'AAPL'``.
        multiplier: Size of the timespan multiplier, e.g. ``1``.
        timespan: Bar unit understood by the API, e.g. ``'day'``.
        from_date: Start of the range (``YYYY-MM-DD``).
        to_date: End of the range (``YYYY-MM-DD``).
        api_key: Polygon API key. When omitted, the ``POLYGON_API_KEY``
            environment variable is used, so the secret never lives in the notebook.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        DataFrame with columns Date, Open, High, Low, Close, Adj Close, Volume
        (one row per bar). Empty, but with the same columns, when the response
        carries no bars.

    Raises:
        ValueError: If no API key is available.
        Exception: If the API answers with a non-200 status code.
    """
    import os  # local import keeps this cell self-contained

    if api_key is None:
        api_key = os.environ.get("POLYGON_API_KEY")
    if not api_key:
        raise ValueError(
            "No Polygon API key: pass api_key=... or set the POLYGON_API_KEY environment variable"
        )

    # Make the API request
    api_url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{from_date}/{to_date}"
    response = requests.get(api_url, params={"apiKey": api_key}, timeout=timeout)

    # Check for errors
    if response.status_code != 200:
        raise Exception(f"Error {response.status_code}: {response.text}")

    # A payload without bars may omit "results" (or set it to null); treat
    # both as "no rows" instead of failing with a KeyError.
    results = response.json().get("results") or []

    columns = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"]
    stock_data = [
        {
            "Date": pd.to_datetime(bar["t"], unit='ms').date(),
            "Open": bar["o"],
            "High": bar["h"],
            "Low": bar["l"],
            "Close": bar["c"],
            # The response mapping used here has a single close field, so
            # "Adj Close" mirrors "Close".
            # NOTE(review): presumably the endpoint returns split-adjusted
            # prices by default - confirm against the Polygon docs.
            "Adj Close": bar["c"],
            "Volume": bar["v"],
        }
        for bar in results
    ]

    # Passing `columns` guarantees a stable schema even for an empty result
    return pd.DataFrame(stock_data, columns=columns)

In [6]:
# Fetch daily AAPL bars for the requested two-year range
data = get_stock_data(
    ticker='AAPL',
    multiplier=1,
    timespan='day',
    from_date='2021-02-28',
    to_date='2023-02-28',
)

# Preview the first ten rows
data.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-03-26,120.35,121.48,118.92,121.21,121.21,94071234.0
1,2021-03-29,121.65,122.58,120.7299,121.39,121.39,80819203.0
2,2021-03-30,120.11,120.4031,118.86,119.9,119.9,84507919.0
3,2021-03-31,121.65,123.52,121.15,122.15,122.15,118323826.0
4,2021-04-01,123.66,124.18,122.49,123.0,123.0,75089134.0
5,2021-04-05,123.87,126.1601,123.07,125.9,125.9,88251175.0
6,2021-04-06,126.5,127.13,125.65,126.21,126.21,80171253.0
7,2021-04-07,125.83,127.92,125.14,127.9,127.9,83256716.0
8,2021-04-08,128.95,130.39,128.52,130.36,130.36,88844591.0
9,2021-04-09,129.8,133.04,129.47,132.995,132.995,106615653.0


In [7]:
# Show column dtypes and non-null counts of the downloaded frame
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 485 entries, 0 to 484
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       485 non-null    object 
 1   Open       485 non-null    float64
 2   High       485 non-null    float64
 3   Low        485 non-null    float64
 4   Close      485 non-null    float64
 5   Adj Close  485 non-null    float64
 6   Volume     485 non-null    float64
dtypes: float64(6), object(1)
memory usage: 26.6+ KB


In [8]:
# Chronological 70/30 split on the number of ROWS.
# DataFrame.size is rows * columns, so using it placed the cut beyond the last
# row: the "train" set was the whole frame and the "test" set was empty.
n_rows = len(data)
split = int(round(n_rows * 0.7, 0))
train_size = split
test_size = n_rows - split

data_train = data.iloc[:split]
# Re-base the test index at 0 so label-based and positional lookups agree
data_test = data.iloc[split:].reset_index(drop=True)

In [16]:
def subsample_sequence(sequence, length, horizon):
    """Draw one random window of features and the closing price that follows it.

    Args:
        sequence: DataFrame holding at least the columns Date, Open, Adj Close
            and Close, ordered in time.
        length: Number of consecutive rows in the feature window.
        horizon: How many rows after the end of the window the target lies
            (1 = the row immediately following the window).

    Returns:
        (X, y): X is the window with the Open, Date and Adj Close columns
        dropped (Close stays in as a feature); y is the Close value
        ``horizon`` rows after the window.

    Raises:
        ValueError: If ``sequence`` has fewer than ``length + horizon`` rows.
    """
    # Largest start position whose target row still exists:
    # start + length + horizon - 1 <= len(sequence) - 1
    last_possible = len(sequence) - length - horizon
    if last_possible < 0:
        raise ValueError(
            f"sequence has {len(sequence)} rows but length + horizon = {length + horizon}"
        )

    # randint's upper bound is exclusive; +1 keeps the final window reachable
    # and makes a sequence of exactly length + horizon rows valid.
    random_start = np.random.randint(0, last_possible + 1)

    # Positional indexing throughout, so frames whose index does not start
    # at 0 (e.g. a tail slice of a larger frame) are handled correctly.
    window = sequence.iloc[random_start:random_start + length]
    X = window.drop(columns=['Open', 'Date', 'Adj Close'])  # keep High, Low, Close, Volume
    y = sequence['Close'].iloc[random_start + length + horizon - 1]  # target is a closing price

    return X, y


# Quick check: one 3-row window and the close one step after it
subsample_sequence(sequence=data, length=3, horizon=1)

(       High     Low   Close      Volume
 311  137.06  133.32  135.87  81000488.0
 312  137.76  133.91  135.35  73409234.0
 313  138.59  135.63  138.27  72433768.0,
 141.66)

In [10]:
def get_X_y(sequence, length, horizon, number_of_samples):
    """Build a dataset of randomly drawn windows and their targets.

    Calls ``subsample_sequence`` ``number_of_samples`` times and stacks the
    results.

    Args:
        sequence: Source DataFrame passed through to ``subsample_sequence``.
        length: Rows per window.
        horizon: Offset of the target after the window.
        number_of_samples: How many (window, target) pairs to draw.

    Returns:
        (X, y) as NumPy arrays: X has shape
        (number_of_samples, length, n_features) and y has shape
        (number_of_samples,).
    """
    X, y = [], []

    # Do as many samples as specified
    for _ in range(number_of_samples):
        xi, yi = subsample_sequence(sequence, length, horizon)
        # The window is already (observations, features); convert it directly
        # instead of round-tripping through transposed Python lists.
        X.append(xi.to_numpy())
        y.append(yi)

    return np.array(X), np.array(y)

In [18]:
# Draw 10,000 random 100-row windows (target one step ahead) from the training split
X_train, y_train = get_X_y(
    sequence=data_train,
    length=100,
    horizon=1,
    number_of_samples=10000,
)

In [19]:
# 10000 subsamples, each of length 100, each containing 4 features
X_train.shape

(10000, 100, 4)

In [20]:
# Column vector of targets; -1 infers the sample count so this stays correct
# when the number of drawn samples changes.
y_train = y_train.reshape(-1, 1)

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers

def init_model(input_shape=(100, 4)):
    """Build and compile a stacked-LSTM regressor that predicts one value per window.

    Args:
        input_shape: (timesteps, features) of a single sample. Defaults to
            the (100, 4) windows used in this notebook.

    Returns:
        A compiled Sequential model (MSE loss, RMSprop optimizer, MAE metric).
    """
    model = models.Sequential()

    model.add(layers.LSTM(15, return_sequences=True, activation='tanh', input_shape=input_shape))
    model.add(layers.LSTM(20, return_sequences=True, activation='tanh'))
    # The last recurrent layer returns only its final state, so the Dense head
    # yields a single prediction per window - shape (batch, 1), matching a
    # (n_samples, 1) target - instead of one prediction per timestep.
    model.add(layers.LSTM(15, return_sequences=False, activation='tanh'))
    model.add(layers.Dense(10, activation='relu'))
    model.add(layers.Dense(1, activation='linear'))

    model.compile(loss='mse',
                  optimizer='rmsprop',
                  metrics=['mae'])

    return model

# Build a throwaway model just to print its layer summary
init_model().summary()

In [49]:
from tensorflow.keras.callbacks import EarlyStopping
model = init_model()

# Patience must be smaller than the epoch budget or the callback can never
# fire (it was 20 against 8 epochs). restore_best_weights keeps the weights
# of the best validation epoch rather than the last one.
es = EarlyStopping(patience=3, restore_best_weights=True)

# NOTE(review): validation_split holds out the last 20% of the sampled windows;
# those windows are drawn at random from the same series as the training ones
# and may overlap them, so val_loss is likely optimistic - confirm with a
# separate chronological hold-out.
history = model.fit(X_train, y_train,
                    epochs=8,
                    batch_size=16,
                    verbose=1,
                    callbacks=[es],
                    validation_split=0.2)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x2e96b7dc0>