In [1]:
import pandas as pd
import numpy as np
import requests
# from api_key import key
from datetime import date
from dateutil.relativedelta import relativedelta

In [None]:
#date 
#open 
#close 
#volume
#input shape = 3
#6 months - 12 months

#AAPL, SBUX, GOOG

In [96]:
def get_stock_data(ticker, multiplier, timespan, from_date, to_date):
    # Make the API request
    api_url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{from_date}/{to_date}"
    response = requests.get(api_url, params={"apiKey": "wQ5FjyMjpTSO2j5vBxbLuIp72hwYd5E5"})

    # Check for errors
    if response.status_code != 200:
        raise Exception(f"Error {response.status_code}: {response.text}")

    # Extract the data from the response
    data = response.json()["results"]
    stock_data = []
    for d in data:
        stock_data.append({
            "Date": pd.to_datetime(d["t"], unit='ms').date(),
            "Open": d["o"],
            "High": d["h"],
            "Low": d["l"],
            "Close": d["c"],
            "Adj Close": d["c"],
            "Volume": d["v"],
        })
     
     # Convert the data to a dataframe
    df = pd.DataFrame(stock_data)
    df = df.reset_index(drop=True)
    
    return df

In [111]:
data = get_stock_data('AAPL', 1, 'day', '2022-02-28', '2023-02-28')

data.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-02-28,163.06,165.42,162.43,165.12,165.12,95056629.0
1,2022-03-01,164.695,166.6,161.97,163.2,163.2,83468865.0
2,2022-03-02,164.39,167.36,162.95,166.56,166.56,79724750.0
3,2022-03-03,168.47,168.91,165.55,166.23,166.23,76678441.0
4,2022-03-04,164.49,165.55,162.1,163.17,163.17,83819592.0
5,2022-03-07,163.36,165.02,159.04,159.3,159.3,96418845.0
6,2022-03-08,158.82,162.88,155.8,157.44,157.44,131139480.0
7,2022-03-09,161.475,163.41,159.41,162.95,162.95,91445405.0
8,2022-03-10,160.2,160.39,155.98,158.52,158.52,105342033.0
9,2022-03-11,158.93,159.28,154.5,154.73,154.73,96917302.0


In [112]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       252 non-null    object 
 1   Open       252 non-null    float64
 2   High       252 non-null    float64
 3   Low        252 non-null    float64
 4   Close      252 non-null    float64
 5   Adj Close  252 non-null    float64
 6   Volume     252 non-null    float64
dtypes: float64(6), object(1)
memory usage: 13.9+ KB


In [98]:
split = int(round(df.size*0.7,0))
train_size = split
test_size = df.size - split

data_train = df[:split]
data_test = df[split:]

In [72]:
## Preprocessing

In [119]:
def subsample_sequence(sequence, length, horizon):

    # Bounds of sampling
    last_possible = len(sequence) - length - horizon
    random_start = np.random.randint(0, last_possible)

    # Sample
    X = sequence[random_start:random_start +length].drop(columns=['Close','Date', 'index']) # Remove target closing price & date
    y = sequence.loc[random_start+length+horizon-1]['Close'] # Target is closing price

    return X, y


subsample_sequence(df, 3, 1)

(       Open     High     Low  Adj Close       Volume
 62  148.070  149.046  144.13     144.80  109161970.0
 63  148.200  157.500  147.82     155.74  164762371.0
 64  153.155  154.240  151.92     153.34   97932271.0,
 150.65)

In [120]:
def get_X_y(sequence, length, horizon, number_of_samples):
    
    X, y = [], []
    
    # Do as many samples as specified
    for sample in range(1,number_of_samples + 1):
        
        # Record sample X & y
        xi, yi = subsample_sequence(sequence, length, horizon)
        X.append(np.array(xi.values.T.tolist()).T) # Getting the right shape (sequences, observations,features)
        y.append(yi)

    return np.array(X), np.array(y)

In [121]:
X_train, y_train = get_X_y(sequence=data_train,
               length=100,
               horizon=1,
               number_of_samples=10000)

In [123]:
# 10000 subsamples, each of lenght 100 , each containg 5 features
X_train.shape

(10000, 100, 5)

In [124]:
y_train = y_train.reshape(10000,1)

In [2]:
from tensorflow.keras import models
from tensorflow.keras import layers

def init_model():

    model = models.Sequential()

    model.add(layers.LSTM(10, return_sequences=True, activation='tanh', input_shape=(500,5)))
    model.add(layers.LSTM(20, return_sequences=True, activation='tanh'))
    model.add(layers.LSTM(10, return_sequences=True, activation='tanh'))
    model.add(layers.Dense(5, activation='relu'))
    model.add(layers.Dense(1, activation='linear'))

    model.compile(loss='mse',
                  optimizer='rmsprop',
                  metrics=['mae'])


    return model

init_model().summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 500, 10)           640       
                                                                 
 lstm_1 (LSTM)               (None, 500, 20)           2480      
                                                                 
 lstm_2 (LSTM)               (None, 500, 10)           1240      
                                                                 
 dense (Dense)               (None, 500, 5)            55        
                                                                 
 dense_1 (Dense)             (None, 500, 1)            6         
                                                                 
Total params: 4,421
Trainable params: 4,421
Non-trainable params: 0
_________________________________________________________________
