In [4]:
import pandas as pd
import numpy as np
import requests
# from api_key import key
from datetime import date
from dateutil.relativedelta import relativedelta

In [5]:
def get_stock_data(ticker, multiplier, timespan, from_date, to_date):
    # Make the API request
    api_url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{from_date}/{to_date}"
    response = requests.get(api_url, params={"apiKey": "wQ5FjyMjpTSO2j5vBxbLuIp72hwYd5E5"})

    # Check for errors
    if response.status_code != 200:
        raise Exception(f"Error {response.status_code}: {response.text}")

    # Extract the data from the response
    data = response.json()["results"]
    stock_data = []
    for d in data:
        stock_data.append({
            "Date": pd.to_datetime(d["t"], unit='ms').date(),
            "Open": d["o"],
            "High": d["h"],
            "Low": d["l"],
            "Close": d["c"],
            "Adj Close": d["c"],
            "Volume": d["v"],
        })
     
     # Convert the data to a dataframe
    df = pd.DataFrame(stock_data)
    df = df.reset_index(drop=True)
    
    return df

In [6]:
data = get_stock_data('SBUX', 1, 'day', '2021-02-28', '2023-02-28')

data.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-03-29,109.01,110.25,106.59,109.13,109.13,5167195.0
1,2021-03-30,108.42,110.45,108.28,110.27,110.27,4552862.0
2,2021-03-31,110.33,111.6199,109.19,109.27,109.27,6478401.0
3,2021-04-01,110.06,110.06,108.2,109.38,109.38,5792950.0
4,2021-04-05,109.91,111.35,109.87,111.02,111.02,6913105.0
5,2021-04-06,111.38,113.82,111.095,113.15,113.15,6745242.0
6,2021-04-07,112.91,113.76,112.65,113.19,113.19,5629622.0
7,2021-04-08,113.65,113.96,112.6799,113.04,113.04,5011290.0
8,2021-04-09,112.91,113.27,111.97,113.18,113.18,4114258.0
9,2021-04-12,113.02,113.89,112.77,113.81,113.81,4707805.0


In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 484 entries, 0 to 483
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       484 non-null    object 
 1   Open       484 non-null    float64
 2   High       484 non-null    float64
 3   Low        484 non-null    float64
 4   Close      484 non-null    float64
 5   Adj Close  484 non-null    float64
 6   Volume     484 non-null    float64
dtypes: float64(6), object(1)
memory usage: 26.6+ KB


In [8]:
split = int(round(data.shape[0]*0.7, 0))
train_size = split
test_size = data.shape[0] - split

data_train = data[:split]
data_test = data[split:]

In [9]:
data_train = data_train.reset_index(drop=True)
data_test = data_test.reset_index(drop=True)

In [10]:
def subsample_sequence(sequence, length, horizon):
    
    # Randomly select a starting point for subsample
    random_start = np.random.randint(0, len(sequence) - length - horizon)

    
    # Sample
    X = sequence[random_start:random_start +length].drop(columns=['Open','Date', 'Adj Close']) # Remove target closing price & date
    y = sequence.loc[random_start + length + horizon - 1, 'Close']
    
    return X, y

subsample_sequence(data, 3, 1)

(        High     Low   Close     Volume
 18  117.9800  115.84  117.56  4559282.0
 19  117.2500  115.06  115.92  5924931.0
 20  116.3899  115.27  116.15  7260927.0,
 112.4)

In [11]:
def get_X_y(sequence, length, horizon, number_of_samples):
    
    X, y = [], []
    
    # Do as many samples as specified
    for sample in range(1,number_of_samples + 1):
        
        # Record sample X & y
        xi, yi = subsample_sequence(sequence, length, horizon)
        X.append(np.array(xi.values.T.tolist()).T) # Getting the right shape (sequences, observations,features)
        y.append(yi)

    return np.array(X), np.array(y)

In [12]:
X_train, y_train = get_X_y(sequence=data_train,
               length=100,
               horizon=1,
               number_of_samples=10000)

In [13]:
# 10000 subsamples, each of lenght 100 , each containg 5 features
X_train.shape

(10000, 100, 4)

In [19]:
y_train = y_train.reshape(10000,1)

In [20]:
X_test, y_test = get_X_y(sequence=data_test,
                          length=100,
                          horizon=1,
                          number_of_samples=1000)

In [23]:
from tensorflow.keras import models
from tensorflow.keras import layers

def init_model():

    model = models.Sequential()

    model.add(layers.LSTM(15, return_sequences=True, activation='tanh', input_shape=(100,4)))
    model.add(layers.LSTM(20, return_sequences=True, activation='tanh'))
    model.add(layers.LSTM(15, return_sequences=True, activation='tanh'))
    model.add(layers.Dense(10, activation='relu'))
    model.add(layers.Dense(1, activation='linear'))

    model.compile(loss='mse',
                  optimizer='rmsprop',
                  metrics=['mae'])


    return model

init_model().summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_15 (LSTM)              (None, 100, 15)           1200      
                                                                 
 lstm_16 (LSTM)              (None, 100, 20)           2880      
                                                                 
 lstm_17 (LSTM)              (None, 100, 15)           2160      
                                                                 
 dense_10 (Dense)            (None, 100, 10)           160       
                                                                 
 dense_11 (Dense)            (None, 100, 1)            11        
                                                                 
Total params: 6,411
Trainable params: 6,411
Non-trainable params: 0
_________________________________________________________________


In [24]:
from tensorflow.keras.callbacks import EarlyStopping
model = init_model()

es = EarlyStopping(patience=20)

model.fit(X_train, y_train, 
          epochs=8, 
          batch_size=16, 
          verbose=1, 
          callbacks = [es],
          validation_split=0.2)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x28ad62230>

In [25]:
from scipy import stats

def predict_next_day_price(model, X_test, confidence=0.95):
    # Use the trained model to make predictions on the test set
    y_pred = model.predict(X_test)
    # Calculate the mean of all the predictions
    mean_prediction = np.mean(y_pred)
    # Calculate the standard deviation of the predictions
    std_deviation = np.std(y_pred)
    # Calculate the confidence interval
    interval = stats.norm.interval(confidence, loc=mean_prediction, scale=std_deviation)
    lower_bound, upper_bound = interval
    # Return the mean prediction and confidence interval as a dictionary
    return {'mean_prediction': mean_prediction, 'lower_bound': lower_bound, 'upper_bound': upper_bound}

In [26]:
test_predictions = predict_next_day_price(model, X_test)
print(test_predictions)

{'mean_prediction': 95.68415, 'lower_bound': 91.48511679435573, 'upper_bound': 99.88318459724583}
