In [1]:
import pandas as pd
import numpy as np
import requests
from datetime import date, datetime, timedelta

In [2]:
def get_stock_data(ticker, multiplier, timespan, from_date, to_date):
    # Make the API request
    api_url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{from_date}/{to_date}"
    response = requests.get(api_url, params={"apiKey": "wQ5FjyMjpTSO2j5vBxbLuIp72hwYd5E5"})

    # Check for errors
    if response.status_code != 200:
        raise Exception(f"Error {response.status_code}: {response.text}")

    # Extract the data from the response
    data = response.json()["results"]
    stock_data = []
    for d in data:
        stock_data.append({
            "Date": pd.to_datetime(d["t"], unit='ms').date(),
            "Open": d["o"],
            "High": d["h"],
            "Low": d["l"],
            "Close": d["c"],
            "Adj Close": d["c"],
            "Volume": d["v"],
        })
     
     # Convert the data to a dataframe
    df = pd.DataFrame(stock_data)
    df = df.reset_index(drop=True)
    
    return df

In [3]:
end_date = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
start_date = (datetime.strptime(end_date, '%Y-%m-%d') - timedelta(days=365*2-1)).strftime('%Y-%m-%d')

data = get_stock_data('AAPL', 1, 'day', start_date, end_date)

data.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-03-29,121.65,122.58,120.7299,121.39,121.39,80819203.0
1,2021-03-30,120.11,120.4031,118.86,119.9,119.9,84507919.0
2,2021-03-31,121.65,123.52,121.15,122.15,122.15,118323826.0
3,2021-04-01,123.66,124.18,122.49,123.0,123.0,75089134.0
4,2021-04-05,123.87,126.1601,123.07,125.9,125.9,88251175.0
5,2021-04-06,126.5,127.13,125.65,126.21,126.21,80171253.0
6,2021-04-07,125.83,127.92,125.14,127.9,127.9,83256716.0
7,2021-04-08,128.95,130.39,128.52,130.36,130.36,88844591.0
8,2021-04-09,129.8,133.04,129.47,132.995,132.995,106615653.0
9,2021-04-12,132.52,132.85,130.63,131.24,131.24,91388983.0


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 502 entries, 0 to 501
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       502 non-null    object 
 1   Open       502 non-null    float64
 2   High       502 non-null    float64
 3   Low        502 non-null    float64
 4   Close      502 non-null    float64
 5   Adj Close  502 non-null    float64
 6   Volume     502 non-null    float64
dtypes: float64(6), object(1)
memory usage: 27.6+ KB


In [87]:
split = int(round(data.shape[0]*0.7, 0))
train_size = split
test_size = data.shape[0] - split

data_train = data[:split]
data_test = data[split:]

In [88]:
data_train = data_train.reset_index(drop=True)
data_test = data_test.reset_index(drop=True)

In [89]:
def subsample_sequence(sequence, length, horizon):
    
    # Randomly select a starting point for subsample
    random_start = np.random.randint(0, len(sequence) - length - horizon)

    
    # Sample
    X = sequence[random_start:random_start +length].drop(columns=['Open','Date', 'Adj Close']) # Remove target closing price & date
    y = sequence.loc[random_start + length + horizon - 1, 'Close']
    
    return X, y

subsample_sequence(data, 3, 1)

(      High     Low   Close      Volume
 93  147.71  145.30  145.60  68286400.0
 94  146.72  145.53  145.86  48493463.0
 95  149.05  145.84  148.89  73771613.0,
 149.1)

In [90]:
def get_X_y(sequence, length, horizon, number_of_samples):
    
    X, y = [], []
    
    # Do as many samples as specified
    for sample in range(1,number_of_samples + 1):
        
        # Record sample X & y
        xi, yi = subsample_sequence(sequence, length, horizon)
        X.append(np.array(xi.values.T.tolist()).T) # Getting the right shape (sequences, observations,features)
        y.append(yi)

    return np.array(X), np.array(y)

In [92]:
X_train, y_train = get_X_y(sequence=data_train,
               length=100,
               horizon=1,
               number_of_samples=10000)

In [93]:
# 10000 subsamples, each of lenght 100 , each containg 4 features
X_train.shape

(10000, 100, 4)

In [94]:
y_train = y_train.reshape(10000,1)

In [95]:
print(data_train.index)

RangeIndex(start=0, stop=351, step=1)


In [96]:
len(data_test)

151

In [97]:
X_test, y_test = get_X_y(sequence=data_test,
                          length=100,
                          horizon=1,
                          number_of_samples=1000)

In [98]:
from tensorflow.keras import models
from tensorflow.keras import layers

def init_model():

    model = models.Sequential()

    model.add(layers.LSTM(15, return_sequences=True, activation='tanh', input_shape=(100,4)))
    model.add(layers.LSTM(20, return_sequences=True, activation='tanh'))
    model.add(layers.LSTM(15, return_sequences=True, activation='tanh'))
    model.add(layers.Dense(10, activation='relu'))
    model.add(layers.Dense(1, activation='linear'))

    model.compile(loss='mse',
                  optimizer='rmsprop',
                  metrics=['mae'])


    return model

init_model().summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 100, 15)           1200      
                                                                 
 lstm_7 (LSTM)               (None, 100, 20)           2880      
                                                                 
 lstm_8 (LSTM)               (None, 100, 15)           2160      
                                                                 
 dense_4 (Dense)             (None, 100, 10)           160       
                                                                 
 dense_5 (Dense)             (None, 100, 1)            11        
                                                                 
Total params: 6,411
Trainable params: 6,411
Non-trainable params: 0
_________________________________________________________________


In [99]:
from tensorflow.keras.callbacks import EarlyStopping
model = init_model()

es = EarlyStopping(patience=20)

model.fit(X_train, y_train, 
          epochs=8, 
          batch_size=16, 
          verbose=1, 
          callbacks = [es],
          validation_split=0.2)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x293fc6cb0>

In [109]:
#def predict_next_day_price(model, X_test):
    # Use the trained model to make predictions on the test set
 #   y_pred = model.predict(X_test)
    # Calculate the mean of all the predictions
 #   next_day_prediction = np.mean(y_pred)
  #  return next_day_prediction

In [116]:
from scipy import stats

def predict_next_day_price(model, X_test, confidence=0.95):
    # Use the trained model to make predictions on the test set
    y_pred = model.predict(X_test)
    # Calculate the mean of all the predictions
    mean_prediction = np.mean(y_pred)
    # Calculate the standard deviation of the predictions
    std_deviation = np.std(y_pred)
    # Calculate the confidence interval
    interval = stats.norm.interval(confidence, loc=mean_prediction, scale=std_deviation)
    lower_bound, upper_bound = interval
    # Return the mean prediction and confidence interval as a dictionary
    return {'mean_prediction': mean_prediction, 'lower_bound': lower_bound, 'upper_bound': upper_bound}

In [117]:
test_predictions = predict_next_day_price(model, X_test)
print(test_predictions)

{'mean_prediction': 157.19307, 'lower_bound': 150.2919929897423, 'upper_bound': 164.0941459262733}
