# Prophet & GRU Ensemble

Testing the efficacy of ensembling Prophet and GRU forecasts for the High, Low, and Close (HLC) prices of the given stock(s).

The HLC forecasts can be pushed to Telegram, given a bot token and a chat ID.

Forward testing of the ensemble is included at the end of the notebook.




## Install & Import Dependencies


In [None]:
!pip install datasets
!pip install transformers
!pip install huggingface_hub
!pip install git+https://github.com/huggingface/peft.git
!pip install git+https://github.com/huggingface/accelerate.git
!pip install torch
!pip install schedule
!pip install time
!pip install telebot
!pip install telethon
!pip install transformers
!pip install alpha_vantage
!pip install Prophet
!pip install tensorflow

Collecting datasets
  Using cached datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Using cached pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting requests>=2.32.2 (from datasets)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting xxhash (from datasets)
  Using cached xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Using cached multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Using cached datasets-2.20.0-py3-none-any.whl (547 kB)
Using cached dill-0.3.8-py3-none-any.whl (116 kB)
Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.9/39.9 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading re

In [None]:
import requests
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import schedule
import time
from transformers import pipeline
import yfinance as yf
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from prophet import Prophet
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

## Telegram Tokens

In [None]:
# Telegram credentials used when pushing forecast results.
# Replace both placeholders with real values before running the Telegram cells,
# and avoid committing real credentials with the notebook.
BOT_TOKEN = "<BOT_TOKEN>"  # interpolated into the Telegram Bot API URL
chat_id = "<CHAT_ID>"  # sent as the `chat_id` of every sendMessage request

## Numeric integration

### Data processing & integrating
Fetch some basic stock data and calculate some standard technical indicators to supplement it.

In [None]:
def get_numeric_data(ticker):
    """Download recent hourly price data for *ticker* and add technical indicators.

    Six months of 1-hour bars are fetched with ``yf.download``. The following
    columns, all derived from ``Close``, are appended:

    * ``MA_50`` / ``MA_200`` - simple moving averages over 50 / 200 bars
    * ``RSI`` - 14-bar RSI built from simple rolling means of gains and losses
    * ``EMA_12`` / ``EMA_26`` - exponential moving averages
    * ``MACD`` - ``EMA_12 - EMA_26``; ``Signal`` - 9-span EMA of ``MACD``

    Args:
        ticker: Symbol passed to ``yf.download``.

    Returns:
        A DataFrame holding only the numeric columns, with the time index
        moved into an ordinary column. The forecasting functions in this
        notebook read that column as ``Datetime``.
    """
    # period must be one of
    # ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']
    period = "6mo"
    interval = "1h"

    data = yf.download(tickers=ticker, period=period, interval=interval)

    # Some yfinance versions return (field, ticker) MultiIndex columns even for
    # a single symbol; flatten them so data['Close'] is a Series again.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    close = data['Close']

    # Moving averages
    data['MA_50'] = close.rolling(window=50).mean()
    data['MA_200'] = close.rolling(window=200).mean()

    # RSI: average gain over average absolute loss across 14 bars
    delta = close.diff(1)
    gains = delta.clip(lower=0)
    losses = delta.clip(upper=0)
    roll_up = gains.rolling(window=14).mean()
    roll_down = losses.rolling(window=14).mean().abs()
    rs = roll_up / roll_down
    data['RSI'] = 100.0 - (100.0 / (1.0 + rs))

    # MACD and its signal line
    data['EMA_12'] = close.ewm(span=12, adjust=False).mean()
    data['EMA_26'] = close.ewm(span=26, adjust=False).mean()
    data['MACD'] = data['EMA_12'] - data['EMA_26']
    data['Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()

    # reset_index() returns a new frame, so nothing is mutated in place on the
    # object produced by select_dtypes.
    numeric_df = data.select_dtypes(include=['number']).reset_index()

    return numeric_df

#ticker = 'SPY'
#numeric = get_numeric_data(ticker)
#numeric.head()

### Model Formation & Ensembling

#### Prophet Forecast

In [None]:
def prophet_forcast(target, df, future_days):
    """Fit a Prophet model on one price column and forecast beyond its end.

    Args:
        target: Name of the column in *df* to forecast (e.g. ``'Close'``).
        df: DataFrame with a ``Datetime`` column and the *target* column.
        future_days: Number of future periods to forecast. They are generated
            by ``make_future_dataframe`` at its default frequency.

    Returns:
        NumPy array with the ``yhat`` values of the ``future_days`` forecast
        periods (empty when ``future_days`` is 0).

    Side effects:
        Draws the Prophet forecast plot via ``model.plot``.
    """
    # Build an independent two-column frame in Prophet's required ds/y layout
    # so the assignment below never writes into a view of the caller's data.
    prophet_df = (
        df[['Datetime', target]]
        .rename(columns={'Datetime': 'ds', target: 'y'})
        .copy()
    )

    # Convert to datetime and strip any timezone information.
    prophet_df['ds'] = pd.to_datetime(prophet_df['ds']).dt.tz_localize(None)

    params = {'seasonality_mode': 'additive',
              'changepoint_prior_scale': 0.03132627808654428,
              'seasonality_prior_scale': 2.0402731368344673,
              'holidays_prior_scale': 5.869231482077217
             }

    model = Prophet(**params)
    model.fit(prophet_df)

    # History plus `future_days` additional periods
    future = model.make_future_dataframe(periods=future_days)
    forecast = model.predict(future)

    # tail() instead of values[-future_days:]: with future_days == 0 the slice
    # [-0:] would return the whole fitted history rather than nothing.
    future_prices = forecast['yhat'].tail(future_days).values
    model.plot(forecast)
    return future_prices

#ticker = 'SPY'
#df = get_numeric_data(ticker)
#future_close_prices = prophet_forcast('Close', df, future_days = 1)
#print("Future prices:", future_close_prices)


#### Train & Forecast with a GRU model

Includes Monte Carlo simulation averaging for better accuracy.

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping

def gru_forcast(target, df, future_days, num_mc_simulations=100):
    """Train a GRU on one price column and forecast it with Monte Carlo dropout.

    The column is min-max scaled and turned into (previous value -> next
    value) training pairs. A stacked GRU network is trained on them and saved
    to ``gru_model_pred-for.keras``. The forecast is then rolled forward
    ``future_days`` steps for ``num_mc_simulations`` stochastic passes and
    averaged.

    The stochastic passes call the model with ``training=True`` so the Dropout
    layers stay active. ``model.predict`` runs in inference mode, where
    dropout is disabled and every "simulation" would give the same path.

    Args:
        target: Name of the column in *df* to model (e.g. ``'Close'``).
        df: DataFrame containing the *target* column.
        future_days: Number of steps to forecast beyond the last observation.
        num_mc_simulations: Number of dropout-perturbed forecast paths to
            average. Must be at least 1.

    Returns:
        NumPy array of length ``future_days`` with the mean forecast per step,
        in the original price scale.

    Raises:
        ValueError: If ``num_mc_simulations`` is smaller than 1 or *df* has
            too few rows to build a single training pair.

    Side effects:
        Writes ``gru_model_pred-for.keras`` and shows two matplotlib figures.
    """
    if num_mc_simulations < 1:
        raise ValueError("num_mc_simulations must be at least 1")

    # Single-column 2-D array, the layout MinMaxScaler expects.
    prices = df[[target]].to_numpy(dtype=float)

    # Scale the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_close = scaler.fit_transform(prices)

    # Each sample is the previous `seq_len` values; the label is the next one.
    seq_len = 1
    if len(scaled_close) <= seq_len:
        raise ValueError("not enough rows to build a training sequence")
    X_train = np.array(
        [scaled_close[i - seq_len:i] for i in range(seq_len, len(scaled_close))]
    )
    y_train = scaled_close[seq_len:]
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

    # Create the GRU model
    model = Sequential()
    model.add(GRU(units=1000, return_sequences=True, input_shape=(seq_len, 1)))
    model.add(Dropout(0.26))
    model.add(GRU(units=200, return_sequences=True))
    model.add(Dropout(0.26))
    model.add(GRU(units=1000, return_sequences=False))
    model.add(Dropout(0.26))
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=1))

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Stop once the training loss has not improved by 0.001 for 5 epochs.
    early_stopping = EarlyStopping(monitor='loss', patience=5, min_delta=0.001)
    model.fit(X_train, y_train, epochs=200, batch_size=32, verbose=0, callbacks=[early_stopping])
    model.save('gru_model_pred-for.keras')

    # In-sample fit, kept under its own name so the forecast loop below cannot
    # overwrite it before the second plot is drawn.
    fitted_prices = scaler.inverse_transform(model.predict(X_train))[:, 0]
    actual_prices = prices[seq_len:, 0]

    # Plot actual and predicted prices
    plt.plot(actual_prices, label='Actual')
    plt.plot(fitted_prices, label='Predicted')
    plt.legend()
    plt.show()

    # Monte Carlo dropout: all simulations are rolled forward together as one
    # batch, one forward pass per forecast step. Dropout draws a separate mask
    # for every batch element, so each row follows its own stochastic path.
    last_window = scaled_close[-seq_len:].reshape((1, seq_len, 1)).astype('float32')
    mc_batch = np.repeat(last_window, num_mc_simulations, axis=0)
    mc_steps = []
    for _ in range(future_days):
        step_scaled = np.asarray(model(mc_batch, training=True))
        mc_steps.append(scaler.inverse_transform(step_scaled)[:, 0])
        # Slide each window forward by appending its own prediction.
        mc_batch = np.concatenate(
            [mc_batch[:, 1:, :], step_scaled.reshape((-1, 1, 1))], axis=1
        )

    # Average across simulations; the reshape keeps future_days == 0 valid.
    future_close_prices_mc_avg = (
        np.asarray(mc_steps, dtype=float)
        .reshape(future_days, num_mc_simulations)
        .mean(axis=1)
    )

    # Plot forecasted prices after the history
    history_len = len(actual_prices)
    plt.plot(actual_prices, label='Actual')
    plt.plot(fitted_prices, label='Predicted')
    plt.plot(range(history_len, history_len + future_days), future_close_prices_mc_avg, label='Forecasted (MC Average)')
    plt.legend()
    plt.show()

    return future_close_prices_mc_avg

#ticker = 'SPY'
#df = get_numeric_data(ticker)
#future_close_prices_mc_avg = gru_forcast('Close',df, future_days=1)
#print("Future High prices (MC Average):", future_close_prices_mc_avg)

### Ensembling

In [None]:
def focast_ensemble(df, future_days, ticker):
    """Average the Prophet and GRU forecasts for High, Low and Close.

    Args:
        df: DataFrame with a ``Datetime`` column and ``High``, ``Low`` and
            ``Close`` columns, as consumed by ``prophet_forcast`` and
            ``gru_forcast``.
        future_days: Number of forecast steps requested from both models.
        ticker: Not used; kept so existing callers keep working.

    Returns:
        DataFrame with columns ``High``, ``Low`` and ``Close`` and one row per
        forecast step. Each value is the mean of the two models' forecasts.
        For ``future_days=1`` this is the single-row frame callers index with
        ``[0]`` / ``.iloc[0]``.
    """
    columns = ('High', 'Low', 'Close')

    # Run all Prophet forecasts first, then all GRU forecasts, so the plots
    # appear in the same order as before.
    prophet_preds = {
        col: np.asarray(prophet_forcast(col, df, future_days), dtype=float)
        for col in columns
    }
    gru_preds = {
        col: np.asarray(gru_forcast(col, df, future_days), dtype=float)
        for col in columns
    }

    # Element-wise mean keeps every forecast step instead of only the first.
    avg_df = pd.DataFrame(
        {col: (prophet_preds[col] + gru_preds[col]) / 2 for col in columns}
    )

    return avg_df

#ticker = 'SPY'
#include_gpt=False
#numeric_df = get_numeric_data(ticker)
#future_prices = focast_ensemble(numeric_df, future_days = 1, ticker=ticker)
#print("Future prices:", future_prices)

### Sentiment and Numeric Forecasting Pipeline

In [None]:
def forcast_pipeline(ticker, include_gpt, future_days):
    """Fetch indicator-enriched price data for *ticker* and return the ensemble forecast.

    Args:
        ticker: Symbol to download and forecast.
        include_gpt: Accepted but not used by this function.
        future_days: Number of forecast steps passed on to ``focast_ensemble``.

    Returns:
        Whatever ``focast_ensemble`` returns for the downloaded data.
    """
    return focast_ensemble(get_numeric_data(ticker), future_days, ticker)

#ticker_list = ['INQD','GRST']
#include_gpt=False
#future_days=1
#for ticker in ticker_list:
    #prediction = forecast_pipeline(ticker, include_gpt, future_days)
    #send_forcast(ticker, prediction)
    #predictions.append(prediction)  # append each prediction to the list
    #predictions.append({'Ticker': ticker, 'Prediction': prediction})  # append each prediction to the list
    #print(prediction)

## Aggregation with Telegram

In [None]:
def send_forcast(ticker, df, prev_close):
    """Send the forecasted High/Low/Close for *ticker* to the Telegram chat.

    Args:
        ticker: Symbol shown in the message.
        df: Forecast DataFrame with ``High``, ``Low`` and ``Close`` columns.
            Only its first row is reported.
        prev_close: Reference close price. The message reports the forecasted
            high minus this value.

    Side effects:
        Issues a GET request to the Telegram Bot API ``sendMessage`` endpoint
        using the module-level ``BOT_TOKEN`` and ``chat_id``.
    """
    # Positional access so the first row is used whatever the index labels are.
    forecasted_high = round(df['High'].iloc[0], 2)
    forecasted_low = round(df['Low'].iloc[0], 2)
    forecasted_close = round(df['Close'].iloc[0], 2)
    forecasted_high_vs_prev_close_difference = round(forecasted_high - prev_close, 2)

    message = f"Predicted Prices for: \n {ticker}:\n"
    message += f"High: {forecasted_high}\n"
    message += f"Low: {forecasted_low}\n"
    message += f"Close: {forecasted_close}\n"
    message += f"Forecasted High vs Prev Close Difference: {forecasted_high_vs_prev_close_difference}\n"

    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage"

    # Pass the text through `params` so newlines, spaces and characters such as
    # '&' or '#' are URL-encoded rather than pasted into the query string.
    # The timeout stops a stalled connection from blocking the run forever.
    requests.get(url, params={"chat_id": chat_id, "text": message}, timeout=30)

#ticker = 'SPY'
#include_gpt=False
#future_days=1
#forcasts = forcast_pipeline(ticker, include_gpt, future_days)
#send_forcast(ticker, forcasts)

## Pipeline
Forecasts the HLC of the given stock(s), calculates the differences between the forecasted prices and the previous day's prices, and saves the data to a .csv file as a running log of forecasts.

In [None]:
import yfinance as yf
import pandas as pd

def _latest_hlc(ticker):
    """Return the rounded (high, low, close) of the last daily bar yfinance reports for *ticker*."""
    stock_data = yf.download(ticker, period='1d')
    # Some yfinance versions return (field, ticker) MultiIndex columns even for
    # a single symbol; flatten them so each field is a scalar per row.
    if isinstance(stock_data.columns, pd.MultiIndex):
        stock_data.columns = stock_data.columns.get_level_values(0)
    last_bar = stock_data.iloc[-1]
    return tuple(round(float(last_bar[col]), 2) for col in ('High', 'Low', 'Close'))


def _diff_and_pct(value, base):
    """Return ``value - base`` and that difference as a percentage of *base*, both rounded to 2 decimals."""
    diff = round(value - base, 2)
    return diff, round((diff / base) * 100, 2)


def active_forcaster_aggregator(ticker_list=None, future_days=1):
    """Forecast every ticker, push each forecast to Telegram and log the results.

    For each ticker the ensemble forecast is computed, the latest daily bar is
    downloaded once, the forecast is sent with ``send_forcast``, and the
    differences between the forecast and that bar are recorded.

    Args:
        ticker_list: Symbols to forecast. Defaults to
            ``['SPY', 'FNGU', 'AAPL', 'TSLA']``.
        future_days: Number of forecast steps requested from the pipeline.
            Only the first step is logged.

    Returns:
        DataFrame with one row per ticker: an ``index`` column, the ticker,
        the forecasted High/Low/Close, the latest bar's prices (``Prev ...``
        columns) and the absolute and percentage differences between them.

    Side effects:
        Sends one Telegram message per ticker and writes the table to
        ``forward_test_df.csv``.
    """
    if ticker_list is None:
        # Other symbols previously tried with this pipeline:
        # 'AXP', 'AMGN', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD',
        # 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE',
        # 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW',
        # 'CURLF', 'INQD', 'ACB', 'AUR', 'GRST'
        ticker_list = ['SPY', 'FNGU', 'AAPL', 'TSLA']
    include_gpt = False  # GPTs used in other pipelines

    columns = [
        'Ticker', 'High', 'Low', 'Close',
        'Prev High Price', 'Prev Low Price', 'Prev Close Price',
        'High Price Difference', 'High Price Difference Percentage',
        'Low Price Difference', 'Low Price Difference Percentage',
        'Close Price Difference', 'Close Price Difference Percentage',
        'Low to High Difference', 'Low to High Difference Percentage',
        'Forecasted High vs Prev Close Difference',
        'Forecasted High vs Prev Close Difference Percentage',
    ]

    rows = []
    for ticker in ticker_list:
        prediction = forcast_pipeline(ticker, include_gpt, future_days)

        # One download per ticker serves both the Telegram message and the log,
        # so both are guaranteed to use the same reference prices.
        prev_high_price, prev_low_price, prev_close_price = _latest_hlc(ticker)
        send_forcast(ticker, prediction, prev_close_price)

        high = prediction['High'].iloc[0]
        low = prediction['Low'].iloc[0]
        close = prediction['Close'].iloc[0]

        # Differences are computed from the forecasts rounded to cents; the
        # unrounded forecasts are what gets stored in High/Low/Close.
        forecasted_high_price = round(float(high), 2)
        forecasted_low_price = round(float(low), 2)
        forecasted_close_price = round(float(close), 2)

        high_diff, high_pct = _diff_and_pct(forecasted_high_price, prev_high_price)
        low_diff, low_pct = _diff_and_pct(forecasted_low_price, prev_low_price)
        close_diff, close_pct = _diff_and_pct(forecasted_close_price, prev_close_price)
        # Forecasted high measured against the latest bar's low / close.
        low_to_high_diff, low_to_high_pct = _diff_and_pct(forecasted_high_price, prev_low_price)
        high_vs_close_diff, high_vs_close_pct = _diff_and_pct(forecasted_high_price, prev_close_price)

        rows.append({
            'Ticker': ticker,
            'High': high,
            'Low': low,
            'Close': close,
            'Prev High Price': prev_high_price,
            'Prev Low Price': prev_low_price,
            'Prev Close Price': prev_close_price,
            'High Price Difference': high_diff,
            'High Price Difference Percentage': high_pct,
            'Low Price Difference': low_diff,
            'Low Price Difference Percentage': low_pct,
            'Close Price Difference': close_diff,
            'Close Price Difference Percentage': close_pct,
            'Low to High Difference': low_to_high_diff,
            'Low to High Difference Percentage': low_to_high_pct,
            'Forecasted High vs Prev Close Difference': high_vs_close_diff,
            'Forecasted High vs Prev Close Difference Percentage': high_vs_close_pct,
        })

    forcasted_df = pd.DataFrame(rows, columns=columns)

    # Keep the positional 'index' column that earlier logs contain.
    forcasted_df = forcasted_df.reset_index()

    forcasted_df.to_csv('forward_test_df.csv', index=False)
    return forcasted_df

#forcasted = active_forcaster_aggregator()
#forcasted.head()

## Push Forecasts to Telegram

In [None]:
# Announce the run on Telegram, build and store the forecasts, then sign off.
# Message text goes through `params` so it is URL-encoded by requests, and
# each call has a timeout so a stalled connection cannot hang the cell.
telegram_url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage"

message_1 = "Good morning, Please stand by for your daily forcast."
requests.get(telegram_url, params={"chat_id": chat_id, "text": message_1}, timeout=30)

# Runs the full pipeline for every ticker (each forecast is pushed as it is made).
forcasts = active_forcaster_aggregator()
forcasts.to_csv('forcasted_df.csv', index=False)

message_2 = 'Unitl nexty time. Happy hunting. S Rioghal mo dhream'
requests.get(telegram_url, params={"chat_id": chat_id, "text": message_2}, timeout=30)

# Last expression of the cell: displays the first rows of the forecast table.
forcasts.head()

Unnamed: 0,index,Ticker,High,Low,Close,Prev High Price,Prev Low Price,Prev Close Price,High Price Difference,High Price Difference Percentage,Low Price Difference,Low Price Difference Percentage,Close Price Difference,Close Price Difference Percentage,Low to High Difference,Low to High Difference Percentage,Forecasted High vs Prev Close Difference,Forecasted High vs Prev Close Difference Percentage
0,0,SPY,552.3322,548.399564,547.495525,547.46,537.45,538.41,4.87,0.89,10.95,2.04,9.09,1.69,14.88,2.77,13.92,2.59
1,1,FNGU,448.875675,431.405354,437.033779,406.36,357.82,378.3,42.52,10.46,73.59,20.57,58.73,15.52,91.06,25.45,70.58,18.66
2,2,AAPL,223.8096,223.769347,221.151524,220.85,214.62,217.49,2.96,1.34,9.15,4.26,3.66,1.68,9.19,4.28,6.32,2.91
3,3,TSLA,244.818731,241.630565,239.6858,226.0,216.23,220.25,18.82,8.33,25.4,11.75,19.44,8.83,28.59,13.22,24.57,11.16


# Forward Testing



In [None]:
import yfinance as yf
import pandas as pd

def forward_testing():
  # Later, to load the DataFrame from the CSV file
  forcasted_df_loaded = pd.read_csv('forcasted_df.csv')

  # create new columns to store the actual high, low, and close prices and dates
  forcasted_df_loaded['Actual High Price'] = None
  forcasted_df_loaded['Actual Low Price'] = None
  forcasted_df_loaded['Actual Close Price'] = None
  forcasted_df_loaded['Date'] = None
  forcasted_df_loaded['High Price Difference'] = None
  forcasted_df_loaded['High Price Difference Percentage'] = None
  forcasted_df_loaded['Low Price Difference'] = None
  forcasted_df_loaded['Low Price Difference Percentage'] = None
  forcasted_df_loaded['Close Price Difference'] = None
  forcasted_df_loaded['Close Price Difference Percentage'] = None

  # iterate over each row in the DataFrame
  for index, row in forcasted_df_loaded.iterrows():
      ticker = row['Ticker']
      forecasted_high_price = round(float(row['High']), 2)
      forecasted_low_price = round(float(row['Low']), 2)
      forecasted_close_price = round(float(row['Close']), 2)

      # fetch the historical data for the ticker for the last available day
      stock_data = yf.download(ticker, period='1d')

      # extract the actual high, low, and close prices and date from the historical data
      actual_high_price = round(stock_data['High'].iloc[-1], 2)
      actual_low_price = round(stock_data['Low'].iloc[-1], 2)
      actual_close_price = round(stock_data['Close'].iloc[-1], 2)
      high_price_date = stock_data.index[-1].date()

      # update the 'Actual High Price', 'Actual Low Price', 'Actual Close Price', and 'Date' columns
      forcasted_df_loaded.at[index, 'Actual High Price'] = actual_high_price
      forcasted_df_loaded.at[index, 'Actual Low Price'] = actual_low_price
      forcasted_df_loaded.at[index, 'Actual Close Price'] = actual_close_price
      forcasted_df_loaded.at[index, 'Date'] = high_price_date

      # calculate the price differences and percentage differences
      high_price_difference = round(actual_high_price - forecasted_high_price, 2)
      high_price_difference_percentage = round((high_price_difference / actual_high_price) * 100, 2)
      low_price_difference = round(actual_low_price - forecasted_low_price, 2)
      low_price_difference_percentage = round((low_price_difference / actual_low_price) * 100, 2)
      close_price_difference = round(actual_close_price - forecasted_close_price, 2)
      close_price_difference_percentage = round((close_price_difference / actual_close_price) * 100, 2)

      # update the 'High Price Difference', 'High Price Difference Percentage', 'Low Price Difference', 'Low Price Difference Percentage', 'Close Price Difference', and 'Close Price Difference Percentage' columns
      forcasted_df_loaded.at[index, 'High Price Difference'] = high_price_difference
      forcasted_df_loaded.at[index, 'High Price Difference Percentage'] = high_price_difference_percentage
      forcasted_df_loaded.at[index, 'Low Price Difference'] = low_price_difference
      forcasted_df_loaded.at[index, 'Low Price Difference Percentage'] = low_price_difference_percentage
      forcasted_df_loaded.at[index, 'Close Price Difference'] = close_price_difference
      forcasted_df_loaded.at[index, 'Close Price Difference Percentage'] = close_price_difference_percentage

  # set the 'Date' column as the index
  forcasted_df_loaded.set_index('Date', inplace=True)

  # reset the index to a single index
  forcasted_df_loaded = forcasted_df_loaded.reset_index()

  #forcasted_df_loaded.to_csv('forward_test_df.csv', index=False)