# Forecasting using only numeric stock data

No sentiment analysis or news aggregation, just good ole numbers in the data. 

Get the High, Low, and Close forcasts for the day and get them sent to your phone via Telegram.

NOTE: This notebook is set up to be run on Google Colab






## Install & Import Dependencies

In [None]:
# To load the the pre-trained model and tokenizer down the line
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%%capture --no-stderr
%pip install --quiet -U datasets transformers huggingface_hub torch googlesearch-python GoogleNews textblob schedule time telebot telethon transformers vaderSentiment alpha_vantage Prophet tensorflow optuna newsapi-python
%pip install --quiet -U git+https://github.com/huggingface/peft.git
%pip install --quiet -U git+https://github.com/huggingface/accelerate.git
%pip install --quiet -U --upgrade tensorflow
%pip install --quiet -U schedule
%pip install --quiet -U vaderSentiment
%pip install --quiet -U GoogleNews

In [109]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from textblob import TextBlob
import schedule
import time
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from GoogleNews import GoogleNews
import yfinance as yf
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from prophet import Prophet
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

## Telegram Tokens

In [None]:
# Include Telegram bot token and chat ID for pushing forecast results
BOT_TOKEN = "<BOT_TOKEN>"
chat_id = "<CHAT_ID>" 

# Data
Call some basic stock data and calculate some standard technical indicators to suppliment the data.

In [None]:
def get_numeric_data(ticker):
    ###gru###period = "3mo" #must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']"
    period = '3mo'
    interval = "1h"
    # Retrieve the historical market data
    data = yf.download(tickers=ticker, period=period, interval=interval, group_by='ticker')

    # Make the df flat -- yfinance.download() makes multiindex as of Oct2024
    data = data.stack(level=0)

    data = data.reset_index()  # Reset both levels of index to columns

    data.rename(columns={
        'level_1': 'Ticker'
    }, inplace=True)

    # Calculate the moving averages
    data['MA_50'] = data['Close'].rolling(window=50).mean()
    data['MA_200'] = data['Close'].rolling(window=200).mean()

    # Calculate the RSI
    delta = data['Close'].diff(1)
    up, down = delta.copy(), delta.copy()
    up[up < 0] = 0
    down[down > 0] = 0
    roll_up = up.rolling(window=14).mean()
    roll_down = down.rolling(window=14).mean().abs()
    RS = roll_up / roll_down
    data['RSI'] = 100.0 - (100.0 / (1.0 + RS))

    # Calculate the MACD
    data['EMA_12'] = data['Close'].ewm(span=12, adjust=False).mean()
    data['EMA_26'] = data['Close'].ewm(span=26, adjust=False).mean()
    data['MACD'] = data['EMA_12'] - data['EMA_26']
    data['Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()

    # Create a single dataframe called numeric_df with only numeric columns
    numeric_df = data.select_dtypes(include=['number'])

    # Add back Date and Ticker columns
    numeric_df['Date'] = data['Datetime']
    numeric_df['Ticker'] = data['Ticker']


    # Set Date as index
    #numeric_df.set_index('Date', inplace=True)
    #numeric_df.set_index('Date', inplace=True)

    return numeric_df


#ticker = 'SPY'
#numeric = get_numeric_data(ticker)
#numeric.tail()

[*********************100%***********************]  1 of 1 completed
  data = data.stack(level=0)


Price,Open,High,Low,Close,Volume,MA_50,MA_200,RSI,EMA_12,EMA_26,MACD,Signal,Date,Ticker
415,607.231995,607.820007,607.195007,607.549988,3346931,590.962872,595.403302,85.79595,603.019635,598.88962,4.130014,3.68651,2025-01-22 16:30:00+00:00,SPY
416,607.559998,607.700012,606.941223,607.066284,2835236,591.504198,595.400134,82.394304,603.642196,599.495299,4.146897,3.778587,2025-01-22 17:30:00+00:00,SPY
417,607.080017,607.626709,606.830017,606.905029,2324274,591.977499,595.394334,85.294477,604.144171,600.044168,4.100002,3.84287,2025-01-22 18:30:00+00:00,SPY
418,606.909973,607.619995,606.78009,607.119995,3100824,592.467899,595.394309,88.059233,604.60199,600.568303,4.033686,3.881034,2025-01-22 19:30:00+00:00,SPY
419,607.119995,607.349976,606.289978,606.320007,4309696,592.9874,595.387834,86.239823,604.8663,600.994356,3.871945,3.879216,2025-01-22 20:30:00+00:00,SPY


# GRU forcast
This is the combined training and forecasting of/with the GRU model

The model has been tuned to a workable extent.

A GRU model is used here because:
- 1.) They are good with time-series data
- 2.) They handel long-term dependencies well
- 3.) They are simple to create and are computationally efficient
- 4.) This partictular architecture worked better than LSTM, Prophet, and BRNNs both in forward and backward testing

I include Monte Carlo simulations to increase the accurary of the Forecasts

In [None]:
import tensorflow as tf

def gru_forcast(target, df, future_days):
    #print(target)
    num_mc_simulations=100

    # Load the data
    df = df.copy()

    # Convert the Datetime column to datetime format
    df['Date'] = pd.to_datetime(df['Date'])

    # Move the Datetime column to the first position
    cols = df.columns.tolist()
    cols.insert(0, cols.pop(cols.index('Date')))
    df = df[cols]

    # Create a new dataframe with only the 'Close' column
    close_df = df[[target]]

    # Scale the data
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_close = scaler.fit_transform(close_df)

    # Prepare the input data for the model
    seq_len = 1
    last_seq = scaled_close[-seq_len:]
    last_seq = last_seq.reshape((1, seq_len, 1))

    ########################################################
    # Uncomment the following lines to load the pre-trained model after training below
    
    # Load the trained model
    #from google.colab import drive
    #drive.mount('/content/drive')
    #model = tf.keras.models.load_model('/content/drive/My Drive/<MODEL_NAME>.keras')
    ########################################################
    # Make predictions
    future_close_prices_mc = []
    for _ in range(num_mc_simulations):
        future_close_prices = []
        last_seq_mc = scaled_close[-seq_len:]
        last_seq_mc = last_seq_mc.reshape((1, seq_len, 1))
        for i in range(future_days):
            predictions = model.predict(last_seq_mc, verbose=0)
            predictions_rescaled = scaler.inverse_transform(predictions)
            future_close_prices.append(predictions_rescaled[0, 0])
            predictions = predictions.reshape((1, 1, 1));  # Reshape predictions to (1, 1, 1)
            last_seq_mc = np.append(last_seq_mc[:, 1:, :], predictions, axis=1)
        future_close_prices_mc.append(future_close_prices)

    # Calculate the Monte Carlo average
    future_close_prices_mc_avg = np.mean(future_close_prices_mc, axis=0)

    ########################################################
    # Comment out after the model is trained

    # Save the model
    model.save('/content/drive/My Drive/<MODEL_NAME>.keras')
    ########################################################

    return future_close_prices_mc_avg

#ticker = 'SPY'
#future_days = 1
#target = 'Close'
#df = get_numeric_data(ticker)
#future_close_prices = gru_forcast(target, df, future_days=future_days)
#print("Future close prices:", future_close_prices)

[*********************100%***********************]  1 of 1 completed
  data = data.stack(level=0)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Future close prices: [604.12006 602.2078  600.5362  599.06256 597.7588 ]


# Numeric Forecasting Pipeline
Get the data and get the High, Low, Close price Forecasts

In [None]:
def forecast_pipeline(ticker, future_days):
  print(ticker)
  forcasted_df = pd.DataFrame()

  #data
  df = get_numeric_data(ticker)


  #forcasts
  forcasted_df['High']  = gru_forcast('High', df, future_days)
  forcasted_df['Low'] = gru_forcast('Low', df, future_days)
  forcasted_df['Close'] =  gru_forcast('Close', df, future_days)

  # Round all values to 2 decimal places
  forcasted_df = forcasted_df.round(2)

  return forcasted_df

#ticker_list = ['SPY','FNGU']
#future_days=1
#predictions=[]
#for ticker in ticker_list:
    #prediction = forecast_pipeline(ticker, future_days)
    #send_forcast(ticker, prediction)
    #predictions.append(prediction)  # append each prediction to the list
    #predictions.append({'Ticker': ticker, 'Prediction': prediction})  # append each prediction to the list
    #print(prediction)

# Push Forecasts to Telegram

In [None]:
def send_forcast(ticker, df, prev_close, future_days):
    forecasted_high = round(df['High'][0], 2)
    forecasted_low = round(df['Low'][0], 2)
    forecasted_close = round(df['Close'][0], 2)
    forecasted_high_vs_prev_close_difference = round(forecasted_high - prev_close, 2)

    message = f"Predicted Prices for: \n {ticker}:\n"
    message += f"Days: {future_days}\n"
    message += f"High: {forecasted_high}\n"
    message += f"Low: {forecasted_low}\n"
    message += f"Close: {forecasted_close}\n"
    #message += f"Forecasted High vs Prev Close Difference: {forecasted_high_vs_prev_close_difference}\n"

    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage?chat_id={chat_id}&text={message}"

    requests.get(url).json()

#ticker = 'SPY'
#include_gpt=False
#future_days=1
#forcasts = forcast_pipeline(ticker, include_gpt, future_days)
#send_forcast(ticker, forcasts)

# Pipeline
Data, forecasting, and aggregation pipeline

In [132]:
import yfinance as yf
import pandas as pd

def active_forcaster_aggregator(future_days):
    ticker_list =  ['SPY', 'FNGU' #'EURUSD=X',
                    #'CROX',
                    #'YETI', 'ATKR', 'OTTR', 'BLDR', 'JHX', 'HD', 'LOW',
                    #'LPX', 'LEN', 'DHI', 'TOL', 'MHK', 'IP', 'GPK', 'KNX', 'ODFL', 'SAIA', 'TFII', 'XPO',
                    #'VITL', 'CALM', 'SHW', 'PPG'
                    #'AAPL', 'TSLA', 'CURLF', 'INQD','ACB','AUR','GRST',
                    #'AXP', 'AMGN', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD',
                    #'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE',
                    #'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS','DOW'
                    ]

    future_days=future_days #only works 1 day so far
    predictions = []  # create an empty list to store predictions
    for ticker in ticker_list:
        print(ticker)
        prediction = forecast_pipeline(ticker, future_days)
        stock_data = yf.download(ticker, period='1d')
        prev_close = round(stock_data['Close'].iloc[-1], 2)
        send_forcast(ticker, prediction, prev_close, future_days)
        predictions.append({'Ticker': ticker, 'High': prediction['High'].iloc[0], 'Low': prediction['Low'].iloc[0], 'Close': prediction['Close'].iloc[0]})

    forcasted_df = pd.DataFrame(predictions)
    return forcasted_df

forcasted = active_forcaster_aggregator(future_days=1)
forcasted.head()

[*********************100%***********************]  1 of 1 completed

SPY
SPY



  data = data.stack(level=0)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

FNGU
FNGU



  data = data.stack(level=0)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Ticker,High,Low,Close
0,SPY,605.130005,603.909973,604.119995
1,FNGU,654.76001,649.150024,651.98999


## Push the Initial Forecasts

In [None]:

message_1 = f"Good morning. Please stand by for your daily forcast."
url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage?chat_id={chat_id}&text={message_1}"
requests.get(url).json()

forcast_one = active_forcaster_aggregator(future_days=1)
#forcast_five = active_forcaster_aggregator(future_days=5)
#forcast_ten = active_forcaster_aggregator(future_days=10)
#forcast_thirty = active_forcaster_aggregator(future_days=30)

message_2 = f'Call if I can be of service. Happy hunting. S Rioghal mo dhream'
url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage?chat_id={chat_id}&text={message_2}"
requests.get(url).json()



# Repeater
(optional) If you want hourly updates on news and price forecasts (if the news is salient) this will continuously run.

In [None]:
def agregate_forecasts():
  forcast_one = active_forcaster_aggregator(future_days=1)
  #forcast_five = active_forcaster_aggregator(future_days=5)
  #forcast_ten = active_forcaster_aggregator(future_days=10)
  #forcast_thirty = active_forcaster_aggregator(future_days=30)

In [None]:
# Run the pipeline every specified interval
schedule.clear()
schedule.every(60).minutes.do(agregate_forecasts)  # Run every 1 minute

while True:
    schedule.run_pending()
    time.sleep(1)

[*********************100%***********************]  1 of 1 completed

SPY
SPY



  data = data.stack(level=0)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

FNGU
FNGU



  data = data.stack(level=0)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


KeyboardInterrupt: 