# A workflow that:
- 1.) Scrapes news headlines
- 2.) Performs sentiment analysis on the headlines with up to 4 different models
- 3.) Ensembles the sentiment analyses
- 4.) Combines sentiment analysis with numeric stock data
- 5.) Gives the combined data to 3 different ML models to forecast stock price(s)
- 6.) Ensembles the forecasts
- 7.) Pushes the forecast(s) via Telegram notification with an option to continuously run the program

# Install

In [None]:

!pip install datasets
!pip install transformers
!pip install huggingface_hub
!pip install git+https://github.com/huggingface/peft.git
!pip install git+https://github.com/huggingface/accelerate.git
!pip install torch
!pip install googlesearch-python
!pip install GoogleNews
!pip install textblob
!pip install schedule
!pip install time
!pip install telebot
!pip install telethon
!pip install transformers
!pip install vadersentiment
!pip install alpha_vantage

In [None]:
import requests
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import schedule
import time
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from GoogleNews import GoogleNews
import yfinance as yf
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from prophet import Prophet
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout

## Telegram Tokens 

In [None]:

# Telegram bot token and chat ID used by send_forcast() to push forecast results.
# These are placeholders: supply real values at runtime and keep them out of version control.
BOT_TOKEN = "<BOT_TOKEN>"
chat_id = "<CHAT_ID>" 

In [None]:
############ GOOGLE NEWS LIMITS TO 10 ARTICLES ###################
# Immediately lends itself to event-based classification, or use another source
def scrape_news_headlines(ticker):
    """Search Google News for ``ticker`` and collect the result titles.

    Args:
        ticker: Search term handed to ``GoogleNews.search``.

    Returns:
        A list holding the ``'title'`` entry of every search result.
    """
    client = GoogleNews(lang='en')
    client.search(ticker)

    headlines = []
    for item in client.results():
        headlines.append(item['title'])
    return headlines

#ticker = 'CURLF'
#articles = scrape_news_headlines(ticker)
#print(articles)

# LLM

In [None]:
# Authenticate with the Hugging Face Hub; the Llama-2 weights loaded later in this notebook require access.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Requires access to the Llama-2 weights via Hugging Face.
# This setup is able to run on a T4 GPU.
# https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT_Forecaster
# https://github.com/AI4Finance-Foundation/FinGPT/blob/master/FinGPT_Inference_Llama2_13B_falcon_7B_for_Beginners.ipynb
# See how the flow works for these, and look at the Hugging Face dataset page used for training.



# NOTE: this loads the FinGPT *forecaster* model, which is not specifically a sentiment-analysis model.
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Base causal language model that the FinGPT adapter is applied to.
base_model = AutoModelForCausalLM.from_pretrained(
    'meta-llama/Llama-2-7b-chat-hf',
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,   # half precision; optional if you have enough VRAM
)
tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-chat-hf')

# Wrap the base model with the FinGPT forecaster PEFT weights and switch it to eval mode.
gpt_model = PeftModel.from_pretrained(base_model, 'FinGPT/fingpt-forecaster_dow30_llama2-7b_lora')
gpt_model = gpt_model.eval()

In [None]:
############# look at FinGPT_sentiment ##############

def gpt_sentiment_analysis(articles):
    """Score each headline with the global FinGPT model.

    NOTE(review): ``gpt_model`` is loaded as a causal language model, so
    ``outputs.logits`` are token logits rather than sentiment-class logits;
    treat the resulting score as experimental.

    Args:
        articles: Iterable of headline strings.

    Returns:
        DataFrame with columns ``'title'`` and ``'sentiment'``, where the
        score is the mean of the sigmoid-squashed logits, min-max normalised
        against the same logits.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    gpt_model.to(device)  # Move the model to the device

    sentiments = []
    # Pure inference: disabling autograd avoids building a graph over the
    # full logits tensor for every headline.
    with torch.no_grad():
        for article in articles:
            inputs = tokenizer(article, return_tensors='pt', max_length=512, truncation=True)
            inputs = {k: v.to(device) for k, v in inputs.items()}
            outputs = gpt_model(inputs['input_ids'], attention_mask=inputs['attention_mask'])

            sentiment_scores = torch.sigmoid(outputs.logits)[0]
            lowest = torch.min(sentiment_scores)
            spread = torch.max(sentiment_scores) - lowest
            if spread.item() == 0:
                # A zero range would give 0/0 = NaN; fall back to neutral.
                sentiments.append(0.5)
                continue

            normalized_sentiment = (torch.mean(sentiment_scores) - lowest) / spread
            sentiments.append(normalized_sentiment.cpu().item())

    return pd.DataFrame({'title': articles, 'sentiment': sentiments})

#ticker = 'AAPL'
#articles = scrape_news_headlines(ticker)
#sentiments = gpt_sentiment_analysis(articles)
#sentiments.head()

# Get News -- either google news or News API

# Sentiment Models

## Roberta

In [None]:
def roberta_sentiment_analysis(headlines):
    """Score headlines with a RoBERTa sequence classifier.

    NOTE(review): ``'roberta-base'`` is loaded with a fresh 3-label head, so
    the scores presumably come from an untrained classifier; confirm whether
    a sentiment fine-tuned checkpoint was intended.

    Args:
        headlines: List of headline strings.

    Returns:
        DataFrame with columns ``'title'``, ``'input_ids'`` (token ids per
        headline) and ``'sentiment'`` (probability of label index 1, min-max
        normalised over the batch to [0, 1]). When all raw scores are equal
        (for example a single headline) every score is the neutral 0.5
        instead of raising ``ZeroDivisionError``; an empty input yields an
        empty frame.
    """
    headlines_df = pd.DataFrame({'title': headlines})

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3)

    token_ids = []
    raw_scores = []
    with torch.no_grad():
        for title in headlines_df['title']:
            encoded = tokenizer.encode_plus(title,
                                            add_special_tokens=True,
                                            max_length=512,
                                            return_attention_mask=True,
                                            return_tensors='pt',
                                            truncation=True)
            token_ids.append(encoded['input_ids'].flatten().tolist())

            # The encoded tensors already have a batch dimension of 1, so
            # they can be fed to the model directly.
            logits = model(encoded['input_ids'], attention_mask=encoded['attention_mask']).logits
            raw_scores.append(torch.softmax(logits, dim=1)[0][1].item())

    # Min-max normalise, guarding the empty and zero-range cases.
    if raw_scores:
        min_score = min(raw_scores)
        score_range = max(raw_scores) - min_score
    else:
        min_score, score_range = 0.0, 0.0
    if score_range > 0:
        normalized_scores = [(score - min_score) / score_range for score in raw_scores]
    else:
        normalized_scores = [0.5] * len(raw_scores)

    headlines_df['input_ids'] = pd.Series(token_ids, index=headlines_df.index, dtype=object)
    headlines_df['sentiment'] = pd.Series(normalized_scores, index=headlines_df.index, dtype=float)

    return headlines_df

# Example usage
#ticker = 'AAPL'
#headlines = scrape_news_headlines(ticker)
#sentiments = roberta_sentiment_analysis(headlines)
#sentiments.head()

## Vader Sentiment

In [None]:
#VaderSentiment
def vader_sentiment_analysis(headlines):
    """Score headlines with VADER and rescale the compound score.

    Args:
        headlines: List of headline strings.

    Returns:
        DataFrame with columns ``'title'`` and ``'sentiment'``, where
        ``'sentiment'`` is ``(compound + 1) / 2``.
    """
    analyzer = SentimentIntensityAnalyzer()
    frame = pd.DataFrame({'title': headlines})

    # Raw compound score per headline, then shifted and halved.
    compound = frame['title'].apply(lambda text: analyzer.polarity_scores(text)['compound'])
    frame['sentiment'] = compound.apply(lambda value: (value + 1) / 2)

    return frame

#ticker = 'AAPL'
#headlines = scrape_news_headlines(ticker)
#sentiments = vader_sentiment_analysis(headlines)
#sentiments.head()

## Blob

In [None]:
from textblob import TextBlob
import pandas as pd

def blob_sentiment_analysis(headlines):
    """Score headlines with TextBlob polarity.

    Args:
        headlines: List of headline strings.

    Returns:
        DataFrame with columns ``'title'``, ``'sentiment'`` (raw TextBlob
        polarity) and ``'sentiment_normalized'`` (polarity min-max scaled
        over the batch to [0, 1]). When every polarity is identical, or the
        input is empty, the normalised value is the neutral 0.5 rather than
        NaN from a 0/0 division.
    """
    headlines_df = pd.DataFrame({'title': headlines})

    headlines_df['sentiment'] = headlines_df['title'].apply(lambda x: TextBlob(x).sentiment.polarity)

    min_sentiment = headlines_df['sentiment'].min()
    sentiment_range = headlines_df['sentiment'].max() - min_sentiment

    # `sentiment_range > 0` is False both for a zero range and for the NaN
    # produced by an empty frame, so both fall through to the neutral value.
    if sentiment_range > 0:
        normalized = (headlines_df['sentiment'] - min_sentiment) / sentiment_range
    else:
        normalized = pd.Series(0.5, index=headlines_df.index, dtype=float)
    headlines_df['sentiment_normalized'] = normalized

    return headlines_df

#ticker = 'AAPL'
#headlines = scrape_news_headlines(ticker)
#sentiments = blob_sentiment_analysis(headlines)
#sentiments.head()

# Ensemble Sentiment

In [None]:
def ensemble_sentiment_analysis(headlines, include_gpt):
    """Combine the per-model sentiment scores into a mean and spread.

    Args:
        headlines: List of headline strings.
        include_gpt: When true, the FinGPT score is added to the ensemble.

    Returns:
        DataFrame with columns ``'headline'``, ``'ensemble_sentiment'``
        (mean across models) and ``'ensemble_sentiment_std'`` (population
        standard deviation across models). Models whose score is NaN for a
        headline are left out of that headline's statistics.
    """
    model_scores = {
        'vader': vader_sentiment_analysis(headlines)['sentiment'],
        'roberta': roberta_sentiment_analysis(headlines)['sentiment'],
        # TextBlob's 'sentiment' column is the raw polarity; the value scaled
        # to [0, 1] like the other models lives in 'sentiment_normalized'.
        'textblob': blob_sentiment_analysis(headlines)['sentiment_normalized'],
    }
    if include_gpt:
        model_scores['gpt'] = gpt_sentiment_analysis(headlines)['sentiment']

    # One column per model, one row per headline.
    score_table = pd.DataFrame(model_scores)

    ensemble_df = pd.DataFrame({'headline': headlines})
    ensemble_df['ensemble_sentiment'] = score_table.mean(axis=1).to_numpy()
    # ddof=0 matches the population standard deviation np.std computes.
    ensemble_df['ensemble_sentiment_std'] = score_table.std(axis=1, ddof=0).to_numpy()

    return ensemble_df

#ticker = 'AAPL'
#headlines = scrape_news_headlines(ticker)
#sentiments = ensemble_sentiment_analysis(headlines, include_gpt=False)
#sentiments.tail()

# Sentiment Pipeline

In [None]:
def scrape_sentiment_ensemble_pipeline(ticker, include_gpt):
    """Scrape headlines for ``ticker`` and return their ensemble sentiment.

    Args:
        ticker: Search term passed to ``scrape_news_headlines``.
        include_gpt: Forwarded to ``ensemble_sentiment_analysis``.

    Returns:
        The DataFrame produced by ``ensemble_sentiment_analysis``.
    """
    return ensemble_sentiment_analysis(
        scrape_news_headlines(ticker),
        include_gpt=include_gpt,
    )

# Try the sentiment pipeline on one ticker (scrapes live headlines, without the GPT model).
scrape_sentiment_ensemble_pipeline(ticker = 'TSLA', include_gpt=False)

# Numeric models

## Get Data

In [None]:
def get_numeric_data(ticker, period="3mo", interval="1h"):
    """Download price history for ``ticker`` and add technical indicators.

    Args:
        ticker: Symbol passed to ``yf.download``.
        period: Look-back window; must be one of '1d', '5d', '1mo', '3mo',
            '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max'.
        interval: Bar size. NOTE(review): the forecasting functions in this
            notebook read a ``'Datetime'`` column, which yfinance presumably
            only produces for intraday intervals; confirm before changing
            the default.

    Returns:
        DataFrame of the numeric columns plus ``'MA_50'``, ``'MA_200'`` and
        ``'RSI'``, with the time index reset into a regular column.
    """
    data = yf.download(tickers=ticker, period=period, interval=interval)

    # The former `yf.Ticker(ticker).info['marketCap']` lookup was removed:
    # its value was never used, it cost an extra network request, and it
    # raised KeyError for tickers that do not report a market cap.

    closes = data['Close']

    # Simple moving averages over 50 and 200 bars.
    data['MA_50'] = closes.rolling(window=50).mean()
    data['MA_200'] = closes.rolling(window=200).mean()

    # RSI from 14-bar simple averages of gains and losses.
    delta = closes.diff(1)
    up, down = delta.copy(), delta.copy()
    up[up < 0] = 0
    down[down > 0] = 0
    roll_up = up.rolling(window=14).mean()
    roll_down = down.rolling(window=14).mean().abs()
    RS = roll_up / roll_down
    data['RSI'] = 100.0 - (100.0 / (1.0 + RS))

    # Keep numeric columns only and expose the time index as a column.
    numeric_df = data.select_dtypes(include=['number']).reset_index()

    return numeric_df

#ticker = 'AAPL'
#numeric = get_numeric_data(ticker)
#numeric.head(10)

## Concat with sentiment

In [None]:
def concat_numeric_sentiments(numeric_df, sentiments_df):
    """Place the sentiment columns next to the numeric columns.

    The frames are joined side by side on their row index (``axis=1``), so
    rows present in only one frame are padded with NaN in the other frame's
    columns.

    Args:
        numeric_df: Price/indicator frame from ``get_numeric_data``.
        sentiments_df: Sentiment frame; it is not modified.

    Returns:
        A new DataFrame holding the numeric columns, the sentiment columns
        and a ``'timestamp'`` column set to the time of the call, with
        duplicated column names dropped and a fresh integer index.
    """
    # Work on a copy so the caller's frame does not silently gain a column.
    stamped_sentiments = sentiments_df.copy()
    stamped_sentiments['timestamp'] = pd.Timestamp.now()

    combined_df = pd.concat([numeric_df, stamped_sentiments], axis=1)

    # Keep the first occurrence of any repeated column name.
    combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]

    return combined_df.reset_index(drop=True)

# Build the combined numeric + sentiment frame for one ticker and show its last rows.
ticker = 'AAPL'
numeric_df = get_numeric_data(ticker)
sentiments_df = scrape_sentiment_ensemble_pipeline(ticker = ticker, include_gpt=False)
combined_df = concat_numeric_sentiments(numeric_df, sentiments_df)
combined_df.tail()

## prophet

In [None]:
def prophet_forecast(df, future_days):
    """Fit Prophet on the close prices and forecast ``future_days`` periods.

    Args:
        df: Frame with ``'Datetime'`` and ``'Close'`` columns.
        future_days: Number of periods requested from
            ``make_future_dataframe``; no ``freq`` is passed, so Prophet's
            default frequency applies.

    Returns:
        NumPy array with the last ``future_days`` ``'yhat'`` values.
    """
    # Prophet expects the columns 'ds' (timezone-naive datetimes) and 'y'.
    history = df[['Datetime', 'Close']].rename(columns={'Datetime': 'ds', 'Close': 'y'})
    history['ds'] = pd.to_datetime(history['ds']).dt.tz_localize(None)

    model = Prophet()
    model.fit(history)

    horizon = model.make_future_dataframe(periods=future_days)
    predicted = model.predict(horizon)

    return predicted['yhat'].values[-future_days:]

# Prophet forecast on numeric data only; note that this rebinds `combined_df`.
ticker = 'CURLF'  # was 'CURlF' (lowercase "l"), which is not the symbol used elsewhere in this notebook
combined_df = get_numeric_data(ticker)
future_close_prices = prophet_forecast(combined_df, future_days = 30)
print("Future close prices:", future_close_prices)

## LSTM

### LSTM Train

In [None]:
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import matplotlib.pyplot as plt

def train_lstm_forcast(df, time_steps=60, epochs=100):
    """Train a two-layer LSTM on the close prices and predict the next step.

    Only the ``'Datetime'`` and ``'Close'`` columns are read; the other
    columns of the combined frame are not used by this model, so they are
    no longer required to be present.

    Args:
        df: Frame with ``'Datetime'`` and ``'Close'`` columns.
        time_steps: Length of the input window fed to the LSTM.
        epochs: Number of training epochs.

    Returns:
        Array of shape (1, 1) holding the predicted next close price on the
        original price scale.

    Raises:
        ValueError: If fewer than ``time_steps + 1`` non-missing close
            prices are available, so no training window can be built.

    Side effects:
        Saves the weights to ``'lstm_model.h5'``, prints the prediction and
        shows a plot.
    """
    close_df = pd.DataFrame(
        {'Close': df['Close'].to_numpy()},
        index=pd.Index(pd.to_datetime(df['Datetime']), name='Datetime'),
    )
    # Missing prices would turn every window that contains them into NaN.
    close_df = close_df.dropna()

    if len(close_df) <= time_steps:
        raise ValueError(
            f"Need more than {time_steps} close prices to train, got {len(close_df)}"
        )

    # Scale the data using Min-Max Scaler
    scaler = MinMaxScaler(feature_range=(0, 1))
    close_scaled = scaler.fit_transform(close_df)

    # Sliding windows: each sample is `time_steps` consecutive prices and
    # its target is the price that follows.
    sample_count = len(close_scaled) - time_steps
    X = np.array([close_scaled[i:i + time_steps] for i in range(sample_count)])
    y = close_scaled[time_steps:]

    # Reshape the data for LSTM: (samples, time steps, features)
    X = X.reshape(X.shape[0], X.shape[1], 1)

    # Create the LSTM model
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))

    model.compile(optimizer='adam', loss='mean_squared_error')

    model.fit(X, y, epochs=epochs, batch_size=32, verbose=0)
    # NOTE(review): newer Keras releases may require a '.weights.h5' suffix
    # here; the name is kept because lstm_forcast() loads this exact file.
    model.save_weights('lstm_model.h5')

    # Predict the step that follows the most recent window.
    latest_window = close_scaled[-time_steps:].reshape(1, time_steps, 1)
    prediction = model.predict(latest_window)

    # Inverse transform the prediction back to the price scale
    prediction = scaler.inverse_transform(prediction)

    print("Predicted Close Price:", prediction[0][0])

    # Plot the recent history and the predicted point one hour later.
    plt.plot(close_df.index[-time_steps:], close_df.values[-time_steps:])
    plt.plot([close_df.index[-1] + pd.Timedelta(hours=1)], [prediction[0][0]], 'ro', label='Predicted Close Price')
    plt.legend(loc='upper left')
    plt.show()

    return prediction

#combined_df = concat_numeric_sentiments(numeric_df, sentiments_df)
#train_lstm_forcast(combined_df)

### LSTM Forecast

In [None]:
def lstm_forcast(df, future_days):
    """Forecast close prices with the LSTM weights saved by training.

    The network is rebuilt with the same architecture as in
    ``train_lstm_forcast`` and its weights are loaded from
    ``'lstm_model.h5'``. Forecasting is iterative: each prediction is
    appended to the input window to produce the next one.

    NOTE(review): the scaler is re-fitted on ``df``; if ``df`` covers a
    different price range than the training data, the scaled inputs will
    not match what the network was trained on.

    Args:
        df: Frame with a ``'Close'`` column holding at least 60 rows.
        future_days: Number of consecutive steps to forecast.

    Returns:
        List of ``future_days`` predicted close prices on the price scale.

    Raises:
        ValueError: If fewer than 60 close prices are available.
    """
    time_steps = 60

    close_df = df[['Close']]
    if len(close_df) < time_steps:
        raise ValueError(
            f"Need at least {time_steps} close prices to forecast, got {len(close_df)}"
        )

    # Scale the data using Min-Max Scaler
    scaler = MinMaxScaler(feature_range=(0, 1))
    close_scaled = scaler.fit_transform(close_df)

    # Same architecture as in training so the saved weights fit.
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(time_steps, 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Load the trained model
    model.load_weights('lstm_model.h5')

    # Start from the most recent window of scaled prices.
    future_input = close_scaled[-time_steps:].reshape(1, time_steps, 1)

    future_close_prices = []
    for _ in range(future_days):
        scaled_prediction = model.predict(future_input)
        price = scaler.inverse_transform(scaled_prediction)
        future_close_prices.append(price[0, 0])
        # Roll the window forward with the *scaled* prediction. Appending the
        # inverse-transformed price would mix price-scale values into a
        # window the network expects in [0, 1].
        future_input = np.append(
            future_input[:, 1:, :], scaled_prediction.reshape((1, 1, 1)), axis=1
        )

    return future_close_prices

#combined_df = concat_numeric_sentiments(numeric_df, sentiments_df)
#future_close_prices = lstm_forcast(combined_df, future_days = 30)
#print("Future close prices:", future_close_prices)

## GRU

### GRU Train

In [None]:
def train_gru_forcast(df):
    """Train a two-layer GRU on the close prices and plot a hold-out check.

    The first 80% of the scaled ``'Close'`` series is used for training and
    the rest for validation. The weights are saved to ``'gru_model.h5'``,
    the rescaled predictions and actual values are printed and plotted, and
    nothing is returned.

    Args:
        df: Combined frame containing ``'headline'``, ``'Datetime'`` and
            ``'Close'`` columns.
    """
    window = 60

    def make_windows(series, width):
        # Each sample is `width` consecutive rows; the target is the next row.
        count = len(series) - width
        inputs = [series[start:start + width] for start in range(count)]
        targets = [series[start + width] for start in range(count)]
        return np.array(inputs), np.array(targets)

    # Work on a private frame without the headline text.
    frame = df.drop('headline', axis=1)
    frame['Datetime'] = pd.to_datetime(frame['Datetime'])
    frame = frame[['Datetime'] + [name for name in frame.columns if name != 'Datetime']]

    # Scale the close prices into [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_close = scaler.fit_transform(frame[['Close']])

    # Chronological 80/20 split.
    split_at = int(len(scaled_close) * 0.8)
    train_part = scaled_close[:split_at]
    test_part = scaled_close[split_at:]

    X_train, y_train = make_windows(train_part, window)
    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    X_test, y_test = make_windows(test_part, window)
    print("X_test shape:", X_test.shape)
    print("y_test shape:", y_test.shape)

    # GRU input layout: (samples, time steps, features).
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    network = Sequential()
    network.add(GRU(units=50, return_sequences=True, input_shape=(window, 1)))
    network.add(Dropout(0.2))
    network.add(GRU(units=50, return_sequences=False))
    network.add(Dropout(0.2))
    network.add(Dense(units=1))
    network.compile(optimizer='adam', loss='mean_squared_error')

    network.fit(
        X_train,
        y_train,
        epochs=200,
        batch_size=32,
        validation_data=(X_test, y_test),
        verbose=0,
    )
    network.save_weights('gru_model.h5')

    # Compare hold-out predictions with the actual values on the price scale.
    predictions_rescaled = scaler.inverse_transform(network.predict(X_test))
    y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))

    print("Predictions (rescaled):", predictions_rescaled)
    print("Actual (rescaled):", y_test_rescaled)

    plt.plot(y_test_rescaled, label='Actual')
    plt.plot(predictions_rescaled, label='Predicted')
    plt.legend()
    plt.show()

#combined_df = concat_numeric_sentiments(numeric_df, sentiments_df)
#train_gru_forcast(combined_df)

### GRU Forecast

In [None]:
def gru_forcast(df, future_days):
    """Forecast close prices with the GRU weights saved by training.

    The network is rebuilt with the same architecture as in
    ``train_gru_forcast`` and its weights are loaded from
    ``'gru_model.h5'``. Forecasting is iterative: each scaled prediction is
    appended to the input window to produce the next one.

    NOTE(review): the scaler is re-fitted on ``df``; if ``df`` covers a
    different price range than the training data, the scaled inputs will
    not match what the network was trained on.

    Args:
        df: Frame with a ``'Close'`` column holding at least 60 rows.
        future_days: Number of consecutive steps to forecast.

    Returns:
        List of ``future_days`` predicted close prices on the price scale;
        the first entry is the step right after the last observed price.

    Raises:
        ValueError: If fewer than 60 close prices are available.
    """
    seq_len = 60

    close_df = df[['Close']]
    if len(close_df) < seq_len:
        raise ValueError(
            f"Need at least {seq_len} close prices to forecast, got {len(close_df)}"
        )

    # Scale the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_close = scaler.fit_transform(close_df)

    # Most recent window of scaled prices.
    future_input = scaled_close[-seq_len:].reshape((1, seq_len, 1))

    # Same architecture as in training so the saved weights fit.
    model = Sequential()
    model.add(GRU(units=50, return_sequences=True, input_shape=(seq_len, 1)))
    model.add(Dropout(0.2))
    model.add(GRU(units=50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Load the trained model
    model.load_weights('gru_model.h5')

    future_close_prices = []
    for _ in range(future_days):
        # Predict first, record, then roll the window. The previous version
        # rolled before recording, so the first-step prediction was dropped
        # and every returned value was one step too far ahead.
        scaled_prediction = model.predict(future_input)
        future_close_prices.append(scaler.inverse_transform(scaled_prediction)[0, 0])
        future_input = np.append(
            future_input[:, 1:, :], scaled_prediction.reshape((1, 1, 1)), axis=1
        )

    return future_close_prices
# One-step GRU forecast for a single ticker.
# Requires the 'gru_model.h5' weights file written by train_gru_forcast().
ticker = 'PG'
include_gpt=False
sentiments_df = scrape_sentiment_ensemble_pipeline(ticker, include_gpt=include_gpt)
numeric_df = get_numeric_data(ticker)
# The combined frame only needs to be built once (a duplicated call was removed).
combined_df = concat_numeric_sentiments(numeric_df, sentiments_df)
future_close_prices = gru_forcast(combined_df, future_days = 1)
print("Future close prices:", future_close_prices)

## Ensemble Forecast

In [None]:
def focast_ensemble(df, future_days):
    """Average the Prophet, LSTM and GRU forecasts and plot the result.

    Each model's forecast is printed as soon as it is computed. The sum
    relies on the Prophet forecast being a NumPy array, so adding the list
    results of the other two models is element-wise.

    Args:
        df: Combined frame passed unchanged to every forecaster.
        future_days: Number of steps requested from every forecaster.

    Returns:
        The element-wise mean of the three forecasts.
    """
    member_forecasts = []
    for forecaster in (prophet_forecast, lstm_forcast, gru_forcast):
        member = forecaster(df, future_days)
        print(member)
        member_forecasts.append(member)

    prophet_part, lstm_part, gru_part = member_forecasts
    ensemble_forecast = (prophet_part + lstm_part + gru_part) / 3

    plt.plot(ensemble_forecast, label='Ensemble')
    plt.legend()
    plt.show()
    return ensemble_forecast

# 30-step ensemble forecast for one ticker.
# lstm_forcast() and gru_forcast() load 'lstm_model.h5' and 'gru_model.h5', so both files must already exist.
ticker = 'PG'
include_gpt=False
sentiments_df = scrape_sentiment_ensemble_pipeline(ticker, include_gpt=include_gpt)
numeric_df = get_numeric_data(ticker)
combined_df = concat_numeric_sentiments(numeric_df, sentiments_df)
future_close_prices = focast_ensemble(combined_df, future_days = 30)
print("Future close prices:", future_close_prices)

# Forecast Pipeline

In [None]:
def forecast_pipeline(ticker, include_gpt, future_days):
    """Run sentiment scraping, data download and the forecast ensemble.

    Args:
        ticker: Symbol used for both the news search and the price download.
        include_gpt: Forwarded to the sentiment ensemble.
        future_days: Number of steps requested from the forecast ensemble.

    Returns:
        The ensemble forecast returned by ``focast_ensemble``.
    """
    sentiment_frame = scrape_sentiment_ensemble_pipeline(ticker, include_gpt)
    price_frame = get_numeric_data(ticker)
    merged_frame = concat_numeric_sentiments(price_frame, sentiment_frame)
    return focast_ensemble(merged_frame, future_days)

#ticker = 'TSLA'
#include_gpt=False
#future_days=30
#forecast_pipeline(ticker, include_gpt, future_days)
################ SIIIIICK #####################

# Telegram Push

In [None]:
def send_forcast(ticker, prediction):
    """Send the first predicted price for ``ticker`` to the Telegram chat.

    Args:
        ticker: Symbol shown in the message.
        prediction: Sequence of predicted prices; only the first element is
            sent, rounded to two decimals.

    Returns:
        The decoded JSON body of the Telegram API response.
    """
    message = f"Predicted stock price for: \n {ticker}: {round(prediction[0], 2)}"

    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage"

    # Passing the values through `params` lets requests URL-encode the
    # newline, spaces and any reserved characters in the message; the
    # timeout keeps a stalled connection from blocking the scheduler loop.
    response = requests.get(
        url,
        params={"chat_id": chat_id, "text": message},
        timeout=30,
    )
    return response.json()

# WHOLE PIPELINE

In [None]:
def active_forcaster_aggregator(ticker='HD', include_gpt=False, future_days=1):
    """Run the forecast pipeline once and push the result to Telegram.

    The defaults reproduce the previously hard-coded configuration, so the
    function can still be called (and scheduled) without arguments.

    Args:
        ticker: Symbol to forecast.
        include_gpt: Whether the GPT model joins the sentiment ensemble.
        future_days: Number of steps to forecast; only the first predicted
            value is sent in the notification.

    Returns:
        The ensemble forecast returned by ``forecast_pipeline``.
    """
    prediction = forecast_pipeline(ticker, include_gpt=include_gpt, future_days=future_days)
    send_forcast(ticker, prediction)
    return prediction

# Run the whole pipeline once right away; this also sends a Telegram message.
active_forcaster_aggregator()


# Repeater

In [None]:
# Run the pipeline every specified interval
# Clear previously registered jobs first so re-running this cell does not stack duplicates.
schedule.clear()
schedule.every(1).minutes.do(active_forcaster_aggregator)  # Run every 1 minute

# Poll the scheduler once per second; this loop has no exit condition of its own.
while True:
    schedule.run_pending()
    time.sleep(1)