<a href="https://colab.research.google.com/github/ayush6233/Sentiment-aware-LSTM-for-confident-time-series-forecasting./blob/main/stockprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import numpy as np
import pandas as pd
import yfinance as yf
import os, requests
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.metrics import mean_absolute_error
from datetime import datetime
ticker = "AAPL"
start_date = "2024-04-01"
end_date = "2025-04-01"
window_size = 60

## Fetch prices
data = yf.download(ticker, start=start_date, end=end_date, progress=False)
prices = data['Close'].copy()
dates = prices.index
N = len(prices)
## we will ofc assume N>=60

## fetch news
FINNHUB_KEY = os.getenv("FINNHUB_API_KEY", "d2tei5hr01qr5a729ho0d2tei5hr01qr5a729hog").strip()
analyzer = SentimentIntensityAnalyzer()

def fetch_finnhub_news(symbol, from_date, to_date, api_key):
    url = "https://finnhub.io/api/v1/company-news"
    params = {"symbol": symbol, "from": from_date, "to": to_date, "token": api_key}
    r = requests.get(url, params=params, timeout=20)
    r.raise_for_status()
    items = r.json()
    rows = []
    for it in items:
        ts = it.get("datetime")
        dt = datetime.utcfromtimestamp(int(ts)).date()
        text = (it.get("headline","") + ". " + (it.get("summary") or ""))
        rows.append({"date": pd.to_datetime(dt), "text": text, "url": it.get("url")})
    return pd.DataFrame(rows)




df_news = fetch_finnhub_news(ticker, start_date, end_date, FINNHUB_KEY)

# If empty, create zero-sentiment series for all trading days

# compute VADER scores and average per day
df_news['compound'] = df_news['text'].apply(lambda t: analyzer.polarity_scores(str(t))['compound'])
daily_sent = df_news.groupby('date')['compound'].mean()

daily_sent_full = daily_sent.reindex(dates, fill_value=0.0)

price_values = prices.values.reshape(-1, 1)
sentiment_values = daily_sent_full.values.reshape(-1, 1)


S = N - window_size
train_samples = int(S * 0.8)
test_samples = S - train_samples

train_raw_end_idx = window_size + train_samples - 1
train_raw_end_idx_plus1 = train_raw_end_idx + 1


price_scaler = MinMaxScaler()
sentiment_scaler = MinMaxScaler()
price_scaler.fit(price_values[:train_raw_end_idx_plus1])
sentiment_scaler.fit(sentiment_values[:train_raw_end_idx_plus1])

scaled_prices = price_scaler.transform(price_values)
scaled_sentiments = sentiment_scaler.transform(sentiment_values)

# Build equences
X, y = [], []
for i in range(window_size, N):
    price_win = scaled_prices[i-window_size:i, 0]
    sent_win = scaled_sentiments[i-window_size:i, 0]
    X.append(np.column_stack((price_win, sent_win)))
    y.append(scaled_prices[i, 0])
X = np.array(X)           # shape (S, window_size, 2)
y = np.array(y).reshape(-1, 1)


X_train, X_test = X[:train_samples], X[train_samples:]
y_train, y_test = y[:train_samples], y[train_samples:]

# # quick sanity checks
# assert X_train.shape[0] == y_train.shape[0]
# assert X_test.shape[0] == y_test.shape[0]

# model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(window_size, 2)),
    Dropout(0.2),
    LSTM(32),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
es = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, callbacks=[es], shuffle=False, verbose=1)

# Predict and inverse transform
pred_scaled = model.predict(X_test)
true_scaled = y_test

pred_prices = price_scaler.inverse_transform(pred_scaled)
true_prices = price_scaler.inverse_transform(true_scaled)

#evaluation
mae = mean_absolute_error(true_prices, pred_prices)
eps = 1e-8
mape = np.mean(np.abs((true_prices - pred_prices) / np.maximum(np.abs(true_prices), eps))) * 100
print(f"MAE = {mae:.4f}")
print(f"MAPE = {mape:.4f}%")


  data = yf.download(ticker, start=start_date, end=end_date, progress=False)


Epoch 1/50


  dt = datetime.utcfromtimestamp(int(ts)).date()
  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 177ms/step - loss: 0.1356 - mae: 0.3492 - val_loss: 0.0965 - val_mae: 0.2991
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step - loss: 0.0430 - mae: 0.1829 - val_loss: 0.0313 - val_mae: 0.1570
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - loss: 0.0122 - mae: 0.0877 - val_loss: 0.0082 - val_mae: 0.0697
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - loss: 0.0183 - mae: 0.1148 - val_loss: 0.0065 - val_mae: 0.0700
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.0063 - mae: 0.0632 - val_loss: 0.0294 - val_mae: 0.1513
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0111 - mae: 0.0863 - val_loss: 0.0259 - val_mae: 0.1396
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.0088 - mae: 0.0740 - va