## Adding sentiment data input
# collecting news sentiment data

In [1]:
import numpy as np
import pandas as pd
import requests
import yfinance as yf
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from tensorflow.keras.layers import Input, LSTM, Dense, Concatenate
from tensorflow.keras.models import Model
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
# Set random seed for reproducibility
tf.random.set_seed(1234)

# Instantiate sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Fetch news articles and compute sentiment score
def fetch_news(ticker):
    url = f"https://newsapi.org/v2/everything?q={ticker}&apiKey=5963eddbacab4d87a3edd2f2c111b249"
    response = requests.get(url)
    articles = response.json()['articles']
    headlines = [article['title'] for article in articles]
    return headlines

def analyze_sentiment(headlines):
    sentiment_scores = []
    for headline in headlines:
        score = analyzer.polarity_scores(headline)['compound']
        sentiment_scores.append(score)
    avg_sentiment = sum(sentiment_scores) / len(sentiment_scores)
    return avg_sentiment

# Fetch real-time stock price from Yahoo Finance API
def fetch_real_time_stock_data(ticker):
    data = yf.download(ticker, period='1d', interval='1m')
    return data['Close'].values[-1]

# preprocessing stock and sentiment data together 

In [3]:
df = pd.read_csv("sp_und.csv")
df.head()

Unnamed: 0,secid,date,ticker,low,high,open,close,volume
0,108105,20010102,SPX,1276.05,1320.28,1320.28,1283.27,0
1,108105,20010103,SPX,1274.62,1347.76,1283.27,1347.56,0
2,108105,20010104,SPX,1329.14,1350.24,1347.56,1333.34,0
3,108105,20010105,SPX,1294.95,1334.77,1333.34,1298.35,0
4,108105,20010108,SPX,1276.29,1298.35,1298.35,1295.86,0


In [4]:
df.size

38232

In [5]:
df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')

In [6]:
# Append sentiment scores to the DataFrame
df['sentiment'] = df['date'].apply(lambda x: analyze_sentiment(fetch_news('SPX')))

ConnectionError: HTTPSConnectionPool(host='newsapi.org', port=443): Max retries exceeded with url: /v2/everything?q=SPX&apiKey=5963eddbacab4d87a3edd2f2c111b249 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000226DD8E0220>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))

In [None]:
# Preprocess stock price data (closing prices)
X_stock = df[['close']].values
X_stock = (X_stock - np.mean(X_stock)) / np.std(X_stock)

# Preprocess sentiment data
X_sentiment = df[['sentiment']].values
X_sentiment = (X_sentiment - np.mean(X_sentiment)) / np.std(X_sentiment)

# Combine stock prices and sentiment data
X_combined = np.concatenate([X_stock, X_sentiment], axis=1)