# In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from textblob import TextBlob

def preprocess_news_data(file_path_news, file_path_historical, sequence_length=60):
    """Turn news sentiment plus daily stock prices into sliding-window arrays.

    Reads the news CSV (needs 'PublishedAt' and 'Description' columns), scores
    every description with TextBlob polarity, averages the scores per calendar
    day, inner-joins them onto the historical price CSV (needs 'Date', 'Open',
    'High', 'Low', 'Close' and 'Volume' columns) and cuts the result into
    fixed-length windows.

    Args:
        file_path_news: Path to the news CSV.
        file_path_historical: Path to the historical price CSV.
        sequence_length: Number of consecutive merged rows in each input
            window. Defaults to 60.

    Returns:
        A tuple ``(X, y, scaler)``:
        X is an array of windows of min-max scaled feature rows, one window
        per merged row from index ``sequence_length`` onwards;
        y holds the unscaled 'High' and 'Low' values of the row that
        immediately follows each window;
        scaler is the fitted ``MinMaxScaler`` used on the features.
        When the merged data has ``sequence_length`` rows or fewer, X and y
        are empty arrays.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    news_df = pd.read_csv(file_path_news, parse_dates=['PublishedAt'])

    # str() lets non-string cells (e.g. NaN for a missing description) be
    # handed to TextBlob without a type error.
    news_df['Sentiment'] = news_df['Description'].apply(
        lambda text: TextBlob(str(text)).sentiment.polarity
    )

    # `.dt.date` drops the time of day but yields plain `datetime.date`
    # objects (object dtype). Convert back to datetime64 so the key has the
    # same dtype as the parsed stock 'Date' column; pandas refuses to merge a
    # datetime64 key with an object key.
    news_df['Date'] = pd.to_datetime(news_df['PublishedAt'].dt.date)
    daily_sentiment = news_df.groupby('Date')['Sentiment'].mean().reset_index()

    stock_df = pd.read_csv(file_path_historical, parse_dates=['Date'])

    # Inner join: only days present in both the price data and the news data
    # are kept.
    merged_df = pd.merge(stock_df, daily_sentiment, on='Date', how='inner')

    features = merged_df[['Open', 'High', 'Low', 'Close', 'Volume', 'Sentiment']]
    targets = merged_df[['High', 'Low']].to_numpy()

    # NOTE: the scaler is fit on every merged row, i.e. before any
    # train/test split is made by the caller.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_features = scaler.fit_transform(features)

    # Window ending just before row `end` predicts the targets of row `end`.
    window_ends = range(sequence_length, len(scaled_features))
    X = np.array([scaled_features[end - sequence_length:end] for end in window_ends])
    y = np.array([targets[end] for end in window_ends])
    return X, y, scaler

def main():
    """Preprocess the cleaned news and price CSVs and save the arrays to disk.

    Writes the input windows to 'X_news.npy' and the targets to 'y_news.npy'
    in the current working directory.
    """
    news_csv = 'news_data_cleaned.csv'
    historical_csv = 'historical_data_cleaned.csv'

    # The fitted scaler is returned too, but it is not persisted here.
    X, y, _scaler = preprocess_news_data(news_csv, historical_csv)

    # Save preprocessed data
    for out_path, array in (('X_news.npy', X), ('y_news.npy', y)):
        np.save(out_path, array)


if __name__ == "__main__":
    main()


# In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

def build_lstm_model(input_shape):
    """Create and compile a two-layer stacked LSTM regressor.

    Args:
        input_shape: Shape handed to the first LSTM layer; presumably
            ``(timesteps, n_features)`` -- confirm against the caller.

    Returns:
        A compiled ``Sequential`` model with two 50-unit LSTM layers, each
        followed by 20% dropout, and a 2-unit dense output, using the Adam
        optimizer and mean squared error loss.
    """
    layers = [
        # First LSTM returns the full sequence so the second LSTM can consume it.
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        # Two outputs: the 'High' and 'Low' prices.
        Dense(units=2),
    ]
    network = Sequential(layers)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

def main():
    """Train the LSTM on the saved arrays and print the loss on the held-out tail.

    Loads 'X_news.npy' and 'y_news.npy', keeps the first 80% of samples for
    training and the remaining samples for testing, and checkpoints the model
    with the lowest validation loss to 'news_model.h5'.
    """
    # Load preprocessed data
    inputs = np.load('X_news.npy')
    targets = np.load('y_news.npy')

    # Ordered split: the first 80% of samples train, the rest test.
    cutoff = int(0.8 * len(inputs))
    X_train = inputs[:cutoff]
    X_test = inputs[cutoff:]
    y_train = targets[:cutoff]
    y_test = targets[cutoff:]

    timesteps = X_train.shape[1]
    n_features = X_train.shape[2]
    model = build_lstm_model((timesteps, n_features))

    # Only overwrite the checkpoint file when validation loss improves.
    best_model_saver = ModelCheckpoint(
        'news_model.h5',
        save_best_only=True,
        monitor='val_loss',
        mode='min',
    )
    model.fit(
        X_train,
        y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        callbacks=[best_model_saver],
    )

    # Evaluate the in-memory model (the weights after the final epoch).
    loss = model.evaluate(X_test, y_test)
    print(f'Test Loss: {loss}')


if __name__ == "__main__":
    main()
