In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten, GRU, MultiHeadAttention, LayerNormalization
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings

# Suppress warnings
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')

## Data Preparation
# Download S&P 500 data
def download_sp500_data(ticker='^GSPC', start_date='2010-01-01', end_date='2020-12-31'):
    try:
        data = yf.download(ticker, start=start_date, end=end_date)
        if data.empty:
            print(f"Failed to download data for {ticker}. Please check your internet connection or ticker symbol.")
            return None
        return data
    except Exception as e:
        print(f"Error downloading data: {str(e)}")
        return None

# Add technical indicators
def add_technical_indicators(df):
    if df is None or df.empty:
        return None
    
    try:
        # Moving Averages
        df['MA_10'] = df['Close'].rolling(window=10).mean()
        df['MA_20'] = df['Close'].rolling(window=20).mean()
        df['MA_50'] = df['Close'].rolling(window=50).mean()
        
        # RSI
        delta = df['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        # Avoid division by zero
        loss = loss.replace(0, np.finfo(float).eps)
        rs = gain / loss
        df['RSI'] = 100 - (100 / (1 + rs))
        
        # MACD
        exp12 = df['Close'].ewm(span=12, adjust=False).mean()
        exp26 = df['Close'].ewm(span=26, adjust=False).mean()
        df['MACD'] = exp12 - exp26
        df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
        
        # Bollinger Bands
        df['Upper_Band'] = df['MA_20'] + (df['Close'].rolling(window=20).std() * 2)
        df['Lower_Band'] = df['MA_20'] - (df['Close'].rolling(window=20).std() * 2)
        
        return df.dropna()
    except Exception as e:
        print(f"Error adding technical indicators: {str(e)}")
        return None

# Prepare data for time series prediction
def prepare_data(df, lookback=60, forecast_horizon=5):
    if df is None or df.empty:
        print("Cannot prepare data: DataFrame is empty or None")
        return None, None, None
    
    try:
        # Select features to scale
        features_to_scale = ['Close', 'Volume', 'MA_10', 'MA_20', 'MA_50', 'RSI', 'MACD', 'Signal_Line', 'Sentiment']
        
        # Check which features actually exist in the dataframe
        available_features = [f for f in features_to_scale if f in df.columns]
        print(f"Using features: {available_features}")
        
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(df[available_features])
        
        X, y = [], []
        for i in range(lookback, len(scaled_data)-forecast_horizon):
            X.append(scaled_data[i-lookback:i])
            # For multi-day forecasts, we take 'forecast_horizon' days of the 'Close' price
            y.append(scaled_data[i:i+forecast_horizon, 0])  # Assuming 'Close' is the first column
            
        return np.array(X), np.array(y), scaler
    except Exception as e:
        print(f"Error preparing data: {str(e)}")
        return None, None, None

# Load sentiment data (simulated)
def load_sentiment_data(start_date, end_date):
    try:
        dates = pd.date_range(start=start_date, end=end_date)
        sentiment = np.random.uniform(-1, 1, size=len(dates))
        return pd.DataFrame({'Date': dates, 'Sentiment': sentiment})
    except Exception as e:
        print(f"Error loading sentiment data: {str(e)}")
        return pd.DataFrame()  # Return empty DataFrame on error

# Define model architectures
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(5))  # Output layer (assuming forecast_horizon=5)
    
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

def build_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))
    model.add(Dense(5))  # Output layer (assuming forecast_horizon=5)
    
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

def build_transformer_model(input_shape):
    inputs = tf.keras.Input(shape=input_shape)
    
    # Add positional encoding
    x = inputs
    
    # Multi-head attention block
    attention_output = MultiHeadAttention(
        num_heads=4, key_dim=32)(x, x)
    attention_output = Dropout(0.1)(attention_output)
    x = LayerNormalization(epsilon=1e-6)(x + attention_output)
    
    # Feed-forward network
    ffn = Sequential([
        Dense(128, activation='relu'),
        Dense(input_shape[-1])
    ])
    ffn_output = ffn(x)
    ffn_output = Dropout(0.1)(ffn_output)
    x = LayerNormalization(epsilon=1e-6)(x + ffn_output)
    
    # Flatten and output layer
    x = Flatten()(x)
    outputs = Dense(5)(x)  # Output layer (assuming forecast_horizon=5)
    
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Define Trading Environment for RL example
class TradingEnvironment:
    def __init__(self, price_data, initial_balance=10000):
        self.price_data = price_data
        self.initial_balance = initial_balance
        self.reset()
        
    def reset(self):
        self.balance = self.initial_balance
        self.position = 0  # 0 = no position, 1 = long
        self.current_step = 0
        self.total_steps = len(self.price_data)
        return self._get_observation()
        
    def _get_observation(self):
        if self.current_step >= self.total_steps:
            return None
        return self.price_data[self.current_step]
        
    def step(self, action):
        # Action: 0 = hold, 1 = buy, 2 = sell
        if self.current_step >= self.total_steps - 1:
            return None, 0, True, {}
            
        reward = 0
        # Current and next price (using close price)
        current_price = self.price_data[self.current_step][0][-1][0]  # Last timepoint, close price
        self.current_step += 1
        next_price = self.price_data[self.current_step][0][-1][0]
        
        price_change = next_price - current_price
        
        # Execute action
        if action == 1 and self.position == 0:  # Buy
            self.position = 1
            reward = price_change
        elif action == 2 and self.position == 1:  # Sell
            self.position = 0
            reward = -price_change
            
        # If holding position, reward/penalty based on price movement
        if self.position == 1:
            reward = price_change
            
        done = self.current_step >= self.total_steps - 1
        
        return self._get_observation(), reward, done, {}

# Evaluation function
def evaluate_model(model, X_test, y_test, scaler, feature_pos=0):
    """
    Evaluate model predictions and calculate error metrics.
    
    Args:
        model: Trained model
        X_test: Test features
        y_test: True values
        scaler: Fitted scaler used for inverse transformation
        feature_pos: Position of the target feature in the scaled data (default: 0 for 'Close')
    
    Returns:
        mse: Mean squared error
        y_true: Original scale true values
        y_pred: Original scale predictions
    """
    try:
        # Get predictions
        predictions = model.predict(X_test)
        
        # For multi-day forecasts, we'll only evaluate the first day for simplicity
        first_day_preds = predictions[:, 0]
        first_day_true = y_test[:, 0]
        
        # Create dummy arrays for inverse scaling
        feature_count = len(scaler.feature_names_in_)
        
        dummy_pred = np.zeros((len(first_day_preds), feature_count))
        dummy_pred[:, feature_pos] = first_day_preds
        
        dummy_true = np.zeros((len(first_day_true), feature_count))
        dummy_true[:, feature_pos] = first_day_true
        
        # Inverse transform
        y_pred = scaler.inverse_transform(dummy_pred)[:, feature_pos]
        y_true = scaler.inverse_transform(dummy_true)[:, feature_pos]
        
        # Calculate MSE
        mse = mean_squared_error(y_true, y_pred)
        
        return mse, y_true, y_pred
    except Exception as e:
        print(f"Error in model evaluation: {str(e)}")
        return float('inf'), None, None

## Main Execution
if __name__ == "__main__":
    print("Downloading and preparing data...")
    
    # 1. Load and prepare data
    sp500_data = download_sp500_data()
    
    if sp500_data is None or sp500_data.empty:
        print("Failed to get S&P 500 data. Exiting.")
        exit()
        
    print(f"Downloaded data shape: {sp500_data.shape}")
    
    # Remove any NaT values in the index
    sp500_data = sp500_data[sp500_data.index.notnull()]
    
    if sp500_data.empty:
        print("No valid data after filtering. Exiting.")
        exit()
    
    # Add technical indicators
    sp500_data = add_technical_indicators(sp500_data.copy())  # Make sure to use a copy
    
    if sp500_data is None or sp500_data.empty:
        print("Failed to add technical indicators. Exiting.")
        exit()
    
    # Generate sentiment data with matching dates
    try:
        start_date = sp500_data.index.min().strftime('%Y-%m-%d')
        end_date = sp500_data.index.max().strftime('%Y-%m-%d')
        sentiment_data = load_sentiment_data(start_date, end_date)
        
        # Merge sentiment data - ensure we're using the index properly
        sp500_data = sp500_data.reset_index()  # Convert index to column
        sp500_data = sp500_data.merge(sentiment_data, left_on='Date', right_on='Date', how='left')
        sp500_data['Sentiment'] = sp500_data['Sentiment'].fillna(0)
        sp500_data.set_index('Date', inplace=True)
    except Exception as e:
        print(f"Error processing dates or merging sentiment data: {str(e)}")
        # Add a default sentiment column if merging fails
        sp500_data['Sentiment'] = 0
    
    # Prepare data for modeling
    print("Preparing training data...")
    X, y, scaler = prepare_data(sp500_data)
    
    if X is None or y is None:
        print("Failed to prepare data. Exiting.")
        exit()
        
    print(f"Prepared data shapes - X: {X.shape}, y: {y.shape}")
    
    # Split data
    train_size = int(0.8 * len(X))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    
    # 2. Train models
    print("\nTraining LSTM model...")
    lstm_model = build_lstm_model(X_train.shape[1:])
    lstm_history = lstm_model.fit(X_train, y_train, epochs=20, batch_size=32, 
                                validation_split=0.2, verbose=1)
    
    print("\nTraining CNN model...")
    cnn_model = build_cnn_model(X_train.shape[1:])
    cnn_history = cnn_model.fit(X_train, y_train, epochs=20, batch_size=32, 
                               validation_split=0.2, verbose=1)
    
    print("\nTraining Transformer model...")
    transformer_model = build_transformer_model(X_train.shape[1:])
    transformer_history = transformer_model.fit(X_train, y_train, epochs=20, batch_size=32, 
                                             validation_split=0.2, verbose=1)
    
    # 3. Evaluate models
    print("\nEvaluating models...")
    lstm_mse, lstm_true, lstm_pred = evaluate_model(lstm_model, X_test, y_test, scaler)
    cnn_mse, cnn_true, cnn_pred = evaluate_model(cnn_model, X_test, y_test, scaler)
    transformer_mse, transformer_true, transformer_pred = evaluate_model(transformer_model, X_test, y_test, scaler)
    
    print(f"\nLSTM MSE: {lstm_mse:.4f}")
    print(f"CNN MSE: {cnn_mse:.4f}")
    print(f"Transformer MSE: {transformer_mse:.4f}")
    
    # 4. Plot results if data is available
    if lstm_true is not None and lstm_pred is not None:
        plt.figure(figsize=(15, 6))
        plt.plot(lstm_true, label='Actual Price', alpha=0.7)
        plt.plot(lstm_pred, label='LSTM Prediction', alpha=0.7)
        plt.plot(cnn_pred, label='CNN Prediction', alpha=0.7)
        plt.plot(transformer_pred, label='Transformer Prediction', alpha=0.7)
        plt.title('Model Predictions vs Actual Prices')
        plt.xlabel('Time')
        plt.ylabel('Price')
        plt.legend()
        plt.show()
    
    # 5. Simplified RL example
    print("\nRunning simplified RL example...")
    try:
        env = TradingEnvironment(X_train)
        state = env.reset()
        done = False
        total_reward = 0
        
        if state is not None:
            while not done:
                action = np.random.choice([0, 1, 2])  # Random policy for demonstration
                result = env.step(action)
                if result is None:
                    break
                next_state, reward, done, _ = result
                total_reward += reward
                state = next_state
                
            print(f"Random Policy Total Reward: {total_reward:.2f}")
        else:
            print("Failed to initialize trading environment.")
    except Exception as e:
        print(f"Error in RL simulation: {str(e)}")

2025-04-17 16:14:58.798785: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744906499.044924      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744906499.119921      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Downloading and preparing data...


[*********************100%***********************]  1 of 1 completed


Failed to download data for ^GSPC. Please check your internet connection or ticker symbol.
Failed to get S&P 500 data. Exiting.


AttributeError: 'NoneType' object has no attribute 'shape'