In [None]:
%%time
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import MinMaxScaler
import joblib
import tensorflow as tf

# Tickers to process. NOTE: `stock_list` is read by main() but is only present
# here as a commented-out example, so it must be defined before main() runs.
# stock_list = ['AAPL', 'TSLA']

# Locations (relative to the working directory) of the processed CSV inputs,
# the saved models/scalers, and the generated plots.
processed_data_dir = "..//data//processed//"
models_dir = "..//models//"
output_dir = "..//data//results//"

# Create the directories this script writes into; existing ones are left as-is.
for _writable_dir in (models_dir, output_dir):
    os.makedirs(_writable_dir, exist_ok=True)


def _save_loss_plot(history, ticker):
    """Plot training vs. validation loss per epoch and save it under output_dir."""
    fig = plt.figure(figsize=(18, 6))
    try:
        plt.plot(history.history["loss"], label="Training Loss")
        plt.plot(history.history["val_loss"], label="Validation Loss")
        plt.title(f"LSTM Model Performance for {ticker}")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.grid()
        plt.savefig(f"{output_dir}{ticker.lower()}_model_performance.png")
        plt.show()
    finally:
        # Release the figure so they do not pile up across the ticker loop.
        plt.close(fig)


def _save_forecast_plot(future_predictions, ticker, days_to_predict):
    """Plot the multi-day forecast and save it under output_dir."""
    fig = plt.figure(figsize=(10, 6))
    try:
        plt.plot(
            range(1, days_to_predict + 1),
            future_predictions,
            label="Future Predictions",
            color="blue",
        )
        plt.title(f"{days_to_predict}-Day Stock Price Predictions for {ticker}")
        plt.xlabel("Days Ahead")
        plt.ylabel("Predicted Stock Price (USD)")
        plt.legend()
        plt.grid()
        plt.savefig(
            f"{output_dir}{ticker.lower()}_{days_to_predict}_day_predictions.png"
        )
        plt.show()
    finally:
        plt.close(fig)


def _process_ticker(ticker, sequence_size, days_to_predict, epochs, batch_size):
    """Train an LSTM for one ticker, keep the best checkpoint, and plot a forecast.

    Reads the ticker's train/validate/test CSVs from processed_data_dir and its
    scaler from models_dir, writes the best model (lowest validation loss) to
    models_dir and two PNG plots to output_dir. Any error propagates to the
    caller.

    Raises:
        ValueError: if the training set has fewer rows than ``sequence_size``.
    """
    prefix = ticker.lower()

    # File paths
    train_file = f"{processed_data_dir}{prefix}_stock_price_processed_train.csv"
    validate_file = f"{processed_data_dir}{prefix}_stock_price_processed_validate.csv"
    test_file = f"{processed_data_dir}{prefix}_stock_price_processed_test.csv"
    scaler_file = f"{models_dir}{prefix}_stock_price_scaler.gz"
    model_file = f"{models_dir}{prefix}_stock_price_lstm.model.keras"

    # Load processed data
    data_train_df = pd.read_csv(train_file)
    data_validate_df = pd.read_csv(validate_file)
    data_test_df = pd.read_csv(test_file)

    # Load the scaler up front: a missing or unreadable scaler file should fail
    # now, not after the whole training run has completed.
    # NOTE: joblib.load unpickles the file; only load scalers you produced.
    scaler = joblib.load(scaler_file)

    # Convert 'Date' column to datetime (also verifies the column is present
    # and parseable in every split).
    data_train_df["Date"] = pd.to_datetime(data_train_df["Date"])
    data_validate_df["Date"] = pd.to_datetime(data_validate_df["Date"])
    data_test_df["Date"] = pd.to_datetime(data_test_df["Date"])

    # Extract the feature matrices
    features = ["Open", "High", "Low", "Close", "Adj Close", "Volume"]
    data_train = data_train_df[features].values
    data_validate = data_validate_df[features].values
    data_test = data_test_df[features].values

    train_size = len(data_train)
    validate_size = len(data_validate)
    if train_size < sequence_size:
        # Otherwise `train_size - sequence_size` below is negative and the
        # slice would silently wrap around to the end of the combined array.
        raise ValueError(
            f"Training set for {ticker} has {train_size} rows; "
            f"at least {sequence_size} are required."
        )

    # Combine all datasets so validation/test windows can borrow the preceding
    # `sequence_size` rows as context.
    data_all = np.concatenate([data_train, data_validate, data_test], axis=0)

    # Construct LSTM input/output.
    # NOTE(review): the trailing 0 is presumably the target column index
    # (0 = "Open") -- confirm against construct_lstm_data.
    X_train, y_train = construct_lstm_data(data_train, sequence_size, 0)
    X_validate, y_validate = construct_lstm_data(
        data_all[train_size - sequence_size:train_size + validate_size],
        sequence_size,
        0,
    )
    X_test, y_test = construct_lstm_data(
        data_all[-(len(data_test) + sequence_size):], sequence_size, 0
    )

    # Four stacked LSTM layers with dropout, followed by a single-unit output.
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(units=100, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=100, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=100, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=100))
    model.add(Dropout(0.2))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer="adam", loss="mean_squared_error")

    # Checkpoint that keeps only the epoch with the lowest validation loss
    checkpoint = ModelCheckpoint(
        model_file, monitor="val_loss", save_best_only=True, mode="min", verbose=0
    )

    # Train the model
    print("Training LSTM model...")
    # NOTE(review): behaviour on machines without a GPU depends on TensorFlow's
    # device placement settings -- confirm before running on CPU-only hosts.
    with tf.device('/GPU:0'):
        history = model.fit(
            X_train, y_train,
            validation_data=(X_validate, y_validate),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[checkpoint],
            verbose=1
        )

    # Plot LSTM Model Performance
    _save_loss_plot(history, ticker)

    # Load the best model written by the checkpoint callback
    best_model = load_model(model_file)

    # Forecast forward, seeded with the last test window
    future_predictions = predict_future(
        best_model, X_test[-1], days_to_predict=days_to_predict, scaler=scaler
    )

    # Visualize predictions
    _save_forecast_plot(future_predictions, ticker, days_to_predict)


def main(tickers=None, sequence_size=60, days_to_predict=30, epochs=200, batch_size=64):
    """Train and forecast for every ticker, continuing past per-ticker failures.

    Args:
        tickers: iterable of ticker symbols; defaults to the module-level
            ``stock_list`` when omitted.
        sequence_size: number of rows in each LSTM input window.
        days_to_predict: forecast horizon passed to ``predict_future``.
        epochs: number of training epochs.
        batch_size: training batch size.

    Returns:
        list: the tickers whose processing raised an exception (empty when
        everything succeeded).
    """
    import traceback

    if tickers is None:
        tickers = stock_list

    failed = []
    for ticker in tickers:
        try:
            print(f"Processing {ticker}...")
            _process_ticker(ticker, sequence_size, days_to_predict, epochs, batch_size)
            print(f"Completed processing for {ticker}.\n")
        except Exception as e:
            # Best-effort batch: report the failure with its traceback and move
            # on to the next ticker instead of aborting the whole run.
            print(f"Error processing {ticker}: {e}")
            traceback.print_exc()
            failed.append(ticker)
    return failed


if __name__ == "__main__":
    failed_tickers = main()
    if failed_tickers:
        print(f"Finished with errors; failed tickers: {', '.join(map(str, failed_tickers))}")
    else:
        print("All tickers processed successfully.")
