# 1.import

In [21]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

print("Current working directory:", os.getcwd())


Current working directory: c:\Users\hp\Desktop\10 Acadamy\VS code\portfolio-forecasting-gmf\notebooks


# 2. Settings

In [22]:
TICKERS = ["TSLA", "BND", "SPY"]
DATA_DIR = os.path.join("..", "data", "processed")  # Adjust if running from notebooks folder
RESULTS_DIR = "results"
os.makedirs(RESULTS_DIR, exist_ok=True)

TEST_SIZE = 30
SEQ_LENGTH = 30

def create_sequences(data, seq_length=SEQ_LENGTH):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

all_metrics = []
all_forecasts = []

# 3.Check processed data files exist

In [23]:
print(f"Files in {DATA_DIR}:", os.listdir(DATA_DIR))
for ticker in TICKERS:
    file_path = os.path.join(DATA_DIR, f"{ticker}_processed.csv")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Processed data file not found: {file_path}")
    print(f"Found file for {ticker}: {file_path}")

Files in ..\data\processed: ['BND_processed.csv', 'SPY_processed.csv', 'TSLA_processed.csv']
Found file for TSLA: ..\data\processed\TSLA_processed.csv
Found file for BND: ..\data\processed\BND_processed.csv
Found file for SPY: ..\data\processed\SPY_processed.csv


 # 4. Main loop: Load, preprocess, train LSTM, forecast

In [24]:
for ticker in TICKERS:
    print(f"\nProcessing {ticker}...")

    # Load processed data
    file_path = os.path.join(DATA_DIR, f"{ticker}_processed.csv")
    df = pd.read_csv(file_path, index_col=0, parse_dates=True)

    ts = df['log_adjclose'].dropna()

    # Train-test split
    train_ts = ts.iloc[:-TEST_SIZE].values.reshape(-1,1)
    test_ts = ts.iloc[-TEST_SIZE:].values.reshape(-1,1)

    # Scale data
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_ts)
    test_scaled = scaler.transform(test_ts)

    # Prepare sequences
    X_train, y_train = create_sequences(train_scaled, SEQ_LENGTH)
    test_full = np.concatenate((train_scaled[-SEQ_LENGTH:], test_scaled), axis=0)
    X_test, y_test = create_sequences(test_full, SEQ_LENGTH)

    print(f"Train samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}")

    # Define LSTM model
    model = Sequential([
        LSTM(50, activation='relu', input_shape=(SEQ_LENGTH,1)),
        Dropout(0.2),
        Dense(1)
    ])

    model.compile(optimizer='adam', loss='mse')

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model
    history = model.fit(
        X_train, y_train,
        validation_split=0.1,
        epochs=100,
        batch_size=32,
        callbacks=[early_stop],
        verbose=1
    )

    # Predict
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler.inverse_transform(y_pred_scaled)
    y_test_orig = scaler.inverse_transform(y_test)

    # Evaluate
    mae = mean_absolute_error(y_test_orig, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))
    print(f"{ticker} Test MAE: {mae:.4f}, RMSE: {rmse:.4f}")

    all_metrics.append({"Ticker": ticker, "MAE": mae, "RMSE": rmse})

    # Save forecast DataFrame
    forecast_df = pd.DataFrame({
        "Date": ts.index[-len(y_test_orig):],
        "Actual": y_test_orig.flatten(),
        "Forecast": y_pred.flatten(),
        "Ticker": ticker
    }).set_index("Date")

    save_path = os.path.join(RESULTS_DIR, f"forecast_lstm_{ticker}.csv")
    forecast_df.to_csv(save_path)
    print(f"Saved forecast for {ticker} to {save_path}")

    all_forecasts.append(forecast_df)



Processing TSLA...
Train samples: 2571, Test samples: 30


  super().__init__(**kwargs)


Epoch 1/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 40ms/step - loss: 0.1044 - val_loss: 0.0049
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 30ms/step - loss: 0.0098 - val_loss: 0.0052
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 0.0101 - val_loss: 0.0048
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 0.0092 - val_loss: 0.0049
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 0.0090 - val_loss: 0.0048
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 0.0090 - val_loss: 0.0051
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 0.0091 - val_loss: 0.0047
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 0.0087 - val_loss: 0.0046
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━

  super().__init__(**kwargs)


[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 76ms/step - loss: 0.1000 - val_loss: 0.0059
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - loss: 0.0099 - val_loss: 0.0062
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.0094 - val_loss: 0.0059
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 0.0094 - val_loss: 0.0059
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - loss: 0.0096 - val_loss: 0.0057
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - loss: 0.0087 - val_loss: 0.0057
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.0079 - val_loss: 0.0059
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 0.0076 - val_loss: 0.0067
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(**kwargs)


Epoch 1/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - loss: 0.0755 - val_loss: 0.0111
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 0.0123 - val_loss: 0.0105
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 0.0114 - val_loss: 0.0117
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - loss: 0.0116 - val_loss: 0.0105
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 0.0117 - val_loss: 0.0114
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 0.0114 - val_loss: 0.0099
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - loss: 0.0110 - val_loss: 0.0099
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 0.0105 - val_loss: 0.0108
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━

# 5. Combine all forecasts and metrics

In [25]:
combined_forecasts = pd.concat(all_forecasts)
combined_forecasts.to_csv(os.path.join(RESULTS_DIR, "forecasts_lstm_all.csv"))

metrics_df = pd.DataFrame(all_metrics)
metrics_df.to_csv(os.path.join(RESULTS_DIR, "metrics_lstm_all.csv"), index=False)

print("\nAll tickers processed.")
print(metrics_df)


All tickers processed.
  Ticker       MAE      RMSE
0   TSLA  0.225054  0.289491
1    BND  0.120981  0.158341
2    SPY  0.141365  0.166203
