In [1]:
# --- Imports ---
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Deep Learning specific imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, GRU
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
from keras_tuner import RandomSearch

# --- Technical Indicator Functions ---


def calculate_macd(series, fast=12, slow=26, signal=9):
    """Calculates MACD, Signal Line, and MACD Histogram."""
    exp1 = series.ewm(span=fast, adjust=False).mean()
    exp2 = series.ewm(span=slow, adjust=False).mean()
    macd = exp1 - exp2
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    return macd, signal_line


def calculate_rsi(series, window=14):
    """Calculates the Relative Strength Index (RSI)."""
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).ewm(span=window, adjust=False).mean()
    loss = (-delta.where(delta < 0, 0)).ewm(span=window, adjust=False).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi


def calculate_atr(df, high_col, low_col, close_col, window=14):
    """Calculates the Average True Range (ATR)."""
    df['high_low'] = df[high_col] - df[low_col]
    df['high_close'] = np.abs(df[high_col] - df[close_col].shift(1))
    df['low_close'] = np.abs(df[low_col] - df[close_col].shift(1))
    df['TrueRange'] = df[['high_low', 'high_close', 'low_close']].max(axis=1)
    atr = df['TrueRange'].ewm(span=window, adjust=False).mean()
    df.drop(columns=['high_low', 'high_close',
            'low_close', 'TrueRange'], inplace=True)
    return atr


# --- Global Configurations ---
TARGET = 'Buying'
TIMESTEPS = 60
SPLIT_DATE = '2021-01-01'


# --- 1. Data Loading and Feature Augmentation ---

df = pd.read_csv("datasets/cleaned/merged_fx_dataset.csv")
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values(by='Date')

# To calculate ATR, we need previous day's data. We assume 'Buying' is the Close price,
# and use 'Selling' and 'Buying' as proxies for High/Low, since they define the daily range.
df['High_Proxy'] = df[['Buying', 'Selling']].max(axis=1)
df['Low_Proxy'] = df[['Buying', 'Selling']].min(axis=1)

# New Target: Daily Log Return (for stationarity)
df['Log_Return'] = np.log(df[TARGET] / df[TARGET].shift(1))
NEW_TARGET = 'Log_Return'

# --- Feature Augmentation ---
# A. Technical Indicators
df['RSI_14'] = calculate_rsi(df[TARGET], window=14)
df['MACD_12_26'], df['MACD_Signal'] = calculate_macd(df[TARGET])

# B. Volatility & Risk Metrics
df['ATR_14'] = calculate_atr(
    df.copy(), 'High_Proxy', 'Low_Proxy', TARGET, window=14)
df['Log_Return_Std_20d'] = df[NEW_TARGET].rolling(window=20).std()

# C. Economic Differentials & Base Features (using MidRate as a base for technicals)
df.set_index('Date', inplace=True)
DROP_COLS = [TARGET, 'Selling', 'MidRate',
             'High_Proxy', 'Low_Proxy', NEW_TARGET]
FEATURES = df.drop(columns=DROP_COLS).columns.tolist()

# Add core features (using same logic as before, just listing here for completeness)
df['InterestRate_Diff'] = df['GhInterestRate'] - df['USInterestRate']
df['Inflation_Diff'] = df['GhInflationRate'] - df['USInflationRate']
df['Trade_Balance'] = df['Exports'] - df['Imports']
df['DayOfWeek'] = df.index.dayofweek
df['DayOfYear'] = df.index.dayofyear
df['Is_Month_End'] = df.index.is_month_end.astype(int)
QUARTERLY_SHIFT = 65
df['GhGDP_QoQ_Growth'] = (
    df['GhGDP'] / df['GhGDP'].shift(QUARTERLY_SHIFT) - 1) * 100
df['USGDP_QoQ_Growth'] = (
    df['USGDP'] / df['USGDP'].shift(QUARTERLY_SHIFT) - 1) * 100

# Finalize feature list
NEW_FEATURES = ['RSI_14', 'MACD_12_26', 'MACD_Signal', 'ATR_14', 'Log_Return_Std_20d',
                'InterestRate_Diff', 'Inflation_Diff', 'Trade_Balance',
                'DayOfWeek', 'DayOfYear', 'Is_Month_End',
                'GhGDP_QoQ_Growth', 'USGDP_QoQ_Growth']
FINAL_FEATURES = [f for f in FEATURES if f not in DROP_COLS] + NEW_FEATURES

# Final clean and split preparation
df_clean = df.dropna(subset=[NEW_TARGET] + FINAL_FEATURES)
X_data = df_clean[FINAL_FEATURES].values
y_data = df_clean[NEW_TARGET].values.reshape(-1, 1)


# --- 2. Scaling, Sequence Creation, and Split (Fixed Logic) ---

# Use asof to find the nearest previous index if SPLIT_DATE doesn't exist exactly
if SPLIT_DATE in df_clean.index:
    split_idx = df_clean.index.get_loc(SPLIT_DATE)
else:
    split_date_actual = df_clean.index[df_clean.index <= SPLIT_DATE].max()
    split_idx = df_clean.index.get_loc(split_date_actual)

# Split raw data first
X_train_raw, X_test_raw = X_data[:split_idx], X_data[split_idx:]
y_train_raw, y_test_raw = y_data[:split_idx], y_data[split_idx:]

# Fit scalers ONLY on the training data
scaler_X = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = scaler_X.fit_transform(X_train_raw)
X_test_scaled = scaler_X.transform(X_test_raw)

scaler_y = MinMaxScaler(feature_range=(0, 1))
y_train_scaled = scaler_y.fit_transform(y_train_raw)
y_test_scaled = scaler_y.transform(y_test_raw)

# Create sequences


def create_sequences(X, y, timesteps):
    X_seq, y_seq = [], []
    for i in range(len(X) - timesteps):
        X_seq.append(X[i:i + timesteps, :])
        y_seq.append(y[i + timesteps, 0])
    return np.array(X_seq), np.array(y_seq)


X_train_seq, y_train_seq = create_sequences(
    X_train_scaled, y_train_scaled, TIMESTEPS)
X_test_seq, y_test_seq = create_sequences(
    X_test_scaled, y_test_scaled, TIMESTEPS)
N_FEATURES = X_train_seq.shape[2]

print(f"Data Prepared. X_train_seq shape: {X_train_seq.shape}")


# --- 3. Keras Tuner Model Definition (BiLSTM/GRU Architecture) ---

def build_model_bilstm(hp):
    """Defines the BiLSTM/GRU architecture with searchable hyperparameters."""
    model = Sequential()

    # Decide between LSTM and GRU
    layer_type = hp.Choice('rnn_layer_type', values=['lstm', 'gru'])

    # Define the number of RNN layers (1 or 2)
    num_rnn_layers = hp.Int('num_rnn_layers', min_value=1, max_value=2, step=1)

    for i in range(num_rnn_layers):
        rnn_units = hp.Int(f'units_{i}', min_value=32, max_value=128, step=32)

        # Select the layer based on the choice
        if layer_type == 'lstm':
            rnn_layer = LSTM(rnn_units, return_sequences=(
                i < num_rnn_layers - 1))
        else:  # gru
            rnn_layer = GRU(rnn_units, return_sequences=(
                i < num_rnn_layers - 1))

        # Add Bidirectional wrapper for enhanced sequence context
        model.add(Bidirectional(rnn_layer,
                                input_shape=(TIMESTEPS, N_FEATURES) if i == 0 else None))

        # Searchable dropout rate
        model.add(
            Dropout(hp.Float(f'dropout_{i}', min_value=0.2, max_value=0.5, step=0.1)))

    # Output layer
    model.add(Dense(units=1))

    # Compile the model with a fine-grained searchable learning rate
    hp_learning_rate = hp.Choice('learning_rate', values=[
                                 1e-2, 5e-3, 1e-3, 5e-4, 1e-4])
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss='mse')
    return model


# --- 4. Hyperparameter Search ---
tscv = TimeSeriesSplit(n_splits=3)

tuner = RandomSearch(
    build_model_bilstm,
    objective='val_loss',
    max_trials=15,  # Increased trials for the larger search space
    executions_per_trial=1,
    directory='keras_tuner_fx_advanced',
    project_name='lstm_fx_advanced',
    overwrite=True
)

print("\nStarting Keras Tuner Random Search (Advanced Features & BiLSTM)...")

# Perform Walk-Forward Search
for fold, (train_index, val_index) in enumerate(tscv.split(X_train_seq)):
    print(f"\n--- Fitting Fold {fold+1} ---")

    X_train_fold, X_val_fold = X_train_seq[train_index], X_train_seq[val_index]
    y_train_fold, y_val_fold = y_train_seq[train_index], y_train_seq[val_index]

    tuner.search(
        X_train_fold, y_train_fold,
        epochs=15,
        validation_data=(X_val_fold, y_val_fold),
        callbacks=[tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=5)]
    )

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"\nOptimal Hyperparameters Found: {best_hps.values}")

# --- 5. Final Model Training and Evaluation ---
final_model = tuner.hypermodel.build(best_hps)

print("\nTraining final model on full fixed training set...")
final_model.fit(
    X_train_seq, y_train_seq,
    epochs=50,
    batch_size=32,
    validation_data=(X_test_seq, y_test_seq),
    callbacks=[tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=10, restore_best_weights=True)],
    verbose=0
)

# Predict on the hold-out test set
y_pred_scaled = final_model.predict(X_test_seq)

# Inverse transform to original Log Return scale
y_test_unscaled = scaler_y.inverse_transform(y_test_seq.reshape(-1, 1))
y_pred_unscaled = scaler_y.inverse_transform(y_pred_scaled)

# Calculate final metrics on the Log Returns
mae = mean_absolute_error(y_test_unscaled, y_pred_unscaled)
mse = mean_squared_error(y_test_unscaled, y_pred_unscaled)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_unscaled, y_pred_unscaled)

print("\n" + "="*70)
print("--- Final Model Evaluation (Advanced BiLSTM/GRU) on Test Set ---")
print("Target: Daily Log Returns (Stationary)")
print(f"TimeSteps (Lookback Window): {TIMESTEPS} days")
print("-" * 70)
print(f"Mean Absolute Error (MAE): {mae:.6f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.6f}")
print(f"R-squared (R²): {r2:.4f}")
print(
    f"R-squared Target Achievement: {'ACHIEVED' if r2 >= 0.70 else 'NOT ACHIEVED'}")
print(f"Optimal RNN Type: {best_hps.get('rnn_layer_type')}")
print(f"Optimal Learning Rate: {best_hps.get('learning_rate')}")
print("="*70)

Trial 15 Complete [00h 00m 29s]
val_loss: 0.004003169480711222

Best val_loss So Far: 0.0023282570764422417
Total elapsed time: 00h 04m 53s

--- Fitting Fold 2 ---

--- Fitting Fold 3 ---

Optimal Hyperparameters Found: {'rnn_layer_type': 'lstm', 'num_rnn_layers': 2, 'units_0': 128, 'dropout_0': 0.2, 'learning_rate': 0.005, 'units_1': 128, 'dropout_1': 0.4}

Training final model on full fixed training set...
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 73ms/step

--- Final Model Evaluation (Advanced BiLSTM/GRU) on Test Set ---
Target: Daily Log Returns (Stationary)
TimeSteps (Lookback Window): 60 days
----------------------------------------------------------------------
Mean Absolute Error (MAE): 0.003373
Root Mean Squared Error (RMSE): 0.010068
R-squared (R²): -0.1967
R-squared Target Achievement: NOT ACHIEVED
Optimal RNN Type: lstm
Optimal Learning Rate: 0.005
