# FX Rate Modeling - Exploration Notebook

This notebook provides an interactive way to explore FX data and experiment with different models.

In [None]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch

from data.data_loader import FXDataLoader
from data.features import FeatureEngineer
from models.traditional import ARIMAModel, GARCHModel
from models.pytorch_models import LSTMModel, PyTorchModelWrapper
from evaluation.metrics import MetricsCalculator
from utils.visualization import Visualizer

sns.set_style("whitegrid")
%matplotlib inline

## 1. Load FX Data

In [None]:
# Currency pairs to request from the loader
fx_pairs = ["EURUSD=X", "GBPUSD=X", "USDJPY=X", "USDCHF=X"]

# Build the loader and fetch the requested pairs for the 2020-2023 window
loader = FXDataLoader()
df = loader.fetch_fx_data(pairs=fx_pairs, start_date="2020-01-01", end_date="2023-12-31")

# Report frame dimensions and the first/last index labels, then preview the top rows
print(f"Data shape: {df.shape}")
print(f"\nDate range: {df.index[0]} to {df.index[-1]}")
df.head()

## 2. Data Visualization

In [None]:
# One line per column of the raw frame, all on a shared axis
fig, ax = plt.subplots(figsize=(14, 6))
for col, series in df.items():
    ax.plot(df.index, series, label=col)
ax.set(title="FX Rates", xlabel="Date", ylabel="Exchange Rate")
ax.grid(True, alpha=0.3)
ax.legend()
plt.show()

In [None]:
# Period-over-period percentage change; rows containing any NaN are discarded
returns = df.pct_change().dropna()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ts_ax, box_ax = axes

# Left panel: each return series drawn against the index
for col, series in returns.items():
    ts_ax.plot(returns.index, series, label=col, alpha=0.7)
ts_ax.set_title("Returns Over Time")
ts_ax.legend()

# Right panel: box plot of the returns, one box per column
returns.boxplot(ax=box_ax)
box_ax.set_title("Returns Distribution")
box_ax.tick_params(axis='x', rotation=45)

fig.tight_layout()
plt.show()

## 3. Feature Engineering

In [None]:
# Settings handed to the feature engineer (return flags, rolling windows, RSI period)
feature_config = dict(
    returns=True,
    log_returns=True,
    volatility_windows=[5, 20, 60],
    sma_windows=[10, 20, 60],
    rsi_period=14,
)

# Build the feature frame from the raw FX frame
engineer = FeatureEngineer()
features_df = engineer.create_all_features(df, feature_config)

# Column counts before and after feature creation, then a preview
print(f"Original features: {df.shape[1]}")
print(f"Total features: {features_df.shape[1]}")
features_df.head()

## 4. Train/Validation/Test Split

In [None]:
# The first column of the raw frame is the series to forecast
target_col = df.columns[0]

# Partition through the loader with train_ratio=0.8 and val_ratio=0.1
split_ratios = {"train_ratio": 0.8, "val_ratio": 0.1}
train_df, val_df, test_df = loader.split_data(df, **split_ratios)

# Row counts per partition
print(f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")

## 5. Traditional Models

In [None]:
# Fit a default-configured ARIMAModel on the training slice of the target series,
# then show its summary as the cell output
arima_train_series = train_df[target_col]
arima = ARIMAModel()
arima.fit(arima_train_series)
arima.summary()

In [None]:
# Request one forecast step per test row: `predict` gives the values that are plotted,
# `forecast` gives the object whose 'lower'/'upper' entries shade the band
n_steps = len(test_df)
arima_forecast = arima.predict(n_steps)
arima_forecast_with_ci = arima.forecast(n_steps)

# Dates for the forecast: the leading test dates, truncated to the forecast length
forecast_dates = test_df.index[:len(arima_forecast)]

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(test_df.index, test_df[target_col], label='Actual', color='blue')
ax.plot(forecast_dates, arima_forecast, label='ARIMA Forecast', color='red')
ax.fill_between(
    forecast_dates,
    arima_forecast_with_ci['lower'],
    arima_forecast_with_ci['upper'],
    alpha=0.3, color='red', label='95% CI',
)
ax.set_title(f"ARIMA Forecast - {target_col}")
ax.legend()
plt.show()

In [None]:
# Fit a GARCHModel (p=1, q=1) on the training slice of the target series and show its summary.
# NOTE(review): the raw price column is passed in; GARCH is normally fitted to returns,
# so confirm that GARCHModel.fit converts levels to returns internally.
garch_train_series = train_df[target_col]
garch = GARCHModel(p=1, q=1)
garch.fit(garch_train_series)
garch.summary()

## 6. Deep Learning Models

In [None]:
# Prepare sequences for LSTM
from sklearn.preprocessing import StandardScaler

# Column layout for the LSTM matrix: the target series goes FIRST, because
# create_sequences() reads column 0 as the value to predict. Leaving the target out
# would make column 0 some unrelated feature and the model would learn the wrong series.
if target_col not in features_df.columns:
    raise KeyError(
        f"{target_col!r} not found in features_df; it is required as the LSTM target column"
    )
feature_cols = [target_col] + [c for c in features_df.columns if c != target_col]

# Pick each split's rows with a membership mask instead of a label list, so dates that
# feature engineering may have removed do not raise KeyError; then drop rows that still
# contain NaN (e.g. rolling-window warm-up).
train_features = features_df.loc[features_df.index.isin(train_df.index), feature_cols].dropna()
val_features = features_df.loc[features_df.index.isin(val_df.index), feature_cols].dropna()
test_features = features_df.loc[features_df.index.isin(test_df.index), feature_cols].dropna()

# Scale: statistics are fitted on the training split only and reused for val/test
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train_features)
val_scaled = scaler.transform(val_features)
test_scaled = scaler.transform(test_features)

# Create sequences
seq_length = 60

def create_sequences(data, seq_len, target_idx=0):
    """Cut a 2-D series into overlapping input windows with next-step targets.

    Window ``i`` contains rows ``i .. i + seq_len - 1``; its target is the value in
    column ``target_idx`` of row ``i + seq_len``.

    Args:
        data: 2-D array-like of shape (n_rows, n_features).
        seq_len: Number of consecutive rows in each window; must be >= 1.
        target_idx: Column used as the prediction target (default 0, the first column).

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape (n_rows - seq_len, seq_len, n_features)
        and ``y`` has shape (n_rows - seq_len,). If there are too few rows for even one
        window, both are empty but keep those ranks, so ``X.shape[2]`` remains valid.

    Raises:
        ValueError: If ``data`` is not 2-D or ``seq_len`` is smaller than 1.
    """
    values = np.asarray(data)
    if values.ndim != 2:
        raise ValueError(f"data must be 2-D (rows x features), got ndim={values.ndim}")
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    n_windows = max(len(values) - seq_len, 0)
    # Row indices of every window via broadcasting: (n_windows, 1) + (1, seq_len)
    window_rows = np.arange(n_windows)[:, None] + np.arange(seq_len)[None, :]
    X = values[window_rows]
    # Copy so the targets never alias the caller's array
    y = values[seq_len:seq_len + n_windows, target_idx].copy()
    return X, y

# Window each scaled split with the same sequence length (train, then val, then test)
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    create_sequences(scaled_split, seq_length)
    for scaled_split in (train_scaled, val_scaled, test_scaled)
)

# Shapes of the windowed inputs per split
print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

In [None]:
# Seed NumPy and PyTorch before the model is built so that repeated runs
# (Restart Kernel -> Run All) start from the same random state.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Train LSTM
lstm_config = {
    'hidden_size': 64,
    'num_layers': 2,
    'dropout': 0.2,
    'learning_rate': 0.001,
    'batch_size': 32,
    'epochs': 50,
    'early_stopping_patience': 10
}

# The wrapper receives the model class, the size of X_train's last axis, and the config
lstm = PyTorchModelWrapper(LSTMModel, X_train.shape[2], lstm_config)
lstm.fit(X_train, y_train, X_val, y_val, verbose=True)

In [None]:
# Loss curves recorded on the wrapped model during fitting
history = lstm.model.history

fig, ax = plt.subplots(figsize=(10, 5))
for loss_key, curve_label in (('train_loss', 'Train'), ('val_loss', 'Validation')):
    ax.plot(history[loss_key], label=curve_label)
ax.set(xlabel='Epoch', ylabel='Loss', title='LSTM Training History')
ax.legend()
plt.show()

## 7. Model Comparison

In [None]:
from evaluation.comparator import ModelComparator

comparator = ModelComparator()

# Add ARIMA results (scored in the raw units of the target series)
# NOTE(review): the ARIMA forecast spans the whole test window but is registered with
# horizon=1 - confirm this matches what ModelComparator expects.
test_aligned = test_df[target_col].iloc[:len(arima_forecast)]
comparator.add_result('ARIMA', test_aligned.values, arima_forecast, horizon=1)

# Add LSTM results.
# y_test and the network output are in StandardScaler units of column 0 of the scaled
# matrix, while ARIMA above is scored in raw units. Undo the scaling for column 0 so both
# models are measured on the same scale, and flatten the predictions so they line up
# element-wise with the 1-D targets.
# Assumes lstm.predict returns something np.asarray can convert - TODO confirm.
lstm_preds = lstm.predict(X_test)
target_mean = scaler.mean_[0]
target_scale = scaler.scale_[0]
lstm_preds_unscaled = np.asarray(lstm_preds, dtype=float).reshape(-1) * target_scale + target_mean
y_test_unscaled = np.asarray(y_test, dtype=float) * target_scale + target_mean
comparator.add_result('LSTM', y_test_unscaled, lstm_preds_unscaled, horizon=1)

# Show comparison
print(comparator.summary())

In [None]:
# Overlay the ARIMA forecast on the observed target values
fig, ax = plt.subplots(figsize=(14, 6))

# Observed values, on the leading test dates that test_aligned covers
actual_dates = test_df.index[:len(test_aligned)]
ax.plot(actual_dates, test_aligned.values, label='Actual', color='black', linewidth=2)

# ARIMA forecast, on the leading test dates that the forecast covers
arima_dates = test_df.index[:len(arima_forecast)]
ax.plot(arima_dates, arima_forecast, label='ARIMA', alpha=0.8)
# Note: LSTM predictions need proper date alignment

ax.set_title("Model Comparison")
ax.legend()
plt.show()

## 8. Save Results

In [None]:
# Persisting artifacts is opt-in: set SAVE_RESULTS to True to write the trained LSTM and
# the comparison results under ../results/. The target directories are assumed to exist.
SAVE_RESULTS = False

if SAVE_RESULTS:
    # Save model
    lstm.save('../results/models/lstm_model.pt')

    # Save results
    comparator.save_results('../results/comparison.json')

print("Done!")