In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso, LassoCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import statsmodels.api as sm

# ---------------------------------------
# 1. Load and Prepare Data
# ---------------------------------------

def load_and_prepare_data(macro_path="macro_data.csv",
                          forex_path="forex_merged_cleaned.csv",
                          lag_periods=(1, 2, 3, 4, 5, 12, 24, 60),
                          roll_windows=(3, 5, 7, 10)):
    """Load the macro and forex CSVs and build the modelling table.

    Parameters
    ----------
    macro_path : str
        CSV of macro series; must contain a ``DATE`` column.
    forex_path : str
        CSV of forex rates; must contain a ``DATE`` column.
    lag_periods : iterable of int
        Row offsets used to create ``<col>_lag<p>`` features.
    roll_windows : iterable of int
        Window lengths used to create ``<col>_rollmean<w>`` and
        ``<col>_rollstd<w>`` features (each shifted by one row).

    Returns
    -------
    (full_df, forex)
        ``full_df`` is the inner join, on the date index, of the enriched
        macro features and the per-pair log returns (columns named
        ``"<pair> Return"``), with incomplete rows dropped. ``forex`` is the
        date-indexed forex table as loaded (sorted by date).
    """
    # shift()/rolling() operate on row order, so enforce chronological order
    # instead of trusting how the CSV happens to be sorted.
    macro = pd.read_csv(macro_path, parse_dates=["DATE"]).set_index("DATE").sort_index()
    forex = pd.read_csv(forex_path, parse_dates=["DATE"]).set_index("DATE").sort_index()

    # Log return of each pair relative to the previous row.
    log_returns = np.log(forex / forex.shift(1)).dropna()
    log_returns.columns = [col + " Return" for col in log_returns.columns]

    def enrich_macro_safe(df):
        """Append lagged and one-row-shifted rolling statistics to ``df``."""
        pieces = [df]
        pieces.extend(df.shift(p).add_suffix(f"_lag{p}") for p in lag_periods)
        for w in roll_windows:
            window = df.rolling(w)
            # shift(1) keeps the current row out of its own rolling statistic.
            pieces.append(window.mean().shift(1).add_suffix(f"_rollmean{w}"))
            pieces.append(window.std().shift(1).add_suffix(f"_rollstd{w}"))
        # One concat instead of re-copying the growing frame on every iteration.
        return pd.concat(pieces, axis=1).dropna()

    macro_fea = enrich_macro_safe(macro)

    full_df = pd.merge(macro_fea, log_returns, left_index=True, right_index=True).dropna()
    return full_df, forex

# ---------------------------------------
# 2. Train Model Helper
# ---------------------------------------

def train_model(X_train, y_train, selected_model):
    """Fit the regressor named by ``selected_model`` on the training data.

    Returns a ``(model, preprocessor)`` pair. ``preprocessor`` is the fitted
    ``StandardScaler`` for 'Lasso', the fitted scaler+PCA ``Pipeline`` for
    'LassoCV', and ``None`` for 'XGBoost' and 'OLS'. Callers must apply the
    preprocessor (when present) to any data passed to ``model.predict``.

    Raises ``ValueError`` when ``selected_model`` is not one of the four
    supported names.
    """
    if selected_model not in ('Lasso', 'XGBoost', 'OLS', 'LassoCV'):
        raise ValueError(f"Model {selected_model} not supported.")

    if selected_model == 'Lasso':
        # Lasso is fit on standardized features.
        standardizer = StandardScaler()
        lasso = Lasso(alpha=0.01, max_iter=10000)
        lasso.fit(standardizer.fit_transform(X_train), y_train)
        return lasso, standardizer

    if selected_model == 'XGBoost':
        # The booster is fit on the raw features, so there is no preprocessor.
        booster = XGBRegressor(n_estimators=500, learning_rate=0.05)
        booster.fit(X_train, y_train)
        return booster, None

    if selected_model == 'OLS':
        # statsmodels needs the intercept column added explicitly.
        design = sm.add_constant(X_train, has_constant='add')
        return sm.OLS(y_train, design).fit(), None

    # Remaining case: 'LassoCV' on PCA-reduced, standardized features.
    reducer = Pipeline(steps=[
        ('scaler', StandardScaler()),
        # keep the minimum number of components explaining 95% of the variance
        ('pca', PCA(n_components=0.95)),
    ])
    components = reducer.fit_transform(X_train)
    cv_lasso = LassoCV(cv=TimeSeriesSplit(n_splits=5), max_iter=10000, tol=1e-3)
    cv_lasso.fit(components, y_train)
    return cv_lasso, reducer

# ---------------------------------------
# 3. Main Entry Point: Train, Evaluate, and Project
# ---------------------------------------

def _predict_returns(model, preprocessor, features, selected_model):
    """Predict with ``model`` after applying the preprocessing ``train_model`` used."""
    if selected_model == 'OLS':
        # The OLS design matrix was fit with an explicit intercept column; rebuild it.
        return model.predict(sm.add_constant(features, has_constant='add'))
    if preprocessor is not None:
        return model.predict(preprocessor.transform(features))
    return model.predict(features)


def _build_future_macros(last_row, months, macro_adjustments):
    """Repeat ``last_row`` for ``months`` rows and apply the requested level shifts."""
    future = pd.DataFrame([last_row.values] * months, columns=list(last_row.index))
    for macro, shift in (macro_adjustments or {}).items():
        for col in future.columns:
            # A constant level shift moves the series, its lags and its rolling
            # means, but leaves a rolling standard deviation unchanged.
            if macro in col and "_rollstd" not in col:
                future[col] += shift
    return future


def run_forex_model(selected_macros, selected_model, macro_adjustments=None, future_years=10,
                    train_end="2022-12-31", future_start="2025-01-01"):
    """Train one model per currency pair, score it out of sample, and project forward.

    Parameters
    ----------
    selected_macros : list of str
        Macro factor names such as ['Interest Rate', 'Inflation']; every feature
        column whose name contains one of them is used.
    selected_model : str
        'Lasso', 'LassoCV', 'XGBoost', or 'OLS'.
    macro_adjustments : dict, optional
        Level shifts per macro factor, e.g. {'Interest Rate': 0.5}, added to the
        matching feature columns of the future scenario.
    future_years : int or float
        Number of years to project forward (e.g. 5, 7, 10).
    train_end : str
        Last date (inclusive) of the training window; later rows form the test set.
    future_start : str
        First date of the monthly (month-start) projection calendar.

    Returns
    -------
    (results_df, future_preds_df)
        ``results_df`` has one row per currency with R2_Score, MAE and RMSE on
        the test set. ``future_preds_df`` has one row per currency and month
        with the predicted return and the compounded forex rate.

    Raises
    ------
    ValueError
        If no return columns or no matching feature columns exist, if the
        horizon is not positive, or if the train/test split leaves a side empty.
    """
    full_df, forex = load_and_prepare_data()

    # Targets are the "<pair> Return" columns derived from the forex table, so a
    # macro column that merely contains "Return" is never treated as a target.
    fx_pairs = [pair for pair in forex.columns if f"{pair} Return" in full_df.columns]
    fx_targets = [f"{pair} Return" for pair in fx_pairs]
    target_names = set(fx_targets)
    available_macros = [
        col for col in full_df.columns
        if col not in target_names and any(macro in col for macro in selected_macros)
    ]

    if not fx_targets:
        raise ValueError("No forex return columns found in the prepared data.")
    if not available_macros:
        raise ValueError(f"No feature columns match selected_macros={selected_macros!r}.")

    months = int(round(future_years * 12))
    if months <= 0:
        raise ValueError("future_years must be positive.")

    train_df = full_df[full_df.index <= train_end]
    test_df = full_df[full_df.index > train_end]
    if train_df.empty or test_df.empty:
        raise ValueError(f"train_end={train_end!r} leaves an empty train or test set.")

    # Everything below is identical for every currency, so build it once.
    X_train = train_df[available_macros]
    X_test = test_df[available_macros]

    # The projection starts from the latest forex price, so seed the scenario
    # with the latest available feature row rather than the last training row.
    future_macro_df = _build_future_macros(
        full_df[available_macros].iloc[-1], months, macro_adjustments
    )
    future_dates = pd.date_range(start=future_start, periods=months, freq='MS')

    all_results = []
    all_future_preds = []

    for forex_pair, target in zip(fx_pairs, fx_targets):
        y_train = train_df[target]
        y_test = test_df[target]

        model, preprocessor = train_model(X_train, y_train, selected_model)

        # --- Out-of-sample evaluation on the rows after train_end ---
        preds = _predict_returns(model, preprocessor, X_test, selected_model)
        all_results.append({
            "Currency": forex_pair,
            "Model": selected_model,
            "Selected_Macros": selected_macros,
            "R2_Score": r2_score(y_test, preds),
            "MAE": mean_absolute_error(y_test, preds),
            "RMSE": np.sqrt(mean_squared_error(y_test, preds)),
        })

        # --- Projection under the (optionally adjusted) flat macro scenario ---
        future_preds = np.asarray(
            _predict_returns(model, preprocessor, future_macro_df, selected_model)
        )

        # Compound the predicted log returns from the latest observed rate.
        last_real_price = forex[forex_pair].iloc[-1]
        future_prices = last_real_price * np.exp(future_preds).cumprod()

        all_future_preds.append(pd.DataFrame({
            'DATE': future_dates,
            'Currency': forex_pair,
            'Predicted_Return': future_preds,
            'Predicted_Forex_Rate': future_prices,
        }))

    results_df = pd.DataFrame(all_results)
    future_preds_df = pd.concat(all_future_preds)

    return results_df, future_preds_df


In [5]:
# Smoke test: LassoCV on interest-rate and inflation features, with a +0.5
# interest-rate adjustment and a 7-year projection horizon.
real_metrics, future_predictions = run_forex_model(
    ['Interest Rate', 'Inflation'],
    'LassoCV',
    macro_adjustments={'Interest Rate': 0.5},
    future_years=7,
)

# First rows of each table: test-set R², MAE and RMSE per currency, then the
# projected returns and forex rates.
print(real_metrics.head(n=5), future_predictions.head(n=5), sep="\n")


  Currency    Model             Selected_Macros  R2_Score       MAE      RMSE
0  USD-AUD  LassoCV  [Interest Rate, Inflation] -0.009747  0.021536  0.026352
1  USD-CAD  LassoCV  [Interest Rate, Inflation] -0.021359  0.013683  0.016418
2  USD-CHF  LassoCV  [Interest Rate, Inflation] -0.004782  0.021248  0.023524
3  USD-CNY  LassoCV  [Interest Rate, Inflation] -0.020634  0.011622  0.014877
4  USD-EUR  LassoCV  [Interest Rate, Inflation] -0.003284  0.016560  0.018896
        DATE Currency  Predicted_Return  Predicted_Forex_Rate
0 2025-01-01  USD-AUD          0.000784              1.569366
1 2025-02-01  USD-AUD          0.000784              1.570597
2 2025-03-01  USD-AUD          0.000784              1.571830
3 2025-04-01  USD-AUD          0.000784              1.573063
4 2025-05-01  USD-AUD          0.000784              1.574297
