### Step 1: Import Datasets and Merge

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Read the macroeconomic and forex CSV files; in both, DATE is parsed as
# datetimes and then promoted to the index
macro = (
    pd.read_csv("macro_data.csv", parse_dates=["DATE"])
    .set_index("DATE")
)
forex = (
    pd.read_csv("forex_merged_cleaned.csv", parse_dates=["DATE"])
    .set_index("DATE")
)

In [3]:
# Plain-text preview of the first 25 rows of each dataset, each under its own label
print("Macro Data:", macro.head(25), sep="\n")
print("\nForex Data:", forex.head(25), sep="\n")

Macro Data:
            Trade Balance  Inflation (CPI)  Core Inflation  Interest Rate  \
DATE                                                                        
2002-01-01       -29678.0            177.7           188.7           1.73   
2002-02-01       -32501.0            178.0           189.1           1.74   
2002-03-01       -31161.0            178.5           189.2           1.73   
2002-04-01       -33965.0            179.3           189.7           1.75   
2002-05-01       -34026.0            179.5           190.0           1.75   
2002-06-01       -34732.0            179.6           190.2           1.75   
2002-07-01       -33913.0            180.0           190.5           1.73   
2002-08-01       -36385.0            180.5           191.1           1.74   
2002-09-01       -36088.0            180.8           191.3           1.75   
2002-10-01       -35095.0            181.2           191.5           1.75   
2002-11-01       -39515.0            181.5           191.9      

In [10]:
# Log return of every exchange-rate column: ln(rate_t / rate_{t-1}), row over row.
# Rows containing any NaN are discarded (the first row has no previous row to
# divide by), and each column is tagged with a " Return" suffix.
log_returns = (
    np.log(forex / forex.shift(1))
    .dropna()
    .rename(columns=lambda name: name + " Return")
)

In [17]:
# Inner-join the macro data with the log returns on their shared DATE index,
# then discard every row that still has a missing value (can customize later)
full_df = (
    macro
    .merge(log_returns, left_index=True, right_index=True, how="inner")
    .dropna()
)
full_df.head()

Unnamed: 0_level_0,Trade Balance,Inflation (CPI),Core Inflation,Interest Rate,Industrial Production,Retail Sales,Consumer Sentiment,Unemployment Rate,Manufacturing PMI,S&P 500 Index,...,USD-AUD Return,USD-CAD Return,USD-CHF Return,USD-CNY Return,USD-EUR Return,USD-GBP Return,USD-HKD Return,USD-JPY Return,USD-NZD Return,USD-XAU Return
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-03-01,-31161.0,178.5,189.2,1.73,89.1265,284262.0,95.7,5.7,50.7,1106.73,...,-0.020278,0.007835,-0.013291,-1.2e-05,-0.011569,-0.004667,1.3e-05,-0.009849,-0.013128,-0.046385
2002-04-01,-33965.0,179.3,189.7,1.75,89.5507,288833.0,93.0,5.9,52.4,1147.39,...,-0.027978,-0.004192,-0.009059,0.000109,-0.002642,-0.005979,5.1e-05,-0.004735,-0.043389,-0.02099
2002-05-01,-34026.0,179.5,190.0,1.75,89.9348,284951.0,96.9,5.8,52.4,1076.92,...,-0.01027,-0.017201,-0.038686,-1.2e-05,-0.032505,-0.022262,-9e-05,-0.032077,-0.015546,-0.018349
2002-06-01,-34732.0,179.6,190.2,1.75,90.6736,287372.0,92.4,5.8,53.1,1067.14,...,-0.052648,-0.025715,-0.031208,-9.7e-05,-0.03674,0.001854,9e-05,-0.034186,-0.071806,-0.057158
2002-07-01,-33913.0,180.0,190.5,1.73,90.6436,290385.0,88.1,5.8,53.6,989.81,...,0.007075,-0.006896,-0.056956,7.2e-05,-0.059528,-0.052341,-5.1e-05,-0.038989,-0.013434,0.038466


### Step 2: Run OLS Regression per Currency

In [21]:
import statsmodels.api as sm

# 1. Identify columns
#    Regressors: every macro column. Targets: the "Return" columns of full_df.
#    Regressors are excluded from the targets explicitly so that a macro column
#    whose name happens to contain "Return" is never regressed on itself.
macro_vars = macro.columns.tolist()
fx_targets = [
    col for col in full_df.columns
    if "Return" in col and col not in macro_vars
]

# 2. Build the design matrix once. It is the same for every currency, so slicing
#    the macro columns and adding the intercept does not belong inside the loop.
X = full_df[macro_vars]
X_const = sm.add_constant(X)  # adds the intercept column ("const")

# 3. Fit one OLS model per currency return and store its results
results = []
for target in fx_targets:
    y = full_df[target]

    # Fit OLS model of this currency's return on the macro variables
    model = sm.OLS(y, X_const).fit()

    # One record per currency: fit quality plus per-parameter coefficient and
    # p-value dictionaries (keyed by regressor name, including "const")
    results.append({
        "Currency": target.replace(" Return", ""),
        "R_squared": model.rsquared,
        "Adj_R_squared": model.rsquared_adj,
        "Coefficients": model.params.to_dict(),
        "P_values": model.pvalues.to_dict()
    })

In [31]:
# Readable summary table built straight from the result records: only the
# currency name and the two fit statistics are kept, ordered by R-squared
# from highest to lowest
summary = pd.DataFrame(
    results, columns=["Currency", "R_squared", "Adj_R_squared"]
).sort_values("R_squared", ascending=False)
summary.head(10)

Unnamed: 0,Currency,R_squared,Adj_R_squared
0,USD-AUD,0.211995,0.179161
8,USD-NZD,0.190837,0.157121
1,USD-CAD,0.173453,0.139014
5,USD-GBP,0.140995,0.105203
4,USD-EUR,0.104982,0.067689
7,USD-JPY,0.086948,0.048904
3,USD-CNY,0.076104,0.037608
2,USD-CHF,0.070586,0.03186
9,USD-XAU,0.058102,0.018856
6,USD-HKD,0.036166,-0.003994


In [39]:
# Create a DataFrame with currencies as rows and fitted parameters as columns.
# The frame is built in a single constructor call from the per-currency
# coefficient dictionaries; growing it with pd.concat inside a loop re-copies
# all earlier rows on every pass and starts from an empty frame (which recent
# pandas releases may warn about).
coeff_matrix = pd.DataFrame(
    [res["Coefficients"] for res in results],
    index=pd.Index([res["Currency"] for res in results], name="Currency"),
)

# Drop the constant (intercept) column if you only want macro variable effects
coeff_matrix = coeff_matrix.drop(columns="const", errors='ignore')

# NOTE: this changes the float display format for every later output in the
# session, not just for this table
pd.set_option('display.float_format', '{:.9f}'.format)
coeff_matrix.head(10)

Unnamed: 0_level_0,Trade Balance,Inflation (CPI),Core Inflation,Interest Rate,Industrial Production,Retail Sales,Consumer Sentiment,Unemployment Rate,Manufacturing PMI,S&P 500 Index,VIX Index
Currency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
USD-AUD,-2.49e-07,0.006025727,-0.005484707,0.000599187,-0.000422744,-1.09e-07,0.001723048,-0.004336749,0.000503811,4.79e-07,0.002766078
USD-CAD,-1.99e-07,0.002020702,-0.001500754,-0.000165796,0.001123213,-1.44e-07,0.000700578,-0.001222078,0.000114279,-6.34e-07,0.001703703
USD-CHF,-4.14e-07,0.002974896,-0.002984984,-0.000803831,-0.00070786,1.27e-07,0.001171354,-0.000521073,-0.00019627,-8.066e-06,0.001062488
USD-CNY,-2.6e-08,0.000315574,-0.000237144,-0.000189358,-0.000532364,6.5e-08,0.000217461,-0.001201075,5.027e-06,-6.502e-06,0.000235927
USD-EUR,-2.61e-07,0.003144222,-0.002880174,-0.00086477,-0.000236976,-1.9e-08,0.000876409,-0.001417752,0.000284979,-2.341e-06,0.001302293
USD-GBP,-6.16e-07,0.001721159,-0.001040305,-0.002424515,-2.9768e-05,-9.6e-08,0.000614166,-0.003079519,-0.000268299,-1.218e-05,0.001171903
USD-HKD,-1.3e-08,-1.9881e-05,1.2239e-05,-9.0273e-05,-2.0606e-05,9e-09,9.31e-07,-8.4403e-05,3.2039e-05,-7.01e-07,-5.488e-06
USD-JPY,-2.22e-07,0.002078786,-0.002745991,-0.001640448,-0.000873885,3.37e-07,0.000651699,0.001269367,-0.000577126,-7.766e-06,-0.000296356
USD-NZD,-3.45e-07,0.005211314,-0.004893475,0.000470543,3.592e-06,-5e-09,0.001580889,-0.003349771,0.000251844,-4.315e-06,0.00277034
USD-XAU,1.2e-08,0.004724525,-0.005302977,-0.004007048,-0.001148365,2.81e-07,0.001366186,-0.002491674,-0.000493926,-3.669e-06,0.000937486


### Step 3: Train a Predictive Model