# What to start with (minimal viable set)

* **For any pair**: policy rate, 2y yield, CPI core YoY (both countries), VIX, FX realized vol, FX returns.
* Engineer: `rate_diff_2y`, `cpi_diff_core`, `rv_20d`.
* Do: run PELT change-point detection on `rate_diff_2y` and `rv_20d`; fit a 2-state hidden Markov model (HMM) on `[ret, rv]`.
* Combine with hard event dates (COVID, 2022 hikes).
* Run a Monte Carlo permutation test (MCPT) separately within each regime.


NOTE: `rate_diff_2y` will be aggregated (the code below builds it from monthly policy-rate data as a proxy for the 2y yield spread).

In [1]:
from regime_partitioning.policy_rate_diff import usdjpy_policy_rate_diff

  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  ).ffill().infer_objects(copy=False)
  spread = base_m.join(quote_m, how="outer", lsuffix="_base", rsuffix="_quote").ffill().infer_objects(copy=False)
  spread = base_m.join(quote_m, how="outer", lsuffix="_base", rsuffix="_quote").ffill().infer_objects(copy=False)
  spread = base_m.join(quote_m, how="outer", lsuffix="_base", rsuffix="_quote").ffill().infer_objects(copy=False)
  spread = base_m.join(quote_m, how="outer", lsuffix="_base", rsuffix="_quote").ffill().infer_objects(copy=False)
  spread = base_m.join(quote_m, how="outer", lsuffix="_base", rsuffix="_quote").ffill().infer_objects(copy=False)
  spread = base_m.join(quote_m, how="outer", lsuff

In [2]:
usdjpy_policy_rate_diff.head()

Unnamed: 0,policy_rate_diff
1953-04-30,
1953-05-31,
1953-06-30,
1953-07-31,
1953-08-31,


In [None]:
raise

In [3]:
import pandas as pd
from datetime import datetime
from typing import Optional, Tuple, Dict, Any
from trading_utils.fundamentals import calculate_interest_rate_difference, calculate_dei


def calculate_rate_differential(home_currency: str, foreign_currency: str) -> Optional[pd.DataFrame]:
    """
    Calculate the policy-rate differential (home minus foreign) for a currency pair.

    The rate column is located separately in each fetched frame (the first
    column whose name contains "rate", case-insensitive), so the subtraction
    always pairs one home column with one foreign column even when a frame
    carries several rate-like columns.

    Args:
        home_currency: Home currency code
        foreign_currency: Foreign currency code

    Returns:
        DataFrame with a single 'rate_diff_2y' column (policy rates used as a
        proxy for the 2y spread), restricted to index values present in both
        inputs. None if either fetch returned None or a rate column could not
        be identified in one of the frames.
    """
    # NOTE(review): fetch_economic_data is not defined or imported in the
    # visible part of this file; it must be in scope before this is called.
    # Fetch policy rates for both currencies (monthly frequency)
    home_rates = fetch_economic_data(home_currency, 'interest_rates', 'monthly')
    foreign_rates = fetch_economic_data(foreign_currency, 'interest_rates', 'monthly')

    if home_rates is None or foreign_rates is None:
        print(f"Failed to fetch rate data for {home_currency} or {foreign_currency}")
        return None

    # Pick the rate column per frame rather than from the joined frame, where
    # the first two matches could both belong to the home side.
    home_rate_col = next(
        (col for col in home_rates.columns if 'rate' in str(col).lower()), None
    )
    foreign_rate_col = next(
        (col for col in foreign_rates.columns if 'rate' in str(col).lower()), None
    )
    if home_rate_col is None or foreign_rate_col is None:
        print("Could not identify rate columns in the data")
        return None

    # Inner join keeps only dates available for both currencies; the suffix
    # disambiguates the foreign column when both sides use the same name.
    aligned_data = home_rates[[home_rate_col]].join(
        foreign_rates[[foreign_rate_col]], how='inner', rsuffix='_foreign'
    )

    # After the join, column 0 is the home rate and column 1 the foreign rate.
    rate_diff = aligned_data.iloc[:, 0] - aligned_data.iloc[:, 1]
    return rate_diff.to_frame('rate_diff_2y')


def calculate_cpi_differential(home_currency: str, foreign_currency: str, use_core: bool = True) -> Optional[pd.DataFrame]:
    """
    Calculate the CPI differential (home minus foreign) for a currency pair.

    CPI columns are located separately in each fetched frame (names containing
    "cpi", case-insensitive; "core" marks a core series), so the subtraction
    always pairs one home column with one foreign column.

    Args:
        home_currency: Home currency code
        foreign_currency: Foreign currency code
        use_core: Whether to use core CPI; applied only when BOTH frames have
            a core column, otherwise headline CPI is used on both sides

    Returns:
        DataFrame with a single 'cpi_diff_core' column (the name is kept even
        when the headline fallback is used), restricted to index values
        present in both inputs. None if either fetch returned None or a CPI
        column could not be identified in one of the frames.
    """
    # NOTE(review): fetch_economic_data is not defined or imported in the
    # visible part of this file; it must be in scope before this is called.
    # Fetch CPI data for both currencies (monthly frequency)
    home_cpi = fetch_economic_data(home_currency, 'cpi', 'monthly')
    foreign_cpi = fetch_economic_data(foreign_currency, 'cpi', 'monthly')

    if home_cpi is None or foreign_cpi is None:
        print(f"Failed to fetch CPI data for {home_currency} or {foreign_currency}")
        return None

    # Classify columns per frame; picking from the joined frame could select
    # two columns that both belong to the home side.
    home_cpi_cols = [col for col in home_cpi.columns if 'cpi' in str(col).lower()]
    foreign_cpi_cols = [col for col in foreign_cpi.columns if 'cpi' in str(col).lower()]
    home_core_cols = [col for col in home_cpi_cols if 'core' in str(col).lower()]
    foreign_core_cols = [col for col in foreign_cpi_cols if 'core' in str(col).lower()]

    if use_core and home_core_cols and foreign_core_cols:
        home_cpi_col = home_core_cols[0]
        foreign_cpi_col = foreign_core_cols[0]
    elif home_cpi_cols and foreign_cpi_cols:
        # Prefer headline series on both sides so core is not mixed with
        # headline; fall back to whatever CPI column exists if there is none.
        home_headline = [col for col in home_cpi_cols if col not in home_core_cols]
        foreign_headline = [col for col in foreign_cpi_cols if col not in foreign_core_cols]
        home_cpi_col = (home_headline or home_cpi_cols)[0]
        foreign_cpi_col = (foreign_headline or foreign_cpi_cols)[0]
    else:
        print("Could not identify CPI columns in the data")
        return None

    # Inner join keeps only dates available for both currencies; the suffix
    # disambiguates the foreign column when both sides use the same name.
    aligned_data = home_cpi[[home_cpi_col]].join(
        foreign_cpi[[foreign_cpi_col]], how='inner', rsuffix='_foreign'
    )

    # After the join, column 0 is the home series and column 1 the foreign one.
    cpi_diff = aligned_data.iloc[:, 0] - aligned_data.iloc[:, 1]
    return cpi_diff.to_frame('cpi_diff_core')


def resample_to_daily(monthly_data: pd.DataFrame, method: str = 'ffill') -> pd.DataFrame:
    """
    Expand lower-frequency (e.g. monthly) data onto a daily calendar index.

    The caller's frame is never modified: if the index has to be converted to
    datetimes or sorted, that is done on a copy.

    Args:
        monthly_data: DataFrame indexed by dates (or values convertible with
            pd.to_datetime)
        method: Fill method passed to DataFrame.reindex ('ffill' carries the
            last known observation forward)

    Returns:
        DataFrame on a daily ('D') index spanning the first to the last date
        of the input. An empty input is returned unchanged.
    """
    if monthly_data.empty:
        return monthly_data

    frame = monthly_data

    # Ensure we have a DatetimeIndex (convert on a copy so the caller's
    # object keeps its original index)
    if not isinstance(frame.index, pd.DatetimeIndex):
        print("Warning: Index is not DatetimeIndex, attempting conversion")
        frame = frame.copy()
        frame.index = pd.to_datetime(frame.index)

    # reindex(method=...) requires a monotonic index; sort_index returns a
    # new object, so the input is still left untouched.
    if not frame.index.is_monotonic_increasing:
        frame = frame.sort_index()

    # Create daily date range from first to last date
    daily_index = pd.date_range(
        start=frame.index.min(), end=frame.index.max(), freq='D'
    )

    # Reindex to daily and fill the gaps with the requested method
    return frame.reindex(daily_index, method=method)


def build_feature_pipeline(home_currency: str, foreign_currency: str, 
                          fx_returns: Optional[pd.DataFrame] = None,
                          realized_vol: Optional[pd.DataFrame] = None,
                          vix_data: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """
    Build the combined feature frame: macro differentials plus optional FX data.

    The rate and CPI differentials are computed, expanded to daily frequency,
    outer-joined together, then outer-joined with whichever of the optional
    daily inputs were supplied. Remaining gaps are forward-filled.

    Args:
        home_currency: Home currency code
        foreign_currency: Foreign currency code
        fx_returns: Daily FX returns (optional)
        realized_vol: Daily realized volatility (optional)
        vix_data: Daily VIX data (optional)

    Returns:
        Combined DataFrame with all available features. A differential that
        could not be computed (helper returned None) is simply left out.
    """
    macro_frames = []

    # 1. Calculate rate differential and resample it to daily
    print(f"Calculating rate differential for {home_currency}/{foreign_currency}...")
    rate_diff = calculate_rate_differential(home_currency, foreign_currency)
    if rate_diff is not None:
        macro_frames.append(resample_to_daily(rate_diff))

    # 2. Calculate CPI differential and resample it to daily
    print(f"Calculating CPI differential for {home_currency}/{foreign_currency}...")
    cpi_diff = calculate_cpi_differential(home_currency, foreign_currency)
    if cpi_diff is not None:
        macro_frames.append(resample_to_daily(cpi_diff))

    # 3. Combine macro features, using the first available one as the base
    combined_data = pd.DataFrame()
    for frame in macro_frames:
        if combined_data.empty:
            combined_data = frame.copy()
        else:
            combined_data = combined_data.join(frame, how='outer')

    # 4. Add daily FX data if provided
    for extra in (fx_returns, realized_vol, vix_data):
        if extra is not None:
            combined_data = combined_data.join(extra, how='outer')

    # Forward-fill any remaining NaN values. DataFrame.ffill() replaces the
    # deprecated fillna(method='ffill') spelling.
    combined_data = combined_data.ffill()

    print(f"Feature pipeline completed. Shape: {combined_data.shape}")
    print(f"Columns: {list(combined_data.columns)}")
    print(f"Date range: {combined_data.index.min()} to {combined_data.index.max()}")

    return combined_data


# Convenience function for common currency pairs
def build_eurusd_features(fx_returns: Optional[pd.DataFrame] = None,
                         realized_vol: Optional[pd.DataFrame] = None,
                         vix_data: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """Run the feature pipeline with 'eur' as home and 'usd' as foreign currency."""
    return build_feature_pipeline(
        home_currency='eur',
        foreign_currency='usd',
        fx_returns=fx_returns,
        realized_vol=realized_vol,
        vix_data=vix_data,
    )


def build_gbpusd_features(fx_returns: Optional[pd.DataFrame] = None,
                         realized_vol: Optional[pd.DataFrame] = None,
                         vix_data: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """Run the feature pipeline with 'gbp' as home and 'usd' as foreign currency."""
    return build_feature_pipeline(
        home_currency='gbp',
        foreign_currency='usd',
        fx_returns=fx_returns,
        realized_vol=realized_vol,
        vix_data=vix_data,
    )


# Example usage function
def example_usage():
    """
    Walk through the pipeline end to end and print what happened.

    Three independent demos are run (EUR/USD features, GBP/USD features, and
    the two differential helpers called directly). Each demo is wrapped in
    its own try/except so a failure is reported and the next demo still runs.
    """
    print("=== Feature Engineering Pipeline Example ===")

    # Demo 1: EUR/USD features from macro data only
    print("\n1. Building EUR/USD features...")
    try:
        eur_frame = build_eurusd_features()
        if eur_frame.empty:
            print("No features were successfully built for EUR/USD")
        else:
            print(f"SUCCESS: EUR/USD features built with shape {eur_frame.shape}")
            print(f"Available columns: {list(eur_frame.columns)}")
    except Exception as exc:
        print(f"ERROR building EUR/USD features: {exc}")

    # Demo 2: GBP/USD features from macro data only
    print("\n2. Building GBP/USD features...")
    try:
        gbp_frame = build_gbpusd_features()
        if gbp_frame.empty:
            print("No features were successfully built for GBP/USD")
        else:
            print(f"SUCCESS: GBP/USD features built with shape {gbp_frame.shape}")
            print(f"Available columns: {list(gbp_frame.columns)}")
    except Exception as exc:
        print(f"ERROR building GBP/USD features: {exc}")

    # Demo 3: call the differential helpers directly
    print("\n3. Manual calculation example...")
    try:
        rates = calculate_rate_differential('eur', 'usd')
        if rates is None:
            print("Failed to calculate rate differential")
        else:
            print(f"Rate differential calculated: {rates.shape}")
            print(f"Sample data:\n{rates.head()}")

        prices = calculate_cpi_differential('eur', 'usd')
        if prices is None:
            print("Failed to calculate CPI differential")
        else:
            print(f"CPI differential calculated: {prices.shape}")
            print(f"Sample data:\n{prices.head()}")

    except Exception as exc:
        print(f"ERROR in manual calculations: {exc}")

    print("\n=== Pipeline Example Complete ===")


if __name__ == "__main__":
    # Run the demo only when this file is executed as a script; importing the
    # module does not trigger it.
    example_usage()



=== Feature Engineering Pipeline Example ===

1. Building EUR/USD features...
Calculating rate differential for eur/usd...
ERROR building EUR/USD features: name 'fetch_economic_data' is not defined

2. Building GBP/USD features...
Calculating rate differential for gbp/usd...
ERROR building GBP/USD features: name 'fetch_economic_data' is not defined

3. Manual calculation example...
ERROR in manual calculations: name 'fetch_economic_data' is not defined

=== Pipeline Example Complete ===
