In [None]:
#!/usr/bin/env python3
"""
FRED Data Downloader for Meta-Learning Framework
Downloads market regime indicators and cross-asset data from FRED API
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
import time
import requests
from pathlib import Path

class FREDDataDownloader:
    """
    Downloads financial data from FRED API for meta-learning framework.
    Focuses on market regime indicators and cross-asset relationships.

    Each series is written to ``<data_folder>/<category>/<series_name>.csv``
    with a ``date`` column and one value column named after the FRED series
    id. Series whose id is ``None`` in ``data_series`` are not downloaded;
    they are computed from other series by ``calculate_derived_series``.
    """

    # Seconds to wait on a FRED request before giving up; without a timeout a
    # stalled connection would block the whole download run indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key=None, data_folder="fred"):
        """
        Initialize FRED data downloader.

        Args:
            api_key: FRED API key. If None, will try to get from environment variable.
            data_folder: Directory that receives the downloaded CSV files.
                Created (including parents) if it does not exist.
        """
        self.api_key = api_key or os.getenv('FRED_API_KEY')
        if not self.api_key:
            print("WARNING: No FRED API key provided. You'll need to register at https://fred.stlouisfed.org/docs/api/api_key.html")
            print("Set the API key with: export FRED_API_KEY='your_key_here'")

        self.base_url = "https://api.stlouisfed.org/fred/series/observations"
        self.data_folder = Path(data_folder)
        self.data_folder.mkdir(parents=True, exist_ok=True)

        # Define data series we want to download.
        # Maps category -> {local series name -> FRED series id}.
        # NOTE(review): some of these ids (e.g. NAPM, TEDRATE, USD3MTD156N) may
        # have been discontinued or removed from FRED - confirm before relying
        # on them; a failed download is reported and skipped, not fatal.
        self.data_series = {
            # Market Regime Indicators
            'market_regime': {
                'SP500': 'SP500',  # S&P 500 Index
                'SP500_RETURNS': None,  # Will calculate from SP500
                'VIX_FRED': 'VIXCLS',  # VIX from FRED (for cross-validation)
                'TREASURY_10Y': 'GS10',  # 10-Year Treasury Constant Maturity Rate
                'TREASURY_2Y': 'GS2',   # 2-Year Treasury Constant Maturity Rate
                'TREASURY_3M': 'GS3M',  # 3-Month Treasury Bill
                'TERM_SPREAD': None,    # Will calculate 10Y - 2Y
                'FED_FUNDS': 'FEDFUNDS', # Federal Funds Rate
            },

            # Credit and Risk Indicators
            'credit_risk': {
                'CREDIT_SPREAD_AAA': 'AAA',     # Moody's Seasoned Aaa Corporate Bond Yield
                'CREDIT_SPREAD_BAA': 'BAA',     # Moody's Seasoned Baa Corporate Bond Yield
                'CREDIT_SPREAD_IG': None,       # Will calculate BAA - AAA
                'HIGH_YIELD_SPREAD': 'BAMLH0A0HYM2',  # ICE BofA US High Yield Index Option-Adjusted Spread
                'TED_SPREAD': 'TEDRATE',        # TED Spread
            },

            # Currency and International
            'currency': {
                'USD_INDEX': 'DTWEXBGS',        # Trade Weighted U.S. Dollar Index: Broad, Goods and Services
                'USD_EUR': 'DEXUSEU',           # U.S. / Euro Foreign Exchange Rate
                'USD_JPY': 'DEXJPUS',           # U.S. / Japan Foreign Exchange Rate
                'USD_GBP': 'DEXUSUK',           # U.S. / U.K. Foreign Exchange Rate
                'EMERGING_MARKET_BOND': 'BAMLEMCBPIOAS', # ICE BofA Emerging Markets Corporate Plus Index Option-Adjusted Spread
            },

            # Economic Indicators
            'economic': {
                'UNEMPLOYMENT': 'UNRATE',       # Unemployment Rate
                'INFLATION_CORE': 'CPILFESL',   # Core CPI
                'INFLATION_HEADLINE': 'CPIAUCSL', # Headline CPI
                'GDP_GROWTH': 'GDP',            # Gross Domestic Product
                'CONSUMER_SENTIMENT': 'UMCSENT', # University of Michigan Consumer Sentiment
                'ISM_PMI': 'NAPM',              # ISM Manufacturing PMI
            },

            # Market Microstructure (Limited FRED data)
            'market_structure': {
                'MARGIN_DEBT': 'BOGZ1FL663067003Q', # Margin debt (quarterly)
                'REPO_RATE': 'SOFR',                # Secured Overnight Financing Rate
                'LIBOR_USD_3M': 'USD3MTD156N',      # 3-Month London Interbank Offered Rate
            }
        }

    def _redact(self, error):
        """Return ``str(error)`` with the API key masked.

        The key travels as a URL query parameter, and request errors embed the
        full URL in their message, so printing them verbatim would leak it.
        """
        message = str(error)
        if self.api_key:
            message = message.replace(self.api_key, '***')
        return message

    def _category_for(self, series_name):
        """Return the category that declares ``series_name`` in ``data_series``.

        Falls back to ``'derived'`` for names that are not declared anywhere.
        """
        for category, series_dict in self.data_series.items():
            if series_name in series_dict:
                return category
        return 'derived'

    def download_series(self, series_id, start_date='1990-01-01', end_date=None):
        """
        Download a single time series from FRED.

        Args:
            series_id: FRED series identifier
            start_date: Start date in YYYY-MM-DD format
            end_date: End date in YYYY-MM-DD format (default: today)

        Returns:
            pandas.DataFrame with a ``date`` column and a value column named
            ``series_id`` (rows with non-numeric values dropped), or None when
            there is no API key, the request fails, or FRED returns no
            observations. Errors are printed, never raised.
        """
        if not self.api_key:
            print(f"Cannot download {series_id}: No API key provided")
            return None

        if end_date is None:
            end_date = datetime.now().strftime('%Y-%m-%d')

        params = {
            'series_id': series_id,
            'api_key': self.api_key,
            'file_type': 'json',
            'observation_start': start_date,
            'observation_end': end_date
        }

        try:
            print(f"Downloading {series_id}...")
            response = requests.get(
                self.base_url, params=params, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()

            data = response.json()

            # A missing key and an empty list both mean "nothing to build a
            # frame from"; an empty frame would have no 'date' column.
            observations = data.get('observations')
            if not observations:
                print(f"No data found for {series_id}")
                return None

            # Convert to DataFrame
            df = pd.DataFrame(observations)

            # Clean data: non-numeric placeholders become NaN and are dropped
            df['date'] = pd.to_datetime(df['date'])
            df['value'] = pd.to_numeric(df['value'], errors='coerce')
            df = df[['date', 'value']].dropna()
            df = df.rename(columns={'value': series_id})

            # Add a small delay to be nice to FRED API
            time.sleep(0.1)

            return df

        except requests.exceptions.RequestException as e:
            print(f"Error downloading {series_id}: {self._redact(e)}")
            return None
        except Exception as e:
            print(f"Unexpected error for {series_id}: {self._redact(e)}")
            return None

    @staticmethod
    def _spread(minuend, subtrahend, minuend_col, subtrahend_col, name):
        """Inner-join two series on ``date`` and return ``minuend - subtrahend``.

        The result has the columns ``date`` and ``name``.
        """
        merged = pd.merge(minuend, subtrahend, on='date', how='inner')
        merged[name] = merged[minuend_col] - merged[subtrahend_col]
        return merged[['date', name]]

    def calculate_derived_series(self, data_dict):
        """
        Calculate derived series from downloaded data.

        Args:
            data_dict: Dictionary of DataFrames with downloaded data, keyed by
                local series name. Missing keys and None values are tolerated;
                the series that depend on them are simply not produced.

        Returns:
            Dictionary with additional derived series (any of SP500_RETURNS,
            TERM_SPREAD, CREDIT_SPREAD_IG), each a DataFrame with ``date`` and
            one value column.
        """
        derived = {}

        # Calculate S&P 500 returns
        sp500 = data_dict.get('SP500')
        if sp500 is not None:
            sp500 = sp500.copy()  # do not add a column to the caller's frame
            sp500['SP500_RETURNS'] = sp500['SP500'].pct_change()
            # The first row has no previous price, so its return is NaN
            derived['SP500_RETURNS'] = sp500[['date', 'SP500_RETURNS']].dropna()

        # Calculate term spread (10Y - 2Y)
        treasury_10y = data_dict.get('TREASURY_10Y')
        treasury_2y = data_dict.get('TREASURY_2Y')
        if treasury_10y is not None and treasury_2y is not None:
            derived['TERM_SPREAD'] = self._spread(
                treasury_10y, treasury_2y, 'GS10', 'GS2', 'TERM_SPREAD'
            )

        # Calculate investment grade credit spread (BAA - AAA)
        baa = data_dict.get('CREDIT_SPREAD_BAA')
        aaa = data_dict.get('CREDIT_SPREAD_AAA')
        if baa is not None and aaa is not None:
            derived['CREDIT_SPREAD_IG'] = self._spread(
                baa, aaa, 'BAA', 'AAA', 'CREDIT_SPREAD_IG'
            )

        return derived

    def download_all_data(self, start_date='1990-01-01', end_date=None):
        """
        Download all data series and save to CSV files.

        Series that fail to download are reported and skipped. Derived series
        are then computed from whatever was downloaded and saved in the
        category that declares them in ``data_series``. Finally a summary CSV
        is written.

        Args:
            start_date: Start date for data download
            end_date: End date for data download (default: today)
        """
        print("Starting FRED data download for meta-learning framework...")
        print(f"Data will be saved to: {self.data_folder}")

        all_data = {}

        # Download each category of data
        for category, series_dict in self.data_series.items():
            print(f"\n--- Downloading {category.upper()} data ---")

            category_folder = self.data_folder / category
            category_folder.mkdir(parents=True, exist_ok=True)

            for series_name, series_id in series_dict.items():
                if series_id is None:
                    continue  # Skip derived series for now

                df = self.download_series(series_id, start_date, end_date)

                if df is None:
                    print(f"  ✗ Failed to download {series_name}")
                    continue

                # Save to CSV
                filepath = category_folder / f"{series_name}.csv"
                df.to_csv(filepath, index=False)
                print(f"  ✓ Saved {series_name} to {filepath}")

                # Store for derived calculations
                all_data[series_name] = df

        # Calculate and save derived series
        print("\n--- Calculating derived series ---")
        derived_data = self.calculate_derived_series(all_data)

        for series_name, df in derived_data.items():
            # File each derived series under the category that declares it;
            # matching on name fragments would put TERM_SPREAD in credit_risk.
            category_folder = self.data_folder / self._category_for(series_name)
            category_folder.mkdir(parents=True, exist_ok=True)

            filepath = category_folder / f"{series_name}.csv"
            df.to_csv(filepath, index=False)
            print(f"  ✓ Calculated and saved {series_name} to {filepath}")

        # Create summary file
        self.create_data_summary()

        print(f"\n🎉 Data download complete! Check the '{self.data_folder}' folder for all files.")
        print("\nNext steps:")
        print(f"1. Verify your CBOE data covers the period {start_date} to {end_date or 'today'}")
        print("2. Run data alignment to match CBOE and FRED data frequencies")
        print("3. Extract meta-features for market regime detection")

    def create_data_summary(self):
        """Create a summary file of all downloaded data.

        Scans every sub-directory of ``data_folder`` for CSV files and writes
        ``data_summary.csv`` in ``data_folder`` with one row per file:
        category, series, relative file path, row count, and first/last date
        ("Unknown" when the file has no ``date`` column). Unreadable files are
        reported and skipped.
        """
        columns = ['category', 'series', 'file', 'rows', 'start_date', 'end_date']
        summary = []

        # Sorted so the summary does not depend on directory listing order
        for category_folder in sorted(self.data_folder.iterdir()):
            if not category_folder.is_dir():
                continue
            for csv_file in sorted(category_folder.glob("*.csv")):
                try:
                    df = pd.read_csv(csv_file)
                except Exception as e:
                    print(f"Error reading {csv_file}: {e}")
                    continue

                if 'date' in df.columns:
                    # Dates are read back as ISO strings, which order
                    # chronologically under plain string comparison.
                    start_date = df['date'].min()
                    end_date = df['date'].max()
                else:
                    start_date = end_date = "Unknown"

                summary.append({
                    'category': category_folder.name,
                    'series': csv_file.stem,
                    'file': str(csv_file.relative_to(self.data_folder)),
                    'rows': len(df),
                    'start_date': start_date,
                    'end_date': end_date
                })

        # Explicit columns keep the header row even when nothing was found
        summary_df = pd.DataFrame(summary, columns=columns)
        summary_file = self.data_folder / "data_summary.csv"
        summary_df.to_csv(summary_file, index=False)
        print(f"  ✓ Created data summary: {summary_file}")


def main():
    """
    Main function to run the FRED data downloader.

    Builds a downloader that reads the API key from the FRED_API_KEY
    environment variable. Without a key it prints setup instructions and
    returns without downloading; otherwise it downloads every configured
    series from 1990-01-01 to today.
    """
    print("FRED Data Downloader for Meta-Learning Volatility Forecasting")
    print("=" * 60)

    # Initialize downloader
    downloader = FREDDataDownloader()

    # Check if API key is available.
    # SECURITY: an API key used to be hard-coded here as a fallback. Keys must
    # not live in source control; the one that was committed should be
    # revoked and replaced.
    if not downloader.api_key:
        print("\n❌ FRED API Key Required!")
        print("Please get a free API key from: https://fred.stlouisfed.org/docs/api/api_key.html")
        print("Then set it as an environment variable:")
        print("export FRED_API_KEY='your_api_key_here'")
        print("\nOr provide it directly: FREDDataDownloader(api_key='your_api_key_here')")
        print("Skipping download. You can run this script again after setting up the API key.")
        return

    # Set date range to match your CBOE data
    # Based on your analysis, most CBOE data starts around 2011
    # But VIX goes back to 1990, so we'll get full history
    start_date = '1990-01-01'  # Match VIX history
    end_date = None  # Current date

    print(f"\nDownloading data from {start_date} to {end_date or 'today'}")
    print("This will create the following structure:")
    # Show the folder the downloader actually writes to, not a fixed path
    print(f"{downloader.data_folder}/")
    print("├── market_regime/    (S&P 500, Treasury yields, Fed funds)")
    print("├── credit_risk/      (Credit spreads, TED spread)")
    print("├── currency/         (USD index, major FX rates)")
    print("├── economic/         (Unemployment, inflation, PMI)")
    print("├── market_structure/ (SOFR, margin debt)")
    print("└── data_summary.csv  (Overview of all data)")

    # Download all data
    downloader.download_all_data(start_date, end_date)

# Run the downloader only when this file is executed as a script (or as a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

FRED Data Downloader for Meta-Learning Volatility Forecasting
Set the API key with: export FRED_API_KEY='your_key_here'

❌ FRED API Key Required!
Please get a free API key from: https://fred.stlouisfed.org/docs/api/api_key.html
Then set it as an environment variable:
export FRED_API_KEY='your_api_key_here'

Or provide it directly:

Downloading data from 1990-01-01 to today
This will create the following structure:
data/fred/
├── market_regime/    (S&P 500, Treasury yields, Fed funds)
├── credit_risk/      (Credit spreads, TED spread)
├── currency/         (USD index, major FX rates)
├── economic/         (Unemployment, inflation, PMI)
├── market_structure/ (SOFR, margin debt)
└── data_summary.csv  (Overview of all data)
Starting FRED data download for meta-learning framework...
Data will be saved to: data/fred

--- Downloading MARKET_REGIME data ---
Downloading SP500...
  ✓ Saved SP500 to data/fred/market_regime/SP500.csv
Downloading VIXCLS...
  ✓ Saved VIX_FRED to data/fred/market_reg