In [2]:
import pandas as pd
from pandas_datareader.data import DataReader
from datetime import datetime
import os

# FRED API key
# Read the key from the environment instead of embedding it in the notebook:
# anything committed here is visible to everyone who can read the file.
# The key that was previously hard-coded on this line should be treated as
# leaked and revoked/rotated.
# FRED_API_KEY is None when the variable is not set. The 'fred' DataReader
# requests logged by this notebook go to the fredgraph.csv URL without a key,
# so the downloads below do not appear to depend on it.
FRED_API_KEY = os.environ.get('FRED_API_KEY')

def debug_fred_data(series_ids, start_date='1980-01-01', end_date='2020-12-31'):
    """Try to download each FRED series and report which ones load.

    Every ID is requested with ``DataReader(series_id, 'fred', start_date,
    end_date)``. A one-line status message is printed per series.

    Parameters
    ----------
    series_ids : str or iterable of str
        FRED series identifier(s). A single string is treated as one ID
        instead of being iterated character by character.
    start_date, end_date : str
        Date range, passed to ``DataReader`` unchanged.

    Returns
    -------
    tuple[dict, list]
        ``successful_data`` maps each series ID to the object returned by
        ``DataReader``; ``failed_series`` lists the IDs whose download raised,
        in input order.
    """
    if isinstance(series_ids, str):
        series_ids = [series_ids]

    successful_data = {}
    failed_series = []

    for series_id in series_ids:
        # Only the download itself is guarded, so a problem in the reporting
        # code below cannot mark a successfully loaded series as failed.
        try:
            data = DataReader(series_id, 'fred', start_date, end_date)
        except Exception as e:
            # Deliberate best-effort: this is a probe, so record the failure
            # and keep going with the remaining series.
            print(f"❌ Failed to retrieve: {series_id} — {e}")
            failed_series.append(series_id)
            continue

        successful_data[series_id] = data
        print(f"✅ Successfully retrieved: {series_id} with {len(data)} rows.")

    return successful_data, failed_series

# Series IDs
# Expanded list of FRED series IDs mixing daily, monthly and quarterly data.
# Frequency labels: a monthly series returns 492 rows for 1980-2020
# (41 years x 12), which is what FEDFUNDS, M2SL and EXUSUK return; EXUSEU
# returns 264 rows (monthly, shorter history).
expanded_series_ids = [
    'CPILFESL',      # Core CPI (all items less food and energy) - Monthly
    'UNRATE',        # Unemployment Rate - Monthly
    'GDPC1',         # Real GDP - Quarterly
    'FEDFUNDS',      # Effective Federal Funds Rate - Monthly
    'DGS1',          # 1-Year Treasury Yield - Daily
    'DGS10',         # 10-Year Treasury Yield - Daily
    'M2SL',          # M2 Money Stock - Monthly
    'DCOILWTICO',    # Crude Oil Prices (WTI) - Daily
    'CSUSHPINSA',    # Home Price Index - Monthly
    'RSXFS',         # Retail Sales - Monthly
    'UMCSENT',       # Consumer Sentiment Index - Monthly
    'PAYEMS',        # Total Nonfarm Payrolls - Monthly
    'INDPRO',        # Industrial Production Index - Monthly
    'BUSINV',        # Business Inventories - Monthly
    'EXUSUK',        # USD to GBP Exchange Rate - Monthly
    'EXUSEU',        # USD to EUR Exchange Rate - Monthly
    'SP500',         # S&P 500 Index - Daily (only 1529 rows came back for 1980-2020, i.e. a short history)
    'VIXCLS',        # CBOE Volatility Index (VIX) - Daily
    'GS10',          # 10-Year Treasury Constant Maturity Rate - Monthly (monthly counterpart of the daily DGS10)
    'PPIACO',        # Producer Price Index for All Commodities - Monthly
    'HOUST',         # Housing Starts - Monthly
    'CPIAUCSL',      # CPI for All Urban Consumers - Monthly
    'T10YIE',        # 10-Year Breakeven Inflation Rate - Daily
]

# Run the availability check across every series ID
successful_data, failed_series = debug_fred_data(expanded_series_ids)

# Preview the first rows of each series that loaded
for series_id, df in successful_data.items():
    print(f"\n📊 {series_id} Sample Data:")
    print(df.head())

# Summary: confirm a clean run, otherwise list every series that failed
if not failed_series:
    print("\n All series retrieved successfully!")
else:
    print("\n The following series failed to load:")
    for s in failed_series:
        print(f"- {s}")


✅ Successfully retrieved: CPILFESL with 492 rows.
✅ Successfully retrieved: UNRATE with 492 rows.
✅ Successfully retrieved: GDPC1 with 164 rows.
✅ Successfully retrieved: FEDFUNDS with 492 rows.
✅ Successfully retrieved: DGS1 with 10698 rows.
✅ Successfully retrieved: DGS10 with 10698 rows.
✅ Successfully retrieved: M2SL with 492 rows.
✅ Successfully retrieved: DCOILWTICO with 9131 rows.
✅ Successfully retrieved: CSUSHPINSA with 408 rows.
✅ Successfully retrieved: RSXFS with 348 rows.
✅ Successfully retrieved: UMCSENT with 492 rows.
✅ Successfully retrieved: PAYEMS with 492 rows.
✅ Successfully retrieved: INDPRO with 492 rows.
✅ Successfully retrieved: BUSINV with 348 rows.
✅ Successfully retrieved: EXUSUK with 492 rows.
✅ Successfully retrieved: EXUSEU with 264 rows.
✅ Successfully retrieved: SP500 with 1529 rows.
✅ Successfully retrieved: VIXCLS with 8088 rows.
❌ Failed to retrieve: IR3TIB10T — Unable to read URL: https://fred.stlouisfed.org/graph/fredgraph.csv?id=IR3TIB10T
Response 

In [3]:
import pandas as pd
from pandas_datareader import data as pdr

# Expanded list of FRED series IDs mixing daily, monthly and quarterly data.
# Frequency labels: a monthly series returns 492 rows for a 1980-2020
# download (41 years x 12), which is the case for FEDFUNDS, M2SL and EXUSUK;
# EXUSEU is monthly as well but has a shorter history.
expanded_series_ids = [
    'CPILFESL',      # Core CPI (all items less food and energy) - Monthly
    'UNRATE',        # Unemployment Rate - Monthly
    'GDPC1',         # Real GDP - Quarterly
    'FEDFUNDS',      # Effective Federal Funds Rate - Monthly
    'DGS1',          # 1-Year Treasury Yield - Daily
    'DGS10',         # 10-Year Treasury Yield - Daily
    'M2SL',          # M2 Money Stock - Monthly
    'DCOILWTICO',    # Crude Oil Prices (WTI) - Daily
    'CSUSHPINSA',    # Home Price Index - Monthly
    'RSXFS',         # Retail Sales - Monthly
    'UMCSENT',       # Consumer Sentiment Index - Monthly
    'PAYEMS',        # Total Nonfarm Payrolls - Monthly
    'INDPRO',        # Industrial Production Index - Monthly
    'BUSINV',        # Business Inventories - Monthly
    'EXUSUK',        # USD to GBP Exchange Rate - Monthly
    'EXUSEU',        # USD to EUR Exchange Rate - Monthly
    'SP500',         # S&P 500 Index - Daily (short history on FRED; it bounds the merged date range)
    'VIXCLS',        # CBOE Volatility Index (VIX) - Daily
    'GS10',          # 10-Year Treasury Constant Maturity Rate - Monthly (monthly counterpart of the daily DGS10)
    'PPIACO',        # Producer Price Index for All Commodities - Monthly
    'HOUST',         # Housing Starts - Monthly
    'CPIAUCSL',      # CPI for All Urban Consumers - Monthly
    'T10YIE',        # 10-Year Breakeven Inflation Rate - Daily
]

# Function to retrieve and merge FRED data
def retrieve_and_merge_fred_data(series_ids, start_date='1980-01-01', end_date='2024-12-31'):
    """Download FRED series, align them on a month-end index and merge them.

    Each series is fetched with ``pdr.DataReader(series_id, 'fred', ...)`` and
    resampled to month-end ('ME'): 'GDPC1' is forward-filled, every other
    series is averaged within each month. The monthly frames are outer-joined
    on their index, an ``Inflation_Rate`` column (month-over-month percent
    change of 'CPILFESL') is added when that series is present, and finally
    every row containing a NaN is dropped.

    Because of that final ``dropna``, the result only covers the months for
    which *all* retrieved series (and the inflation rate) have a value, so the
    series with the shortest history bounds the date range.

    Parameters
    ----------
    series_ids : str or iterable of str
        FRED series identifier(s). A single string is treated as one ID;
        repeated IDs are fetched once (first occurrence wins the position).
    start_date, end_date : str
        Date range, passed to ``pdr.DataReader`` unchanged.

    Returns
    -------
    pandas.DataFrame
        One column per series that was retrieved with at least one
        observation, plus ``Inflation_Rate`` when 'CPILFESL' is among them.
        An empty DataFrame when nothing could be retrieved.

    Notes
    -----
    Download errors are reported with a printed message and the series is
    left out; they do not abort the run.
    """
    if isinstance(series_ids, str):
        series_ids = [series_ids]
    # Duplicate IDs would otherwise yield clashing column names.
    series_ids = list(dict.fromkeys(series_ids))

    monthly_frames = []
    for series_id in series_ids:
        try:
            data = pdr.DataReader(series_id, 'fred', start_date, end_date)

            # A series without observations would become an all-NaN column
            # and make the final dropna discard every row; skip it explicitly.
            if data.empty:
                print(f"⚠️ No observations returned for {series_id}; skipping.")
                continue

            resampler = data.resample('ME')
            if series_id == 'GDPC1':
                # Quarterly data: carry each value forward over the months.
                # NOTE(review): ffill stops at the month of the last quarterly
                # observation, so the remaining months of that quarter stay
                # missing and are removed by dropna - confirm this is intended.
                monthly = resampler.ffill()
            else:
                # Daily/weekly data collapse to a monthly mean; monthly data
                # are only re-stamped to month-end.
                monthly = resampler.mean()

            monthly_frames.append(monthly)

        except Exception as e:
            print(f"❌ Failed to retrieve {series_id}: {e}")

    if not monthly_frames:
        return pd.DataFrame()

    # One outer join over all frames instead of a pairwise merge per series.
    merged_data = pd.concat(monthly_frames, axis=1, join='outer')

    # Month-over-month inflation in percent. fill_method=None avoids the
    # deprecated implicit forward-fill: a gap in CPI yields NaN (dropped
    # below) rather than a multi-month change mislabeled as one month.
    if 'CPILFESL' in merged_data.columns:
        merged_data['Inflation_Rate'] = (
            merged_data['CPILFESL'].pct_change(fill_method=None) * 100
        )

    # Keep only the months for which every column has a value.
    return merged_data.dropna()

# Retrieve the expanded dataset
expanded_data = retrieve_and_merge_fred_data(expanded_series_ids)

# Fail loudly instead of writing two empty CSV files and reporting success.
if expanded_data.empty:
    raise ValueError("No complete rows were retrieved; nothing to split or save.")

# Split data into training and testing sets based on time.
# Label slices (.loc[:d] / .loc[d:]) include both endpoints, so a row stamped
# exactly on split_date would end up in BOTH sets. Boolean masks make the
# boundary unambiguous: train is strictly before split_date, test is on/after.
split_date = "2020-01-01"
split_ts = pd.Timestamp(split_date)
df_train_expanded = expanded_data.loc[expanded_data.index < split_ts]
df_test_expanded = expanded_data.loc[expanded_data.index >= split_ts]

# Save the datasets
df_train_expanded.to_csv("train_expanded.csv")
df_test_expanded.to_csv("test_expanded.csv")

print("✅ Data retrieval complete. Saved as train_expanded.csv and test_expanded.csv.")


✅ Data retrieval complete. Saved as train_expanded.csv and test_expanded.csv.
