# Data Getter Notebook

This notebook handles all bulk data fetching for the project. It includes smart caching that checks date ranges and avoids re-fetching data unnecessarily.

**Important:** This is the ONLY place where bulk data fetching should happen. Other notebooks and code should only READ already-fetched data.


In [1]:
import sys
from pathlib import Path
import pandas as pd
import os

# Add src to path
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root))

from src.data.fred_loader import (
    download_series, 
    check_data_coverage, 
    get_series_date_range,
    load_all_fred_data
)
from src.data.polymarket_loader import (
    fetch_market_history,
    check_market_exists
)
from src.data.fedwatch_loader import check_fedwatch_files_exist
from src.config import FRED_SERIES
from src.utils.logging_utils import setup_logging
from dotenv import load_dotenv

# Initialise project logging first so loader messages appear in cell output.
setup_logging()

# Pull variables from a local .env file into the process environment
# (the FRED cell below reads FRED_API_KEY from there).
load_dotenv()

# Lift pandas' display limits by setting each option to None.
for display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(display_option, None)


## Configuration

Set these variables at the top to control data fetching behavior.


In [2]:
# ============================================================================
# CONFIGURATION - Modify these settings as needed
# Every tunable for this notebook lives in this cell; the cells below only
# read these names.
# ============================================================================

# Force reload: If True, the FRED and Polymarket cells re-download everything
# and skip their "already on disk?" checks. The FedWatch cell does not use it
# (those files are placed manually).
FORCE_RELOAD = False

# FRED Data Configuration
# Dates are parsed with pd.to_datetime before being handed to the loader, so
# any string pandas can parse (e.g. "2020-01-01") is acceptable.
FRED_START_DATE = None  # None = all available data from FRED
FRED_END_DATE = None    # None = today (checks if data is recent within threshold)
# Passed to check_data_coverage as recent_threshold_days.
FRED_RECENT_THRESHOLD_DAYS = 7  # If end_date is None, check if data is within this many days of today

# Polymarket Configuration
# List of market IDs to fetch. Add market IDs here as needed.
# An empty list makes the Polymarket cell print a notice and do nothing.
POLYMARKET_MARKETS = [
    # Example: "market-id-here",
    # Add your market IDs here
]

# FedWatch Configuration
# FedWatch files are manually downloaded Excel files.
# List expected filenames here (optional, for validation); the value is passed
# unchanged to check_fedwatch_files_exist.
EXPECTED_FEDWATCH_FILES = None  # None = just check if any files exist
# Example: ["fedwatch_meeting_20240320.xlsx", "fedwatch_meeting_20240612.xlsx"]

# ============================================================================


## FRED Data

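Download every series listed in `FRED_SERIES`. Unless `FORCE_RELOAD` is set, a series is skipped when the data already on disk covers the requested date range (or, with no end date configured, is no older than `FRED_RECENT_THRESHOLD_DAYS`).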


In [3]:
# Download (or verify) every FRED series listed in FRED_SERIES.
#
# Results are collected in three lists that the summary cell at the end of the
# notebook reads. They are created up front so they exist on every path,
# including the "no API key" path.
fred_downloaded = []  # series IDs fetched during this run
fred_skipped = []     # series IDs whose data on disk was already sufficient
fred_errors = []      # (series_id, error message) pairs

# Get FRED API key
api_key = os.getenv("FRED_API_KEY")
if not api_key:
    print("WARNING: FRED_API_KEY not found in environment.")
    print("Please set it in a .env file or as an environment variable.")
    print("Skipping FRED data download.")
else:
    print(f"Downloading/checking {len(FRED_SERIES)} FRED series...")
    print(f"Force reload: {FORCE_RELOAD}")
    if FRED_START_DATE:
        print(f"Start date: {FRED_START_DATE}")
    if FRED_END_DATE:
        print(f"End date: {FRED_END_DATE}")
    else:
        print(f"End date: today (checking if data is within {FRED_RECENT_THRESHOLD_DAYS} days)")
    print()

    # Convert date strings to Timestamps if provided; None is passed through
    # to the loader unchanged.
    start_ts = pd.to_datetime(FRED_START_DATE) if FRED_START_DATE else None
    end_ts = pd.to_datetime(FRED_END_DATE) if FRED_END_DATE else None

    for series_id in FRED_SERIES:
        # Each series is handled independently: a failure is recorded and the
        # loop moves on, so one bad series does not abort the whole run.
        try:
            if FORCE_RELOAD:
                # Force download, bypassing the coverage check entirely.
                print(f"Force reloading {series_id}...")
                download_series(series_id, api_key, start_date=start_ts, end_date=end_ts)
                fred_downloaded.append(series_id)
            else:
                # Check if data exists and covers required range
                is_covered, reason = check_data_coverage(
                    series_id,
                    start_date=start_ts,
                    end_date=end_ts,
                    recent_threshold_days=FRED_RECENT_THRESHOLD_DAYS
                )

                if is_covered:
                    # `reason` already describes the covered range, so no
                    # further lookup is needed before reporting the skip.
                    print(f"✓ {series_id}: {reason}")
                    fred_skipped.append(series_id)
                else:
                    print(f"↓ {series_id}: {reason} - Downloading...")
                    download_series(series_id, api_key, start_date=start_ts, end_date=end_ts)
                    fred_downloaded.append(series_id)
        except Exception as e:
            print(f"✗ {series_id}: Error - {e}")
            fred_errors.append((series_id, str(e)))

    print()
    print("FRED Summary:")
    print(f"  Downloaded: {len(fred_downloaded)}")
    print(f"  Skipped: {len(fred_skipped)}")
    print(f"  Errors: {len(fred_errors)}")
    for failed_series, error in fred_errors:
        print(f"    - {failed_series}: {error}")


Downloading/checking 6 FRED series...
Force reload: False
End date: today (checking if data is within 7 days)

✓ DGS2: Data covers range: 1976-06-01 to 2025-11-14
✓ DGS10: Data covers range: 1962-01-02 to 2025-11-14
↓ UNRATE: Data is 109 days old (max date: 2025-08-01), threshold: 7 days - Downloading...
2025-11-18 14:02:28 - src.data.fred_loader - INFO - Downloading FRED series: UNRATE
2025-11-18 14:02:30 - src.data.fred_loader - INFO - Saved UNRATE to /home/bitzaven/CodingProjects/ExamplesFixedIncomeModelling/data/raw/fred/UNRATE.csv
↓ CPIAUCSL: Data is 78 days old (max date: 2025-09-01), threshold: 7 days - Downloading...
2025-11-18 14:02:30 - src.data.fred_loader - INFO - Downloading FRED series: CPIAUCSL
2025-11-18 14:02:31 - src.data.fred_loader - INFO - Saved CPIAUCSL to /home/bitzaven/CodingProjects/ExamplesFixedIncomeModelling/data/raw/fred/CPIAUCSL.csv
↓ FEDFUNDS: Data is 48 days old (max date: 2025-10-01), threshold: 7 days - Downloading...
2025-11-18 14:02:31 - src.data.fre

## FedWatch Data

FedWatch data must be manually downloaded from the CME FedWatch tool website and placed in `data/raw/fedwatch/`.

This section checks if the expected files exist.


In [4]:
def _print_file_list(file_names):
    """Print one indented bullet line per entry in ``file_names``."""
    for file_name in file_names:
        print(f"  - {file_name}")


# The three result names are reused by the summary cell at the end of the notebook.
all_exist, existing_files, missing_files = check_fedwatch_files_exist(EXPECTED_FEDWATCH_FILES)

# With no expected-file list configured, the check is only "is anything there?".
any_file_mode = EXPECTED_FEDWATCH_FILES is None

print("FedWatch File Check:")
if all_exist:
    if any_file_mode:
        print(f"✓ Found {len(existing_files)} FedWatch file(s)")
    else:
        print(f"✓ All {len(existing_files)} expected FedWatch files found:")
    _print_file_list(existing_files)
elif any_file_mode:
    print("⚠ No FedWatch files found")
    print("  Please manually download FedWatch Excel files and place them in data/raw/fedwatch/")
    print("  Expected naming: fedwatch_meeting_YYYYMMDD.xlsx")
else:
    print(f"⚠ {len(missing_files)} FedWatch file(s) missing:")
    _print_file_list(missing_files)
    # Also show what was found, so a partial set is visible at a glance.
    if existing_files:
        print(f"\nFound {len(existing_files)} file(s):")
        _print_file_list(existing_files)


FedWatch File Check:
⚠ No FedWatch files found
  Please manually download FedWatch Excel files and place them in data/raw/fedwatch/
  Expected naming: fedwatch_meeting_YYYYMMDD.xlsx


## Polymarket Data

Fetch historical data for the Polymarket markets listed in `POLYMARKET_MARKETS` (see the configuration section above). Markets that already exist locally are skipped unless `FORCE_RELOAD` is set.


In [5]:
# Fetch history for each configured Polymarket market, skipping markets that
# are already present unless a force reload was requested.
# The three result lists are read again by the summary cell.
polymarket_downloaded, polymarket_skipped, polymarket_errors = [], [], []

if POLYMARKET_MARKETS:
    print(f"Fetching/checking {len(POLYMARKET_MARKETS)} Polymarket market(s)...")
    print(f"Force reload: {FORCE_RELOAD}")
    print()

    for market_id in POLYMARKET_MARKETS:
        # A failure on one market is recorded and the loop carries on.
        try:
            # The existence check is only consulted when not force-reloading.
            if not FORCE_RELOAD and check_market_exists(market_id):
                print(f"✓ {market_id}: Already exists, skipping")
                polymarket_skipped.append(market_id)
                continue

            if FORCE_RELOAD:
                print(f"Force reloading {market_id}...")
            else:
                print(f"↓ {market_id}: Not found, fetching...")
            fetch_market_history(market_id, force_reload=bool(FORCE_RELOAD))
            polymarket_downloaded.append(market_id)
        except Exception as exc:
            print(f"✗ {market_id}: Error - {exc}")
            polymarket_errors.append((market_id, str(exc)))

    print()
    print("Polymarket Summary:")
    print(f"  Downloaded: {len(polymarket_downloaded)}")
    print(f"  Skipped: {len(polymarket_skipped)}")
    print(f"  Errors: {len(polymarket_errors)}")
    for failed_market, message in polymarket_errors:
        print(f"    - {failed_market}: {message}")
else:
    print("No Polymarket markets configured. Add market IDs to POLYMARKET_MARKETS in the configuration section.")


No Polymarket markets configured. Add market IDs to POLYMARKET_MARKETS in the configuration section.


## Summary

Overall summary of data fetching operations.


In [6]:
# Final roll-up of what the cells above did. This cell only reads variables
# defined earlier (api_key, fred_*, all_exist / existing_files / missing_files,
# polymarket_*), so the notebook must be run top to bottom.
print("=" * 60)
print("DATA FETCHING SUMMARY")
print("=" * 60)
print()

# FRED Summary
if api_key:
    print("FRED Data:")
    print(f"  Series downloaded: {len(fred_downloaded)}")
    if fred_downloaded:
        print(f"    {', '.join(fred_downloaded)}")
    print(f"  Series skipped: {len(fred_skipped)}")
    if fred_skipped:
        print(f"    {', '.join(fred_skipped)}")
    print(f"  Errors: {len(fred_errors)}")
    print()
else:
    print("FRED Data: Skipped (no API key)")
    print()

# FedWatch Summary
print("FedWatch Data:")
print(f"  Files found: {len(existing_files)}")
if not all_exist:
    if missing_files:
        print(f"  Files missing: {len(missing_files)}")
    else:
        # The check failed but nothing is listed as missing. In the recorded
        # run this happens when no expected-file list is configured and no
        # files are present. Printing "Files missing: 0" would read as "all
        # good", so say explicitly that there is no FedWatch data.
        print("  No FedWatch files found in data/raw/fedwatch/")
print()

# Polymarket Summary
if POLYMARKET_MARKETS:
    print("Polymarket Data:")
    print(f"  Markets downloaded: {len(polymarket_downloaded)}")
    if polymarket_downloaded:
        print(f"    {', '.join(polymarket_downloaded)}")
    print(f"  Markets skipped: {len(polymarket_skipped)}")
    if polymarket_skipped:
        print(f"    {', '.join(polymarket_skipped)}")
    print(f"  Errors: {len(polymarket_errors)}")
    print()
else:
    print("Polymarket Data: No markets configured")
    print()

print("=" * 60)
print("Data fetching complete!")
print("=" * 60)
print()
print("Note: Other notebooks should only READ this data, not fetch it.")
print("Use this notebook (01_datagetter.ipynb) for all bulk data fetching.")


DATA FETCHING SUMMARY

FRED Data:
  Series downloaded: 4
    UNRATE, CPIAUCSL, FEDFUNDS, GDPC1
  Series skipped: 2
    DGS2, DGS10
  Errors: 0

FedWatch Data:
  Files found: 0
  Files missing: 0

Polymarket Data: No markets configured

Data fetching complete!

Note: Other notebooks should only READ this data, not fetch it.
Use this notebook (01_datagetter.ipynb) for all bulk data fetching.
