In [9]:
import sys
from domino.data_sources import DataSourceClient
from datetime import date
from dateutil.relativedelta import relativedelta
import pandas as pd
from io import StringIO
from functools import lru_cache
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

# ─── Helpers ────────────────────────────────────────────────────────────────

@lru_cache(maxsize=100)
def fetch_treasury_csv(year: int) -> str:
    url = (
        f"https://home.treasury.gov/resource-center/data-chart-center/interest-rates/"
        f"daily-treasury-rates.csv/{year}/all"
        f"?field_tdr_date_value={year}"
        f"&type=daily_treasury_yield_curve&page&_format=csv"
    )
    resp = requests.get(url)
    resp.raise_for_status()
    return resp.text

def parse_tenor(tenor_str: str) -> float:
    num_str, unit = tenor_str.strip().split(maxsplit=1)
    n = float(num_str); u = unit.lower()
    if u.startswith('mo'):   return (n * 30) / 360
    if u.startswith('yr'):   return n
    if u.startswith('day'):  return n / 360
    raise ValueError(f"Unknown tenor unit: '{unit}'")

def prepare_year_rows(year, start_date, end_date):
    """
    Fetch & parse a single year's CSV, return a flat list of rows:
    (curve_type, date, tenor, rate, tenor_num)
    """
    text = fetch_treasury_csv(year)
    df   = pd.read_csv(StringIO(text), parse_dates=['Date'], index_col='Date')
    # filter to window
    df = df[(df.index.date >= start_date) & (df.index.date <= end_date)]
    rows = []
    for ts, row in df.iterrows():
        d = ts.date()
        for tenor, rate in row.items():
            if pd.isna(rate):
                continue
            rows.append((
                'US Treasury Par',
                d.isoformat(),
                tenor,
                float(rate),
                parse_tenor(tenor)
            ))
    return year, rows

# ─── Main loader ────────────────────────────────────────────────────────────

ds = DataSourceClient().get_datasource("market_data")

def populate(
    days: int,
    batch_size: int    = 5000,
    fetch_workers: int = 4,
    write_workers: int = 2
):
    """
    Populate rate_curves for the last `days` days (up to today),
    but not before 2010-03-15.
    """
    # calculate date range
    end_date = date.today()
    start_date = end_date - relativedelta(days=days)
    min_date = date(2010, 3, 15)
    if start_date < min_date:
        start_date = min_date

    years = list(range(start_date.year, end_date.year + 1))

    # 1) parallel fetch + parse per-year
    rows_by_year = {}
    with ThreadPoolExecutor(max_workers=fetch_workers) as fetch_pool:
        futures = {
            fetch_pool.submit(prepare_year_rows, y, start_date, end_date): y
            for y in years
        }
        for fut in as_completed(futures):
            y = futures[fut]
            try:
                year, rows = fut.result()
                print('rows', rows)
                if rows:
                    rows_by_year[year] = rows
                    print(f"{year}: prepared {len(rows)} rows")
                else:
                    print(f"{year}: no data → skipped")
            except Exception as e:
                print(f"{y}: error fetching/parsing → {e}")
    print(rows_by_year)
    # 2) for each year, batch & fire INSERTs in parallel
    def write_batch(batch):
        vals = ", ".join(
            f"('{r[0]}','{r[1]}','{r[2]}',{r[3]},{r[4]})"
            for r in batch
        )
        sql = f"""
            INSERT INTO rate_curves
              (curve_type, curve_date, tenor_str, rate, tenor_num)
            VALUES
              {vals}
            ON CONFLICT (curve_type, curve_date, tenor_str) DO UPDATE
              SET rate      = EXCLUDED.rate,
                  tenor_num = EXCLUDED.tenor_num;
            """
        ds.query(sql)
        print(sql)

    with ThreadPoolExecutor(max_workers=write_workers) as write_pool:
        write_futures = []
        for year, rows in rows_by_year.items():
            for i in range(0, len(rows), batch_size):
                batch = rows[i : i + batch_size]
                write_futures.append(write_pool.submit(write_batch, batch))

        for fut in as_completed(write_futures):
            try:
                fut.result()
            except Exception as e:
                print(f"Write error: {e}")

    print("✅ Done bulk-loading rate_curves "
          f"from {start_date} through {end_date}")

# arg1 is the number of days to backdate.
# 1 => yesterday's curve, 100 => last 100 days.
default_backdated_days = 2

if __name__ == '__main__':
    d = default_backdated_days
else:
    try:
        days_to_backdate = sys.argv[1]
        d = int(days_to_backdate)
    except Exception as e:
        d = default_backdated_days

populate(days=d)    
    

sys argv ['/opt/conda/lib/python3.10/site-packages/ipykernel_launcher.py', '-f', '/home/ubuntu/.local/share/jupyter/runtime/kernel-814a9dbb-6cbe-4d79-8114-bb11cfc33ed3.json']
thingy __main__
rows []
2025: no data → skipped
{}
✅ Done bulk-loading rate_curves from 2025-05-17 through 2025-05-19
