# Yield Curve Fitting with Nelson-Siegel and Svensson Models

This notebook demonstrates fitting parametric yield curve models to bond data.

**Author:** Brett Cooper  
**Date:** 2025-01-15  
**Purpose:** Fit yield curves for multiple countries and create weighted composite curves

## Models Used
- **Nelson-Siegel:** 4-parameter model for smooth yield curves
- **Svensson:** 6-parameter extension for more complex shapes

In [None]:
# Import required libraries
import sys
sys.path.insert(0, '../src')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Import our custom modules
from yield_curves.yield_curve_fitting import (
    fit_nelson_siegel,
    fit_svensson,
    create_weighted_curve
)

# Notebook-wide plot defaults: seaborn's white grid theme and a wide figure
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 8)})

print("Libraries loaded successfully!")

## 1. Load Bond Data

Load the raw bond yield data for multiple countries and maturities.

In [None]:
# Folder holding the raw input files (reused by later cells)
DATA_DIR = Path('../data/raw')

# Read with header=None so the metadata rows at the top of the sheet remain
# ordinary rows instead of being consumed as column names.
# NOTE(review): the file name is spelled 'bond_dat.csv' - confirm it matches the file on disk.
bond_csv_path = DATA_DIR / 'bond_dat.csv'
bond_data = pd.read_csv(bond_csv_path, header=None)

print(f"Bond data shape: {bond_data.shape}")
print("\nFirst few rows:")
# Last expression of the cell: rendered by the notebook as a preview table
bond_data.head(10)

## 2. Parse and Structure Bond Data

Extract metadata (countries, maturities) and organize the yield data.

In [None]:
# Sheet layout, mirroring the R code. Positions below are 0-based, so they are
# one less than the 1-based R row/column numbers:
#   row 0    (R row 1)   -> maturity of each series
#   row 2    (R row 3)   -> country of each series
#   row 7+   (R row 8+)  -> one observation date per row
#   column 0             -> the dates; series data lives in columns 1 onward
MATURITY_ROW = 0
COUNTRY_ROW = 2
FIRST_DATA_ROW = 7
FIRST_SERIES_COL = 1

# Anything that cannot be parsed as a number becomes NaN (errors='coerce')
maturity_cells = bond_data.iloc[MATURITY_ROW, FIRST_SERIES_COL:]
maturities = pd.to_numeric(maturity_cells, errors='coerce').values

countries = bond_data.iloc[COUNTRY_ROW, FIRST_SERIES_COL:].values

# Keeps the original row labels of bond_data as its index
dates = pd.to_datetime(bond_data.iloc[FIRST_DATA_ROW:, 0])

# Yield matrix: one row per date, one column per series
raw_yield_cells = bond_data.iloc[FIRST_DATA_ROW:, FIRST_SERIES_COL:]
yields_matrix = raw_yield_cells.apply(pd.to_numeric, errors='coerce').values

print(f"Number of dates: {len(dates)}")
print(f"Number of series: {len(countries)}")
print(f"Unique countries: {sorted(set(countries))}")
print(f"Maturities range: {np.nanmin(maturities):.1f} to {np.nanmax(maturities):.1f} years")

## 3. Fit Yield Curves for a Single Country

Demonstrate fitting process for one country on a specific date.

In [None]:
# Select a country and date for demonstration
demo_country = 'US'
demo_date_idx = -1  # Most recent date (last row of the yield matrix)

# Pick the columns that belong to this country on the chosen date
country_mask = countries == demo_country
demo_maturities = maturities[country_mask]
demo_yields = yields_matrix[demo_date_idx, country_mask] / 100  # Convert to decimal

# Keep only observations where BOTH the yield and the maturity are known.
# Maturities were parsed with errors='coerce', so an unparseable header cell
# is NaN; such a point must not be handed to the curve fitters.
valid_idx = ~(np.isnan(demo_yields) | np.isnan(demo_maturities))
demo_maturities = demo_maturities[valid_idx]
demo_yields = demo_yields[valid_idx]

print(f"Country: {demo_country}")
print(f"Date: {dates.iloc[demo_date_idx]}")
print(f"Number of points: {len(demo_yields)}")
print(f"\nYields by maturity:")
for mat, yld in zip(demo_maturities, demo_yields):
    # Yields are stored as decimals; show them as percentages
    print(f"  {mat:5.1f}Y: {yld*100:6.3f}%")

In [None]:
# Fit both parametric models to the demo observations. Each fitter returns a
# pair; the first element is used below as a callable of maturity.
ns_curve, ns_fitted = fit_nelson_siegel(demo_yields, demo_maturities)
sv_curve, sv_fitted = fit_svensson(demo_yields, demo_maturities)

# Evaluate each fitted curve on a dense grid spanning the observed maturities
smooth_maturities = np.linspace(demo_maturities.min(), demo_maturities.max(), 100)
ns_smooth = np.array(list(map(ns_curve, smooth_maturities)))
sv_smooth = np.array(list(map(sv_curve, smooth_maturities)))

# Plot observed points against both fitted curves (yields shown in percent)
fig, ax = plt.subplots(figsize=(14, 8))

ax.scatter(
    demo_maturities,
    demo_yields * 100,
    s=100,
    color='black',
    label='Observed',
    zorder=5,
)

# Drawn in this order so each model keeps its position in the colour cycle
model_lines = (
    ('Nelson-Siegel', ns_smooth, '--'),
    ('Svensson', sv_smooth, '-'),
)
for model_label, model_values, model_linestyle in model_lines:
    ax.plot(
        smooth_maturities,
        model_values * 100,
        linewidth=2,
        label=model_label,
        linestyle=model_linestyle,
    )

demo_date_label = dates.iloc[demo_date_idx].strftime("%Y-%m-%d")
ax.set_xlabel('Maturity (Years)', fontsize=12)
ax.set_ylabel('Yield (%)', fontsize=12)
ax.set_title(
    f'{demo_country} Yield Curve Fitting - {demo_date_label}',
    fontsize=14,
    fontweight='bold',
)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Report the four fitted Nelson-Siegel parameters
print("\nNelson-Siegel Parameters:")
ns_parameters = (
    ("β₀ (level)", ns_curve.beta0),
    ("β₁ (slope)", ns_curve.beta1),
    ("β₂ (curvature)", ns_curve.beta2),
    ("τ (decay)", ns_curve.tau),
)
for param_label, param_value in ns_parameters:
    print(f"  {param_label}: {param_value:.4f}")

## 4. Fit Curves for All Countries Over Time

Process all countries and every date from 2020 onwards to build a complete yield curve database.

In [None]:
# Restrict the sample to dates on or after 1 January of START_YEAR
# (2020 onwards, as in the R code)
START_YEAR = 2020
period_start = f'{START_YEAR}-01-01'
fitting_dates = dates[dates >= period_start]

# Output grid: every distinct non-NaN maturity and every distinct country label
known_maturities = maturities[~np.isnan(maturities)]
unique_maturities = sorted(set(known_maturities))
unique_countries = sorted(set(countries))

n_countries = len(unique_countries)
n_dates = len(fitting_dates)
n_maturities = len(unique_maturities)

print(f"Fitting period: {fitting_dates.min()} to {fitting_dates.max()}")
print(f"Number of dates: {n_dates}")
print(f"Countries: {unique_countries}")
print(f"Output maturities: {unique_maturities}")
print(f"\nThis will create {n_countries} x {n_dates} x {n_maturities} fitted yields")

In [None]:
# Storage for fitted yields, indexed as [country, date, maturity].
# Cells start as NaN (not 0.0) so that country/date pairs which are skipped
# for having too few observations, or whose fit raises, cannot be mistaken
# for a genuine 0% yield by downstream consumers.
grid_shape = (len(unique_countries), len(fitting_dates), len(unique_maturities))
ns_fitted_yields = np.full(grid_shape, np.nan)
sv_fitted_yields = np.full(grid_shape, np.nan)

# Position of every fitting date inside `dates`. Because `dates` and
# `yields_matrix` were sliced from the same rows of bond_data, this is also
# the row of yields_matrix for that date. Looking rows up by index label
# (instead of by date value) is a single vectorised call and stays correct
# even if the same date appears on more than one row.
row_positions = dates.index.get_indexer(fitting_dates.index)

# A country's column selection does not depend on the date, so build each
# mask once rather than once per (date, country) pair.
country_masks = {country: countries == country for country in unique_countries}

# Fit curves (this may take a minute)
print("Fitting yield curves...")
errors = []  # (date, country, message) for every fit that raised

for date_idx, (date, row_idx) in enumerate(zip(fitting_dates, row_positions)):
    for country_idx, country in enumerate(unique_countries):
        try:
            # Get data for this country and date (percent -> decimal)
            country_mask = country_masks[country]
            obs_maturities = maturities[country_mask]
            obs_yields = yields_matrix[row_idx, country_mask] / 100

            # Drop points with a missing yield OR a missing maturity; a NaN
            # maturity would otherwise be passed straight to the fitters.
            valid = ~(np.isnan(obs_yields) | np.isnan(obs_maturities))
            if valid.sum() < 3:  # Need at least 3 points
                continue

            clean_mats = obs_maturities[valid]
            clean_yields = obs_yields[valid]

            # Fit curves
            ns_curve, _ = fit_nelson_siegel(clean_yields, clean_mats)
            sv_curve, _ = fit_svensson(clean_yields, clean_mats)

            # Get fitted values at standard maturities (stored in percent)
            for mat_idx, maturity in enumerate(unique_maturities):
                ns_fitted_yields[country_idx, date_idx, mat_idx] = ns_curve(maturity) * 100
                sv_fitted_yields[country_idx, date_idx, mat_idx] = sv_curve(maturity) * 100

        except Exception as e:
            # Best effort: one failed fit must not abort the whole run.
            # The failure is recorded and the cells stay NaN.
            errors.append((date, country, str(e)))

    if (date_idx + 1) % 50 == 0:
        print(f"  Processed {date_idx + 1}/{len(fitting_dates)} dates")

print("\nFitting complete!")
print(f"Errors encountered: {len(errors)}")
# Show a few failures so problems are visible without dumping the full list
for err_date, err_country, err_message in errors[:5]:
    print(f"  {err_date} {err_country}: {err_message}")

## 5. Create Weighted Composite Curve (WPU)

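Combine individual country curves using economic weights. The cell below currently only loads and inspects the weights; the composite curve itself is not yet built in this notebook.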

In [None]:
# Read the WPU weights workbook from the raw-data folder.
# This cell only loads and inspects the weights; it does not build a
# composite curve from them.
wpu_weights_path = DATA_DIR / 'wpu_weights.xlsx'
wpu_weights = pd.read_excel(wpu_weights_path)

# Quick look at the table: first rows, dimensions and column names
print("WPU Weights structure:")
print(wpu_weights.head())
weights_shape = wpu_weights.shape
weights_columns = wpu_weights.columns.tolist()
print(f"\nShape: {weights_shape}")
print(f"Columns: {weights_columns}")

## 6. Save Fitted Yields

Export fitted yield curves for use in other analyses.

In [None]:
# Make sure the output folder exists (including any missing parents)
PROCESSED_DIR = Path('../data/processed')
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

# Bundle both fitted-yield cubes with their three axis labels into a single
# .npz archive (Python equivalent of .Rdata)
output_path = PROCESSED_DIR / 'fitted_yield_curves.npz'
archive_contents = {
    'ns_yields': ns_fitted_yields,
    'sv_yields': sv_fitted_yields,
    'countries': unique_countries,
    'dates': fitting_dates.values,
    'maturities': unique_maturities,
}
np.savez(output_path, **archive_contents)

print(f"Fitted yields saved to {output_path}")
print("\nUse these in the bond swap analysis notebook!")

## Summary

This notebook has:
1. Loaded and structured multi-country bond yield data
2. Demonstrated Nelson-Siegel and Svensson curve fitting
3. Fitted curves for all countries over time
4. Saved processed yields for downstream analysis

## Next Steps

- Return to `01_bond_swap_analysis.ipynb` to use these fitted curves
- Explore yield curve evolution over time
- Analyze interest rate differentials across countries