In [None]:
import sys
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path

# Make the parent directory and the current directory importable so that
# the project-local `src` package resolves from this notebook.
for _rel_dir in ("..", "."):
    sys.path.append(os.path.abspath(_rel_dir))

# Must come after the sys.path changes above.
from src.utils import load_config

# Read the shared configuration and resolve the output directory to an
# absolute path.
cfg = load_config("../config.yaml")
output_folder = Path(cfg["output_folder"]).resolve()
print("Output folder:", output_folder)

In [None]:
# Read the Merton / KMV results table produced by an earlier step and
# preview its first rows.
results_path = output_folder.joinpath("merton_kmv_results.csv")
dd_df = pd.read_csv(results_path)
print("Loaded rows:", dd_df.shape[0])
dd_df.head()

## 1. Definition of Distance to Default (DD)

In the KMV-style implementation, we typically define:

- Short-term debt: $ST$
- Long-term debt: $LT$
- Default Point: $DP = ST + 0.5 \times LT$
- Asset value: $V$ (from solving the Merton system)
- Asset volatility: $\sigma_V$ (annualized)

A simple one-period Distance to Default is then given by:

$$DD = \frac{V - DP}{V \cdot \sigma_V}$$

This is the number of standard deviations that $V$ is above the default point $DP$ over horizon $T=1$ year.

- If $DD \approx 0$, the firm is right at the edge of default.
- If $DD \approx 1$, asset value is one $\sigma_V$ above the default point.
- Larger $DD$ values (e.g. 3 or more) indicate very low default risk under the model assumptions.

In [None]:
# Re-compute Default Point (DP) and Distance to Default (DD) from raw columns
# to cross-check the KMV_DP / KMV_DD values that were already stored.
#
# Every column this cell reads is validated up front -- both the raw inputs
# of the formulas and the stored columns shown in the comparison table -- so
# a missing column produces one clear error before dd_df is modified.
input_cols = ["V", "Sigma_V", "ST_debt", "LT_debt"]
stored_cols = ["Ticker", "KMV_DP", "KMV_DD"]
required_cols = input_cols + stored_cols
print("Columns available:", dd_df.columns.tolist())
missing = [c for c in required_cols if c not in dd_df.columns]
if missing:
    raise ValueError(f"Missing columns in merton_kmv_results.csv: {missing}")

# DP = ST + 0.5 * LT
dd_df["DP_recalc"] = dd_df["ST_debt"] + 0.5 * dd_df["LT_debt"]
# DD = (V - DP) / (V * Sigma_V)
# NOTE(review): rows where V * Sigma_V is zero are not filtered here and
# would presumably give inf/NaN -- confirm the upstream data excludes them.
dd_df["DD_recalc"] = (dd_df["V"] - dd_df["DP_recalc"]) / (dd_df["V"] * dd_df["Sigma_V"])

# Compare stored KMV_DP / KMV_DD with recomputed versions
dd_df[["Ticker", "KMV_DP", "KMV_DD", "DP_recalc", "DD_recalc"]]

In [None]:
# Stored DD minus recomputed DD, row by row, followed by summary statistics
# of that gap.
dd_df["DD_diff"] = dd_df["KMV_DD"].sub(dd_df["DD_recalc"])
dd_df.loc[:, "DD_diff"].describe()

## 2. Ranking banks by Distance to Default

Here we sort banks from **riskiest** (lowest DD) to **safest** (highest DD).

In [None]:
# Compact per-bank table, ordered by ascending KMV_DD (lowest DD first) and
# re-indexed from zero.
summary_cols = ["Ticker", "KMV_DD", "KMV_PD", "Merton_PD"]
summary = (
    dd_df[summary_cols]
    .copy()
    .sort_values("KMV_DD")
    .reset_index(drop=True)
)
summary

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) for DD and both PDs
stats = summary.loc[:, ["KMV_DD", "KMV_PD", "Merton_PD"]].describe()
stats

### Interpretation of DD in this sample

- The **minimum DD** corresponds to the riskiest bank according to the KMV framework.
- The **median DD** gives a sense of the typical distance to default in the portfolio.
- The **maximum DD** corresponds to the safest bank.

In practice, model users sometimes map DD buckets to rating-like categories (e.g., DD < 1 as very risky, 1–2 as moderate, >3 as very safe), but these thresholds depend on calibration and should be treated as *heuristics* rather than hard rules.

In [None]:
# One bar per bank showing KMV_DD, in the order of the `summary` table,
# with a thin reference line at DD = 0.
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(summary["Ticker"], summary["KMV_DD"], color="tab:blue")
ax.axhline(0, color="black", linewidth=0.8)
ax.set_title("Distance to Default (KMV_DD) by Bank")
ax.set_ylabel("DD (in asset-volatility units)")
# Tilt the ticker labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()

In [None]:
# Distribution of KMV_DD across all rows of dd_df, using 8 bins
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(dd_df["KMV_DD"], bins=8, color="tab:green", edgecolor="black")
ax.set_title("Distribution of Distance to Default (KMV_DD)")
ax.set_xlabel("DD")
ax.set_ylabel("Frequency")
fig.tight_layout()
plt.show()

In [None]:
# KMV_DD on the x-axis against the two model-implied PDs, overlaid on one
# set of axes so they can be compared directly.
fig, ax = plt.subplots(figsize=(8, 4))
dd_values = dd_df["KMV_DD"]
ax.scatter(dd_values, dd_df["KMV_PD"], color="darkorange", label="KMV PD")
ax.scatter(dd_values, dd_df["Merton_PD"], color="steelblue", label="Merton PD", marker="x")
# Flip the x-axis so that lower DD (riskier) appears on the right.
ax.invert_xaxis()
ax.set_xlabel("Distance to Default (KMV_DD)")
ax.set_ylabel("Probability of Default")
ax.set_title("DD vs. Model-Implied PDs")
ax.legend()
fig.tight_layout()
plt.show()

## 3. How DD relates to leverage and volatility

From the formula
$$DD = \frac{V - DP}{V \cdot \sigma_V},$$
you can see that:

- Higher **leverage** (larger $DP$ relative to $V$) pushes DD down.
- Higher **asset volatility** $\sigma_V$ also pushes DD down, because uncertainty makes default more likely.
- For a fixed $DP$ and $\sigma_V$, increasing $V$ increases DD.

In [None]:
# Construct a simple leverage measure: Leverage = D_total / V.
# Sigma_V is used exactly as loaded, so no extra assignment is needed for it.
#
# Validate every column this cell reads so that a missing one produces a
# clear error before dd_df is modified.
leverage_cols = ["D_total", "V", "Ticker", "Sigma_V", "KMV_DD"]
missing_leverage_cols = [c for c in leverage_cols if c not in dd_df.columns]
if missing_leverage_cols:
    raise ValueError(
        f"Missing columns in merton_kmv_results.csv: {missing_leverage_cols}"
    )

dd_df["Leverage"] = dd_df["D_total"] / dd_df["V"]
dd_df[["Ticker", "Leverage", "Sigma_V", "KMV_DD"]]

In [None]:
# Leverage (x) against KMV_DD (y), with every point labelled by its ticker
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(dd_df["Leverage"], dd_df["KMV_DD"], color="purple")
for ticker, leverage, dd_value in zip(dd_df["Ticker"], dd_df["Leverage"], dd_df["KMV_DD"]):
    ax.annotate(ticker, (leverage, dd_value), fontsize=8, alpha=0.7)
ax.set_xlabel("Leverage = D_total / V")
ax.set_ylabel("Distance to Default (KMV_DD)")
ax.set_title("DD vs Leverage")
# The y-axis is flipped, so lower DD values are drawn toward the top of the
# plot (higher leverage usually means lower DD).
ax.invert_yaxis()
fig.tight_layout()
plt.show()

In [None]:
# Asset volatility (x) against KMV_DD (y), with every point labelled by its
# ticker
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(dd_df["Sigma_V"], dd_df["KMV_DD"], color="teal")
for ticker, sigma_v, dd_value in zip(dd_df["Ticker"], dd_df["Sigma_V"], dd_df["KMV_DD"]):
    ax.annotate(ticker, (sigma_v, dd_value), fontsize=8, alpha=0.7)
ax.set_xlabel("Asset volatility Sigma_V")
ax.set_ylabel("Distance to Default (KMV_DD)")
ax.set_title("DD vs Asset Volatility")
# The y-axis is flipped, so lower DD values are drawn toward the top of the
# plot (higher volatility usually means lower DD).
ax.invert_yaxis()
fig.tight_layout()
plt.show()