In [ ]:
# Save the cleaned panel dataset
# The target path is defined once so the write and the confirmation message cannot drift apart.
export_path = Path('../data/processed/panel.csv')

# to_csv does not create missing parent folders; create the output directory first
# so a fresh checkout (without data/processed/) does not fail here.
export_path.parent.mkdir(parents=True, exist_ok=True)

# Export only the panel columns; any other columns present on df are left out.
df_export = df[['DATE', 'country', 'yield', 'treat', 'post']].copy()
df_export.to_csv(export_path, index=False)

# as_posix() keeps the message identical on every platform
print(f"Panel dataset saved to '{export_path.as_posix()}'")
print(f"Dataset contains {len(df_export)} observations across {df_export['country'].nunique()} countries")
print(f"Date range: {df_export['DATE'].min().strftime('%Y-%m-%d')} to {df_export['DATE'].max().strftime('%Y-%m-%d')}")

# Display final dataset structure
print("\nFinal dataset structure:")
print(df_export.head(10))

## Data Export

In [ ]:
# Time series plot of bond yields, one line per country on shared axes
fig, ax = plt.subplots(figsize=(12, 8))

# (country code, line colour, legend label) for each plotted series
series_specs = [
    ('HU', 'red', 'Hungary'),
    ('CZ', 'blue', 'Czech Republic'),
    ('PL', 'green', 'Poland'),
]

for code, line_color, legend_label in series_specs:
    # Sort chronologically so the line is drawn left to right
    series = df.loc[df['country'] == code].sort_values('DATE')
    ax.plot(
        series['DATE'],
        series['yield'],
        color=line_color,
        linewidth=2,
        label=legend_label,
        marker='o',
        markersize=3,
    )

# Dashed vertical marker at the treatment date (December 22, 2022)
ax.axvline(
    x=pd.to_datetime('2022-12-22'),
    color='black',
    linestyle='--',
    linewidth=2,
    alpha=0.7,
    label='Treatment Date (Dec 22, 2022)',
)

# Labels, legend and grid
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('10-Year Government Bond Yield (%)', fontsize=12)
ax.set_title('Central European Government Bond Yields (2022-2023)', fontsize=14, fontweight='bold')
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()

# Render the figure
plt.show()

# Mean, standard deviation and observation count of the yield
# for every (country, pre/post) cell
print("\nSummary Statistics by Country and Period:")
print("=" * 50)
yield_by_cell = df.groupby(['country', 'post'])['yield']
summary_stats = yield_by_cell.agg(['mean', 'std', 'count'])
summary_stats = summary_stats.round(4)
print(summary_stats)

## Visualization

## Interpretation of Results

The **difference-in-differences coefficient** captures the causal effect of the treatment on Hungarian bond yields relative to the control countries (Czech Republic and Poland).

**Expected Result**: The coefficient should be approximately **+1.3 percentage points**, indicating that the treatment increased Hungarian bond yields by 1.3pp relative to the counterfactual.

**Economic Interpretation**: This suggests that the policy change in Hungary (effective January 1, 2023) led to a significant increase in borrowing costs for the Hungarian government, potentially reflecting increased perceived sovereign risk or reduced investor confidence.

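**Statistical Significance**: A p-value < 0.05 would indicate that this effect is statistically significant at the 5% level. Significance alone does not establish causality: the causal interpretation additionally requires the parallel-trends assumption, i.e. that Hungarian yields would have moved in line with Czech and Polish yields in the absence of the treatment.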

In [ ]:
# Two-way fixed effects difference-in-differences model
# Calendar-month key used for the time fixed effects
df['month'] = df['DATE'].dt.to_period('M')

# `yield` is a reserved Python keyword, and formula terms are evaluated as Python
# expressions, so a bare `yield` on the left-hand side cannot be evaluated.
# Fit on a copy that exposes the outcome under a formula-safe name; `df` keeps
# its original column names.
reg_data = df.rename(columns={'yield': 'bond_yield'})

# Estimate the model with HC1 robust standard errors.
# Only the interaction enters the formula: `treat` is perfectly collinear with the
# country dummies and `post` with the month dummies, so including the main effects
# would make the design matrix rank-deficient without changing the treat:post estimate.
formula = 'bond_yield ~ treat:post + C(country) + C(month)'
model = smf.ols(formula, data=reg_data).fit(cov_type='HC1')

# Print the regression results
print("=" * 60)
print("DIFFERENCE-IN-DIFFERENCES ESTIMATION RESULTS")
print("=" * 60)
print(model.summary())

# Extract and highlight the key coefficient
did_coef = model.params['treat:post']
did_se = model.bse['treat:post']
did_pval = model.pvalues['treat:post']
# Take the interval from the fitted results so it uses the same covariance and
# critical value as the summary table instead of a hard-coded 1.96.
did_ci_low, did_ci_high = model.conf_int().loc['treat:post']

print("\n" + "=" * 60)
print("KEY RESULT:")
print(f"**DiD Coefficient (treat:post): {did_coef:.4f}**")
print(f"Standard Error: {did_se:.4f}")
print(f"P-value: {did_pval:.4f}")
print(f"95% CI: [{did_ci_low:.4f}, {did_ci_high:.4f}]")
print("=" * 60)

# Difference-in-Differences Analysis of Central European Bond Yields

This notebook implements a difference-in-differences analysis to examine the impact of a policy change in Hungary on Hungarian government bond yields, using the Czech Republic and Poland as control countries.

In [None]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.stats.diagnostic import het_white

warnings.filterwarnings('ignore')

## Data Loading and Preprocessing

In [None]:
def load_yield_series(csv_path, country_code):
    """Read one yield CSV and return it as a tidy three-column frame.

    Parameters
    ----------
    csv_path : str
        Path to a CSV file that has a ``DATE`` column; the yield values are
        taken from the file's second column by position.
    country_code : str
        Label written to every row of the ``country`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``DATE``, ``yield`` and ``country``, one row per input row.
    """
    # NOTE(review): the file names look like FRED series IDs, and FRED exports can
    # mark missing observations with '.' -- confirm. Reading '.' as NaN keeps the
    # value column numeric and is a no-op when no such placeholder is present.
    raw = pd.read_csv(csv_path, parse_dates=['DATE'], na_values=['.'])
    return pd.DataFrame({
        'DATE': raw['DATE'],
        'yield': raw.iloc[:, 1],  # value column, whatever its header is
        'country': country_code,
    })


# Load the three CSV files
hungary = load_yield_series('../IRLTLT01HUM156N.csv', 'HU')
czechia = load_yield_series('../IRLTLT01CZM156N.csv', 'CZ')
poland = load_yield_series('../IRLTLT01PLM156N.csv', 'PL')

# Row counts as a quick sanity check on each file
for series_name, series_frame in [('Hungary', hungary), ('Czechia', czechia), ('Poland', poland)]:
    print(f"{series_name}: {series_frame.shape[0]} observations")

In [None]:
# Build the analysis panel in one pass:
#   1. stack the three country frames,
#   2. keep observations from 2022 onwards,
#   3. add the treatment-group and post-period dummies.
df = (
    pd.concat([hungary, czechia, poland], ignore_index=True)
    .loc[lambda stacked: stacked['DATE'] >= '2022-01-01']
    .assign(
        # 1 for Hungary (the treated country), 0 for the others
        treat=lambda panel: (panel['country'] == 'HU').astype(int),
        # 1 for observations dated 2023-01-01 or later, 0 before
        post=lambda panel: (panel['DATE'] >= '2023-01-01').astype(int),
    )
)

print(f"Final dataset: {df.shape[0]} observations")
print(f"Date range: {df['DATE'].min()} to {df['DATE'].max()}")
df.head()

## Difference-in-Differences Estimation

We estimate a two-way fixed effects model to identify the causal effect of the treatment on bond yields:

**Model**: `yield ~ treat*post + C(country) + C(DATE.dt.to_period('M'))`

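Where:
- `treat` = 1 if Hungary (HU), 0 otherwise
- `post` = 1 if date >= 2023-01-01, 0 otherwise
- `treat:post` (the interaction) is the difference-in-differences coefficient of interest; the `treat` and `post` main effects are collinear with the country and month fixed effects, respectively, so they are absorbed by them
- Country fixed effects control for time-invariant differences
- Monthly time fixed effects control for common time trends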