# OAT - Extract France 10-Year Yield Data

This notebook extracts data from the OAT CSV file, specifically the 10-year yield data (Taux de l'Echéance Constante - 10 ans).

The output CSV contains two columns, `date` and `value`. Missing values are filled by backward fill (bfill), i.e. each gap takes the next available observation.


In [None]:
import pandas as pd
import os
import warnings
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Silence warnings for the rest of the session. No category, module or message
# filter is passed, so this applies to every warning, not only pandas ones.
warnings.filterwarnings('ignore')


In [None]:
# Locate the project root: start at the current working directory and climb
# one level at a time until a directory containing a 'data' entry is reached.
current_dir = os.getcwd()
while True:
    if os.path.exists(os.path.join(current_dir, 'data')):
        break
    parent_dir = os.path.dirname(current_dir)
    if parent_dir == current_dir:
        # dirname() handed back its own argument: this is the filesystem root
        # and no 'data' folder was found on the way up.
        raise FileNotFoundError("Could not find project root directory (looking for 'data' folder)")
    current_dir = parent_dir

# Work from the project root so the relative paths below resolve
os.chdir(current_dir)
print(f"Working directory: {os.getcwd()}")

# Source export and destination file, both relative to the project root
input_file = 'data/oat/Webstat_Export_fr_5385693.csv'
output_file = 'data/oat/FRANCE_10_YEARS_DAILY_YIELDS.csv'

# Fail fast when the source export is missing
if not os.path.exists(input_file):
    raise FileNotFoundError(f"File not found: {input_file}")

print(f"Input file: {input_file}\nOutput file: {output_file}")


In [None]:
# Load the raw export into a DataFrame.
# Expected layout of the file (as noted by the author, not verified here):
#   - semicolon-separated fields, header on the first row
#   - 5 metadata rows right below the header, then the actual observations
#   - first column "Titre :" holds the dates; the third column is
#     "Taux de l'Echéance Constante - 10 ans"
print("Reading CSV file...")
df = pd.read_csv(input_file, sep=';', header=0)

# Quick look at what was loaded: shape, column labels, first rows
column_names = df.columns.tolist()
print(f"Data shape: {df.shape}")
print("\nColumn names:", column_names, sep="\n")
print("\nFirst few rows:", df.head(10), sep="\n")


In [None]:
# Pick the two columns of interest: the date column is taken by position
# (first column, expected to be "Titre :"), the yield column by its label.
date_col_name = df.columns[0]
value_col_name = "Taux de l'Echéance Constante - 10 ans"

# Stop early, listing what is available, when the yield column is absent
if value_col_name not in df.columns:
    print("Available columns:", df.columns.tolist(), sep="\n")
    raise ValueError(f"Column '{value_col_name}' not found in the CSV file")

# Work on an independent two-column copy with short, stable names
selected_columns = [date_col_name, value_col_name]
df_extracted = df.loc[:, selected_columns].copy()
df_extracted.columns = ['date', 'value']

# Nothing is filtered here yet: this only reports the starting row count.
# Metadata rows are dropped by the date-parsing step that follows, which
# keeps only rows whose date is in YYYY-MM-DD format.
print("\nFiltering out metadata rows...")
print(f"Rows before filtering: {len(df_extracted)}")

print(f"Extracted columns: {df_extracted.columns.tolist()}")
print("\nFirst few rows (before filtering):", df_extracted.head(15), sep="\n")


In [None]:
# Parse the date strings strictly as YYYY-MM-DD. Anything that does not match
# (such as the metadata rows under the header) is coerced to NaT.
parsed_dates = pd.to_datetime(df_extracted['date'], format='%Y-%m-%d', errors='coerce')
df_extracted['date'] = parsed_dates

# Keep only the rows whose date parsed successfully
has_valid_date = df_extracted['date'].notna()
df_extracted = df_extracted.loc[has_valid_date].copy()

print(f"Rows after filtering: {len(df_extracted)}")

# Normalise the yield column:
#   1. "-" and "" placeholders are marked as missing
#   2. the French decimal comma is swapped for a dot
#   3. the text is parsed as a number; anything unparseable becomes NaN
value_text = (
    df_extracted['value']
    .replace(['-', ''], pd.NA)
    .astype(str)
    .str.replace(',', '.', regex=False)
)
df_extracted['value'] = pd.to_numeric(value_text, errors='coerce')

# Chronological order with a fresh 0..n-1 index
df_extracted = df_extracted.sort_values(by='date')
df_extracted = df_extracted.reset_index(drop=True)

# Summary of the cleaned series before any gap filling
first_date = df_extracted['date'].min()
last_date = df_extracted['date'].max()
print(f"Date range: {first_date} to {last_date}")
print(f"Total rows: {len(df_extracted)}")
print("\nMissing values before bfill:", df_extracted.isna().sum(), sep="\n")
print("\nFirst few rows:", df_extracted.head(10), sep="\n")


In [None]:
# Fill gaps in the yield series by backward fill: each missing value takes
# the next available observation in date order.
# NOTE: a backward fill cannot fill gaps at the very end of the series (there
# is no later value to copy), so the count printed below may be non-zero.
filled_values = df_extracted['value'].bfill()
df_extracted['value'] = filled_values

print("Missing values after bfill:", df_extracted.isna().sum(), sep="\n")

# Show both ends of the final series
print("\nFinal data preview:", df_extracted.head(10), sep="\n")
print("\n...")
print(df_extracted.tail(10))


In [None]:
# Make sure the destination folder exists before writing
output_dir = os.path.dirname(output_file)
os.makedirs(output_dir, exist_ok=True)

# Build the frame to export: same data, with dates rendered as YYYY-MM-DD
# text. df_extracted itself keeps its datetime column for later use.
date_as_text = df_extracted['date'].dt.strftime('%Y-%m-%d')
df_to_save = df_extracted.assign(date=date_as_text)

# Write the two columns (date,value) without the index
df_to_save.to_csv(output_file, index=False)
print(f"File saved successfully: {output_file}")

row_count, column_count = df_to_save.shape
print(f"Dimensions: {row_count} rows, {column_count} columns")
print("\nFinal data summary:", df_extracted.describe(), sep="\n")


In [None]:
# Plot the full yield history as a single line chart, save it as a PNG next
# to the output CSV, then display it.
fig = plt.figure(figsize=(16, 8))
ax = plt.gca()  # bind the axes once instead of calling plt.gca() repeatedly

ax.plot(df_extracted['date'], df_extracted['value'], linewidth=0.8, color='#2E86AB', alpha=0.7)
ax.set_title('OAT - France 10-Year Yield (Taux de l\'Echéance Constante - 10 ans) - Andamento Storico',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Data', fontsize=12)
ax.set_ylabel('Rendimento (%)', fontsize=12)
ax.grid(True, alpha=0.3, linestyle='--')

# X axis: one major tick every 2 years, labelled YYYY-MM, rotated 45 degrees
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.xaxis.set_major_locator(mdates.YearLocator(2))
plt.xticks(rotation=45)

# Compute the layout only now, after the tick labels have their final format
# and rotation; doing it earlier would size the margins for the default labels.
fig.tight_layout()

# Save the figure in the same directory as the output CSV
plot_file = os.path.join(os.path.dirname(output_file), 'FRANCE_10_YEARS_DAILY_YIELDS_plot.png')
fig.savefig(plot_file, dpi=150, bbox_inches='tight')
print(f"Plot saved to: {plot_file}")

plt.show()
