# Post-Quantum RPKI Validation Results

**Scientific Analysis of Post-Quantum Signature Algorithms in RPKI**

This notebook presents a comprehensive analysis of post-quantum RPKI measurements, including:
- Repository size overhead analysis
- Validation time comparisons
- Daily bandwidth delta calculations
- Memory profiling results
- Statistical comparisons vs ECDSA baseline

**Author:** Post-Quantum RPKI Research Team  
**Date:** December 2025  
**Dataset:** Real-world RPKI repository measurements


## 1. Setup and Data Loading


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
from pathlib import Path
from datetime import datetime
import warnings
# NOTE: this silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Publication-quality plotting style: try the seaborn "paper" style under its
# two candidate names in order, and use matplotlib's default when neither
# can be loaded.
for style_name in ('seaborn-v0_8-paper', 'seaborn-paper'):
    try:
        plt.style.use(style_name)
    except OSError:
        continue
    break
else:
    plt.style.use('default')

# Font, resolution and export settings applied to all figures below.
publication_rc = {
    'font.size': 11,
    'font.family': 'serif',
    'axes.labelsize': 12,
    'axes.titlesize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'figure.titlesize': 16,
    'figure.dpi': 150,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
    'savefig.pad_inches': 0.1,
}
plt.rcParams.update(publication_rc)

print("✓ Libraries loaded successfully")


In [None]:
# Load the measurement results (the error message below points to validate.py
# as the script that generates results.csv).
csv_path = Path("results.csv")
json_path = Path("results.json")

if not csv_path.exists():
    raise FileNotFoundError("results.csv not found. Please run validate.py first.")

# Tabular results; every later cell reads from `df`.
df = pd.read_csv(csv_path)

# Optional experiment metadata: stays an empty dict when results.json is absent
# or has no 'experiment_metadata' entry.
metadata = {}
if json_path.exists():
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    metadata = data.get('experiment_metadata', {})

# The ',' thousands-separator format option is only valid for numbers. When
# the count is missing, the 'Unknown' fallback is a string and formatting it
# with ':,' raises ValueError, so only apply the separator to numeric values.
total_objects = metadata.get('total_objects', 'Unknown')
if isinstance(total_objects, (int, float)):
    total_objects_text = f"{total_objects:,}"
else:
    total_objects_text = str(total_objects)

print(f"✓ Loaded {len(df)} algorithm results")
print(f"✓ Experiment date: {metadata.get('date', 'Unknown')}")
print(f"✓ Total objects: {total_objects_text}")


## 2. Experiment Overview


In [None]:
# Experiment overview banner.
#
# `total_objects` falls back to the string 'Unknown' when the metadata does not
# provide it. The ',' thousands-separator format option is only valid for
# numbers, so applying it to that fallback raises ValueError; format the value
# conditionally instead.
total_objects = metadata.get('total_objects', 'Unknown')
if isinstance(total_objects, (int, float)):
    total_objects_text = f"{total_objects:,}"
else:
    total_objects_text = str(total_objects)

print("=" * 80)
print("  FIRST REAL POST-QUANTUM RPKI MEASUREMENTS (December 2025)")
print("=" * 80)
print(f"\nExperiment Date: {metadata.get('date', 'Unknown')}")
print(f"Total Objects Validated: {total_objects_text}")
print(f"Total Algorithms Tested: {len(df)}")
# Summing the boolean success column counts the rows that passed.
print(f"Successful Validations: {df['validation_success'].sum()}/{len(df)}")
print("=" * 80)


## 3. Summary Table


In [None]:
# Summary table: pick the headline columns, give them presentation headers and
# render the boolean success flag as PASS/FAIL.
column_labels = {
    'algorithm': 'Algorithm',
    'algorithm_standardized': 'Standardized Name',
    'nist_security_level': 'NIST Level',
    'file_count': 'File Count',
    'total_size_gb': 'Size (GB)',
    'validation_time_min': 'Time (min)',
    'validation_success': 'Status',
}
summary_cols = list(column_labels)

summary_df = df[summary_cols].copy()
status_text = {True: 'PASS', False: 'FAIL'}
summary_df['validation_success'] = summary_df['validation_success'].map(status_text)
summary_df.columns = list(column_labels.values())

# Last top-level expression of the cell, so the notebook renders the table.
summary_df


## 4. Relative Performance vs ECDSA Baseline


In [None]:
# Relative metrics versus the ECDSA baseline.
#
# `baseline` is the first row whose algorithm is 'ecdsa-baseline', or None when
# there is no such row. Later cells check `baseline is not None` and read the
# `size_overhead` / `time_overhead` columns added to `df` here.
baseline = None
baseline_df = df[df['algorithm'] == 'ecdsa-baseline']
if not baseline_df.empty:
    baseline = baseline_df.iloc[0]

if baseline is not None:
    # Percentage change relative to the baseline row, rounded to two decimals.
    df['size_overhead'] = ((df['total_size_gb'] / baseline['total_size_gb'] - 1) * 100).round(2)
    df['time_overhead'] = ((df['validation_time_sec'] / baseline['validation_time_sec'] - 1) * 100).round(2)

    comparison_df = df[df['algorithm'] != 'ecdsa-baseline'][['algorithm', 'size_overhead', 'time_overhead']].copy()
    comparison_df.columns = ['Algorithm', 'Size Overhead (%)', 'Time Overhead (%)']
    for overhead_column in ('Size Overhead (%)', 'Time Overhead (%)'):
        comparison_df[overhead_column] = comparison_df[overhead_column].apply(lambda x: f"{x:+.1f}%")

    # A bare expression nested inside an `if` is not the cell's last top-level
    # expression, so the notebook would never render it. Print explicitly.
    print(comparison_df.to_string(index=False))
else:
    print("Baseline data not available for comparison")


## 5. Visualizations

### 5.1 Validation Time Comparison


In [None]:
# Bar chart of validation time per algorithm; bars are green for successful
# validations and red for failed ones.
PASS_COLOR = '#2ecc71'
FAIL_COLOR = '#e74c3c'

fig, ax = plt.subplots(figsize=(10, 6))
colors = [PASS_COLOR if succeeded else FAIL_COLOR for succeeded in df['validation_success']]
bars = ax.bar(
    df['algorithm'],
    df['validation_time_min'],
    color=colors,
    alpha=0.7,
    edgecolor='black',
    linewidth=1.2,
)

ax.set_xlabel('Algorithm', fontweight='bold')
ax.set_ylabel('Validation Time (minutes)', fontweight='bold')
ax.set_title('RPKI Validation Time: Post-Quantum vs Classical', fontweight='bold', pad=20)
ax.grid(axis='y', alpha=0.3, linestyle='--')
ax.set_axisbelow(True)

# Annotate each bar with its value, centred just above the bar top.
for bar in bars:
    height = bar.get_height()
    center_x = bar.get_x() + bar.get_width() / 2.
    ax.text(center_x, height, f'{height:.1f}', ha='center', va='bottom', fontweight='bold')

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()


### 5.2 Repository Size Comparison


In [None]:
# Bar chart of repository size per algorithm; bars are blue for successful
# validations and red for failed ones.
OK_COLOR = '#3498db'
FAILED_COLOR = '#e74c3c'

fig, ax = plt.subplots(figsize=(10, 6))
colors = [OK_COLOR if succeeded else FAILED_COLOR for succeeded in df['validation_success']]
bars = ax.bar(
    df['algorithm'],
    df['total_size_gb'],
    color=colors,
    alpha=0.7,
    edgecolor='black',
    linewidth=1.2,
)

ax.set_xlabel('Algorithm', fontweight='bold')
ax.set_ylabel('Repository Size (GB)', fontweight='bold')
ax.set_title('RPKI Repository Size: Post-Quantum vs Classical', fontweight='bold', pad=20)
ax.grid(axis='y', alpha=0.3, linestyle='--')
ax.set_axisbelow(True)

# Annotate each bar with its size (two decimals), centred above the bar top.
for bar in bars:
    height = bar.get_height()
    center_x = bar.get_x() + bar.get_width() / 2.
    ax.text(center_x, height, f'{height:.2f}', ha='center', va='bottom', fontweight='bold')

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()


### 5.3 Relative Performance vs Baseline


In [None]:
# Side-by-side bar charts of size and time overhead relative to the baseline.
# Reads `baseline` and the `size_overhead` / `time_overhead` columns of `df`.
if baseline is None:
    print("Baseline data not available")
else:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    pq_df = df[df['algorithm'] != 'ecdsa-baseline']

    # One entry per panel: (axes, overhead column, y-axis label, title).
    panels = (
        (ax1, 'size_overhead', 'Size Overhead (%)', 'Repository Size Overhead vs ECDSA Baseline'),
        (ax2, 'time_overhead', 'Time Overhead (%)', 'Validation Time Overhead vs ECDSA Baseline'),
    )

    for axis, column, y_label, title in panels:
        # Orange for overhead at or above zero, green for values below it.
        bar_colors = ['#e67e22' if value >= 0 else '#27ae60' for value in pq_df[column]]
        panel_bars = axis.bar(pq_df['algorithm'], pq_df[column], color=bar_colors,
                              alpha=0.7, edgecolor='black', linewidth=1.2)
        axis.axhline(y=0, color='black', linestyle='-', linewidth=1)
        axis.set_xlabel('Algorithm', fontweight='bold')
        axis.set_ylabel(y_label, fontweight='bold')
        axis.set_title(title, fontweight='bold')
        axis.grid(axis='y', alpha=0.3, linestyle='--')
        axis.set_axisbelow(True)

        # Put the label above positive bars and below negative ones.
        for bar in panel_bars:
            height = bar.get_height()
            anchor = 'bottom' if height >= 0 else 'top'
            axis.text(bar.get_x() + bar.get_width() / 2., height, f'{height:+.1f}%',
                      ha='center', va=anchor, fontweight='bold')

    plt.setp([ax1.xaxis.get_majorticklabels(), ax2.xaxis.get_majorticklabels()], rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


## 6. Daily Delta Analysis (Bandwidth Overhead)


In [None]:
# Daily bandwidth overhead, modelled as a fixed fraction of each repository's
# size changing per day (2% daily update rate assumed as typical for RPKI).
daily_update_rate = 0.02

if baseline is not None:
    baseline_size = baseline['total_size_bytes']
    baseline_daily_mb = (baseline_size * daily_update_rate) / (1024**2)

    print(f"Baseline repository size: {baseline_size / (1024**3):.2f} GB")
    print(f"Assumed daily update rate: {daily_update_rate*100:.1f}%")
    print(f"Baseline daily delta: {baseline_daily_mb:.2f} MB/day\n")

    delta_data = []
    for _, row in df.iterrows():
        # Skip the baseline itself and rows with no recorded size.
        if row['algorithm'] == 'ecdsa-baseline' or not row['total_size_bytes'] > 0:
            continue

        size_bytes = row['total_size_bytes']
        overhead_bytes = size_bytes - baseline_size
        # Guard against a zero-sized baseline to avoid dividing by zero.
        overhead_percent = (overhead_bytes / baseline_size * 100) if baseline_size > 0 else 0
        # Extra bytes transferred per day compared with the baseline, in MB.
        daily_delta_mb = ((size_bytes * daily_update_rate) - (baseline_size * daily_update_rate)) / (1024**2)

        delta_data.append({
            'Algorithm': row['algorithm'],
            'Size Overhead (%)': f"{overhead_percent:+.1f}%",
            'Daily Delta (MB/day)': f"{daily_delta_mb:+.2f}"
        })

    if delta_data:
        delta_df = pd.DataFrame(delta_data)
        # A bare expression nested inside `if` blocks is not the cell's last
        # top-level expression, so the notebook would never render it.
        print(delta_df.to_string(index=False))
else:
    print("Baseline data not available")


## 7. Key Findings


In [None]:
# Textual summary of per-algorithm overheads and validation status.
# Reads `baseline` and the `size_overhead` / `time_overhead` columns of `df`.
print("=" * 80)
print("KEY FINDINGS")
print("=" * 80)

if baseline is not None:
    for _, row in df.iterrows():
        if row['algorithm'] == 'ecdsa-baseline':
            continue

        print(f"\n{row['algorithm'].upper()}:")
        print(f"  • Size overhead: {row['size_overhead']:+.1f}% vs ECDSA")
        print(f"  • Time overhead: {row['time_overhead']:+.1f}% vs ECDSA")
        if row['validation_success']:
            # 'validation_status' is optional; treat a missing value as a plain PASS.
            status_msg = row.get('validation_status', 'PASS')
            if 'rpki-client' in str(status_msg).lower():
                print("  • Status: ✓ PASS with real rpki-client validation")
            else:
                print("  • Status: ✓ PASS")
        else:
            # Report failures explicitly rather than omitting the status line.
            print("  • Status: ✗ FAIL")
else:
    # Same message the other baseline-dependent cells print.
    print("\nBaseline data not available")

print("\n" + "=" * 80)
print("SCIENTIFIC CONTRIBUTION")
print("=" * 80)
print("\nThis dataset represents the first real-world measurements of")
print("NIST post-quantum signature algorithms (ML-DSA, Falcon) applied")
print("to the global RPKI repository at scale.")
print("=" * 80)


## 8. Detailed Data


In [None]:
# Show the complete results table: lift pandas' column-count and width limits
# so no column is elided, then render `df` as the cell output.
for option_name in ('display.max_columns', 'display.width'):
    pd.set_option(option_name, None)
df
