### original plan statistics

In [2]:
import json
import csv
from typing import Dict, List, Any

def analyze_json_file(
    file_path: str,
    *,
    output_file: str = 'original_plan_analysis.csv',
    full_value: float = 200,
) -> None:
    """Flag noteworthy (query, method) entries of a JSON file and save them as CSV.

    The JSON document is expected to be an object mapping each query id to an
    object of per-method mappings (``{query_id: {method: {key: number}}}``).
    Only the methods ``cardinality``, ``kepler`` and ``csv`` are inspected.
    An entry is reported when at least one of the following holds:

    * ``single_key`` -- the method's mapping contains exactly one key;
    * ``>=0.95``     -- some value in the mapping is at least 95% of
      ``full_value``.

    Matching entries are written to ``output_file`` with the columns
    ``query_id``, ``method`` and ``note`` (notes are comma-joined), ordered by
    sorted query id, and a short summary is printed to stdout.

    Args:
        file_path: Path of the JSON statistics file to read.
        output_file: Path of the CSV report to write (overwritten if present).
        full_value: The value regarded as 100% when applying the 95% rule.

    Returns:
        None. If the input file is missing or is not valid JSON, a message is
        printed and no CSV file is written.
    """
    try:
        # JSON interchange text is UTF-8; do not depend on the locale default.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"File not found {file_path}")
        return
    except json.JSONDecodeError as exc:
        # Include the file and parser position so the failure is actionable.
        print(f"ERROR: invalid JSON in {file_path}: {exc}")
        return

    # Loop-invariant cut-off for the "high value" rule.
    threshold = full_value * 0.95

    results = []
    single_key_count = 0
    high_value_count = 0

    # Sorted query ids keep the report order deterministic.
    for query_id in sorted(data):
        methods_data = data[query_id]

        for method in ('cardinality', 'kepler', 'csv'):
            if method not in methods_data:
                continue

            method_data = methods_data[method]
            is_single_key = len(method_data) == 1
            has_high_value = any(
                value >= threshold for value in method_data.values()
            )
            if not (is_single_key or has_high_value):
                continue

            # Tally from the flags themselves instead of re-parsing the
            # joined note text afterwards.
            note = []
            if is_single_key:
                note.append("single_key")
                single_key_count += 1
            if has_high_value:
                note.append(">=0.95")
                high_value_count += 1

            results.append({
                'query_id': query_id,
                'method': method,
                'note': ','.join(note),
            })

    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['query_id', 'method', 'note'])
        writer.writeheader()
        writer.writerows(results)

    print(f"output result {output_file}")

    print(f"\nFound {len(results)}")
    print(f"- single key: {single_key_count}")
    print(f"- value >= 95%: {high_value_count}")

if __name__ == "__main__":
    # Script entry point: analyze the statistics file in the working directory.
    analyze_json_file(file_path='original_plan_statistics.json')

output result original_plan_analysis.csv

Found 59
- single key: 45
- value >= 95%: 59
