In [3]:
import os
import re
import statistics
import math
from collections import Counter

def analyze_keller_stats():
    """Summarise the run logs in ``statistics/`` and print a report.

    Every ``*.txt`` file in the ``statistics`` directory (resolved against
    the current working directory) whose name does not contain ``compact``
    is scanned for three optional fields:

    * ``Time: <seconds>s``  -> runtime of the run
    * ``Size: <int>``       -> size of the clique found
    * ``Vertices: [a, b, ...]`` -> vertex ids of the clique

    Only the first occurrence of each field in a file is used. The report
    contains runtime statistics, the share of runs whose size reached
    ``TARGET_SIZE``, the mean pairwise Jaccard similarity of the cliques
    and the vertices that occur in every recorded clique.

    Returns:
        None. All results are written to stdout.

    Raises:
        ValueError: if a matched field cannot be converted to a number
            (for example a non-integer token inside ``Vertices: [...]``).
    """
    stats_dir = "statistics"
    # isdir (not exists): a regular file named "statistics" would otherwise
    # pass the check and make os.listdir fail.
    if not os.path.isdir(stats_dir):
        print("No statistics directory found.")
        return

    # Target size for success rate (Keller-5 maximum)
    TARGET_SIZE = 28
    # Upper bound on how many cliques take part in the pairwise comparison,
    # which keeps the quadratic pair loop small for large result sets.
    MAX_COMPARED_CLIQUES = 50

    all_cliques = []
    runtimes = []
    sizes = []

    # 1. Parse all files in the directory.
    # os.listdir returns entries in arbitrary order; sorting makes the
    # subset of cliques used for the diversity metric reproducible.
    for filename in sorted(os.listdir(stats_dir)):
        if not filename.endswith(".txt") or "compact" in filename:
            continue

        path = os.path.join(stats_dir, filename)
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Extract metadata; each field is optional.
        time_match = re.search(r"Time: ([\d.]+)s", content)
        size_match = re.search(r"Size: (\d+)", content)
        vertex_match = re.search(r"Vertices: \[(.*?)\]", content)

        if time_match:
            runtimes.append(float(time_match.group(1)))
        if size_match:
            sizes.append(int(size_match.group(1)))
        if vertex_match:
            # Skip blank tokens so "Vertices: []" or a trailing comma does
            # not end up as int("").
            vertices = {
                int(token)
                for token in vertex_match.group(1).split(',')
                if token.strip()
            }
            # An empty clique carries no information and would make the
            # Jaccard union zero, so it is not recorded.
            if vertices:
                all_cliques.append(vertices)

    if not runtimes:
        print("No valid data found in statistics files.")
        return

    # 2. Basic statistics
    # Runtimes and sizes are parsed independently, so sizes may be empty
    # even though runtimes is not.
    if sizes:
        success_count = sum(1 for s in sizes if s >= TARGET_SIZE)
        success_text = f"{success_count / len(sizes) * 100:.1f}%"
    else:
        success_text = "n/a"

    mean_time = statistics.mean(runtimes)
    median_time = statistics.median(runtimes)
    min_time = min(runtimes)
    max_time = max(runtimes)
    # statistics.stdev needs at least two data points.
    std_dev = statistics.stdev(runtimes) if len(runtimes) > 1 else 0

    # 3. Diversity metrics (Jaccard similarity)
    # Average overlap between all pairs among the first
    # MAX_COMPARED_CLIQUES cliques (in sorted filename order).
    compared = all_cliques[:MAX_COMPARED_CLIQUES]
    similarities = []
    for i, first in enumerate(compared):
        for second in compared[i + 1:]:
            # Both sets are non-empty, so the union is never zero.
            similarities.append(len(first & second) / len(first | second))

    avg_similarity = statistics.mean(similarities) if similarities else 0
    diversity_index = (1 - avg_similarity) * 100  # Higher means more diverse

    # 4. Frequency analysis: in how many cliques does each vertex occur?
    vertex_counts = Counter(v for clique in all_cliques for v in clique)
    total_unique_vertices = len(vertex_counts)

    # OUTPUT RESULTS
    print("=" * 60)
    print("KELLER-5 RESEARCH FINAL ANALYSIS")
    print("=" * 60)
    print(f"Total Runs Analyzed:   {len(sizes)}")
    print(f"Success Rate (sz 28):  {success_text}")
    print("-" * 60)
    print(f"Mean Runtime:          {mean_time:.2f} s")
    print(f"Median Runtime:        {median_time:.2f} s")
    print(f"Runtime StdDev:        {std_dev:.2f} s")
    print(f"Min / Max Runtime:     {min_time:.2f} s / {max_time:.2f} s")
    print("-" * 60)
    print(f"Unique Vertices Used:  {total_unique_vertices}/1024")
    print(f"Diversity Index:       {diversity_index:.2f}% (100% = unique cliques)")
    print(f"Avg Clique Overlap:    {avg_similarity * 100:.1f}%")
    print("=" * 60)

    # Persistence check: vertices present in every recorded clique.
    persistent = [v for v, count in vertex_counts.items() if count == len(all_cliques)]
    if persistent:
        print(f"Persistent Backbone:   {len(persistent)} vertices found in EVERY run.")
        print(f"IDs: {persistent}")
    print("=" * 60)

# Run the analysis only when this file is executed directly, not on import.
if __name__ == "__main__":
    analyze_keller_stats()

KELLER-5 RESEARCH FINAL ANALYSIS
Total Runs Analyzed:   94
Success Rate (sz 28):  100.0%
------------------------------------------------------------
Mean Runtime:          90.42 s
Median Runtime:        53.92 s
Runtime StdDev:        84.25 s
Min / Max Runtime:     1.33 s / 333.97 s
------------------------------------------------------------
Unique Vertices Used:  948/1024
Diversity Index:       98.47% (100% = unique cliques)
Avg Clique Overlap:    1.5%
