# Data Management

This notebook helps manage your comparison data, including listing runs, analyzing disk usage, cleaning up old checkpoints, and generating documentation artifacts.

In [None]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output

# Add project root to path.
# NOTE(review): '..' assumes the kernel's working directory is a direct child
# of the project root — confirm if the notebook is launched from elsewhere.
project_root = Path('..').resolve()
_project_root_entry = str(project_root)
# Keep the cell idempotent: re-running it must not pile up duplicate
# sys.path entries. Drop any existing copies, then put the root first so
# project modules still take priority during import resolution.
while _project_root_entry in sys.path:
    sys.path.remove(_project_root_entry)
sys.path.insert(0, _project_root_entry)

from slavv.dev.management import (
    list_runs, 
    analyze_checkpoints, 
    cleanup_checkpoints, 
    generate_manifest,
    format_size
)

# Directory handed to the listing, analysis and manifest cells below.
comparisons_dir = project_root.joinpath('comparisons')

# Announce and create the directory when it is not there yet.
if not comparisons_dir.exists():
    print(f"Creating {comparisons_dir}")
    comparisons_dir.mkdir(exist_ok=True)

## 1. List Comparison Runs

View all previous runs with their size, available content (MATLAB and/or Python), and speedup where one is recorded.

In [None]:
runs = list_runs(comparisons_dir)

if runs:
    # One summary row per run; collected first and turned into a DataFrame once.
    summary_rows = []
    for run in runs:
        # Label which outputs the run entry flags as present.
        available = [
            label
            for flag, label in (('has_matlab', 'MATLAB'), ('has_python', 'Python'))
            if run[flag]
        ]

        # A missing or falsy speedup is rendered as a dash.
        speedup = run.get('speedup')
        speedup_text = f"{speedup:.1f}x" if speedup else '-'

        summary_rows.append({
            'Run Name': run['name'],
            'Size': format_size(run['size']),
            'Content': ', '.join(available),
            'Speedup': speedup_text,
        })

    display(pd.DataFrame(summary_rows))
else:
    print("No runs found.")

## 2. Disk Usage Analysis & Cleanup

Identify runs taking up space with intermediate checkpoint files (`.pkl`) and clean them up.

In [None]:
# Dedicated output area for the analysis report. Rendering the report here,
# instead of in the cell's own output, means that clearing the report on
# refresh never removes the "Refresh Analysis" button shown next to it.
analysis_output = widgets.Output()


def refresh_analysis():
    """Re-run the checkpoint analysis and render the report.

    For every entry returned by ``analyze_checkpoints(comparisons_dir)`` whose
    ``pkl_count`` is positive, prints the run name, total size and checkpoint
    count/size, followed by a button that calls ``cleanup_checkpoints`` on
    that entry. If no entry has checkpoints, a "disk is clean" message is
    printed instead. Any previous report in ``analysis_output`` is replaced.
    """
    with analysis_output:
        # wait=True defers the clear until new output arrives, avoiding flicker.
        clear_output(wait=True)
        print("Analyzing disk usage...\n")

        # Tracked during the single pass so the result is also correct when
        # analyze_checkpoints returns a one-shot iterable.
        found_checkpoints = False

        for data in analyze_checkpoints(comparisons_dir):
            if data['pkl_count'] > 0:
                found_checkpoints = True
                print(f"📂 {data['name']}")
                print(f"   Total Size: {format_size(data['total_size'])}")
                print(f"   Checkpoints: {data['pkl_count']} files ({format_size(data['pkl_size'])})")

                btn = widgets.Button(description=f"Clean {format_size(data['pkl_size'])}", button_style='warning')

                # d=data binds the current entry at definition time; a plain
                # closure would see only the last loop value when clicked.
                def clean_callback(b, d=data):
                    try:
                        freed = cleanup_checkpoints(d)
                    except Exception as e:
                        # Errors raised in widget callbacks are easy to miss,
                        # so surface them on the button and in the report.
                        # The button stays enabled so the user can retry.
                        b.description = "Cleanup failed"
                        b.button_style = 'danger'
                        with analysis_output:
                            print(f"❌ Failed to clean {d['name']}: {e}")
                        return
                    b.description = f"Freed {format_size(freed)}"
                    b.disabled = True
                    b.button_style = 'success'

                btn.on_click(clean_callback)
                display(btn)
                print("-" * 40)

        if not found_checkpoints:
            print("✅ No checkpoints found. Disk is clean!")


refresh_btn = widgets.Button(description="Refresh Analysis", icon='refresh')
refresh_btn.on_click(lambda b: refresh_analysis())
# Show the button and the report area together; only the latter is cleared.
display(refresh_btn, analysis_output)

refresh_analysis()

## 3. Documentation Generation

Generate `MANIFEST.md` inventory files for your comparison runs.

In [None]:
# Re-query the run list rather than reusing the one from the listing cell.
runs = list_runs(comparisons_dir)
generated_count = 0

print("Generating manifests...\n")
for r in runs:
    try:
        # The manifest is written inside the run's own directory.
        manifest_path = r['path'] / 'MANIFEST.md'
        generate_manifest(r['path'], manifest_path)
    except Exception as e:
        # Best effort: report the failing run and continue with the rest.
        print(f"❌ Failed for {r['name']}: {e}")
    else:
        print(f"✅ Generated for {r['name']}")
        generated_count += 1

print(f"\nDone! Generated {generated_count} manifests.")