In [2]:
"""
Script to filter MQTBench circuits to keep only specific ones.
Keeps: ae_indep, dj_indep, ghz_indep, qft_indep, graphstate_indep
With qubits: 2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100
Deletes all other .qasm files.
"""

import os
from pathlib import Path
import re


def parse_mqt_filename(filename: str):
    """Parse an MQT Bench filename into its origin and qubit count.

    Filenames are typically shaped like ``origin_indep_qiskit_N.qasm``.

    Args:
        filename: Bare file name (no directory part), e.g.
            ``'ae_indep_qiskit_10.qasm'``.

    Returns:
        A tuple ``(origin, num_qubits)``:

        * ``origin`` is the '_'-joined run of tokens that precede the first
          ``'qiskit'`` token or the first all-decimal token, or ``None`` when
          no such token exists or nothing precedes it.
        * ``num_qubits`` is the last all-decimal token as an ``int``, or
          ``None`` when the name contains no such token.
    """
    # Strip only a trailing extension; str.replace would also remove any
    # '.qasm' that happens to appear in the middle of the name.
    suffix = '.qasm'
    name = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    parts = name.split('_')

    # The qubit count is the right-most numeric token.  isdecimal() is used
    # rather than isdigit() because isdigit() accepts characters such as
    # superscripts that int() refuses to convert.
    num_qubits = next(
        (int(token) for token in reversed(parts) if token.isdecimal()),
        None,
    )

    # The origin is everything before the first 'qiskit' or numeric token.
    origin_parts = []
    for index, token in enumerate(parts):
        if token == 'qiskit' or token.isdecimal():
            origin_parts = parts[:index]
            break

    origin = '_'.join(origin_parts) if origin_parts else None

    return origin, num_qubits


def filter_mqt_circuits(folder_path: str, dry_run: bool = True):
    """
    Filter MQT circuits to keep only specific ones.

    Recursively collects every ``.qasm`` file below ``folder_path``, parses
    each file name with ``parse_mqt_filename`` and keeps a file only when both
    its origin and its qubit count are in the allow-lists defined below.
    Every other ``.qasm`` file (including ones whose name cannot be parsed)
    is listed for deletion and, unless ``dry_run`` is set, removed.

    Args:
        folder_path: Path to MQTBench folder
        dry_run: If True, only print what would be deleted (don't actually delete)

    Returns:
        None. All results are reported on standard output.
    """
    # Define what to keep
    KEEP_ORIGINS = {
        'ae_indep',
        'dj_indep',
        'ghz_indep',
        'qft_indep',
        'graphstate_indep',
    }

    KEEP_QUBITS = {2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}

    banner = "=" * 70

    print(banner)
    print("MQT CIRCUIT FILTER")
    print(banner)
    print(f"Folder: {folder_path}")
    print(f"Mode: {'DRY RUN (no files will be deleted)' if dry_run else 'LIVE (files will be deleted)'}")
    print(f"\nKeeping origins: {sorted(KEEP_ORIGINS)}")
    print(f"Keeping qubit counts: {sorted(KEEP_QUBITS)}")
    print(banner)

    # A regular file is not a usable benchmark folder either, so test for a
    # directory rather than for mere existence.
    if not os.path.isdir(folder_path):
        print(f"\nERROR: Folder '{folder_path}' not found!")
        return

    # Find all .qasm files.  Directories that happen to match the pattern are
    # skipped, and the result is sorted so the preview below is reproducible.
    qasm_files = sorted(
        path for path in Path(folder_path).rglob('*.qasm') if path.is_file()
    )
    print(f"\nFound {len(qasm_files)} .qasm files total")

    # Categorize files
    files_to_keep = []
    files_to_delete = []

    for qasm_file in qasm_files:
        origin, num_qubits = parse_mqt_filename(qasm_file.name)
        entry = (qasm_file, origin, num_qubits)
        if origin in KEEP_ORIGINS and num_qubits in KEEP_QUBITS:
            files_to_keep.append(entry)
        else:
            files_to_delete.append(entry)

    # Print summary
    print(f"\n{len(files_to_keep)} files to KEEP:")
    # Group by origin for display
    kept_by_origin = {}
    for file, origin, qubits in files_to_keep:
        kept_by_origin.setdefault(origin, []).append((file.name, qubits))

    for origin in sorted(kept_by_origin):
        files = kept_by_origin[origin]
        qubits_list = sorted(q for _, q in files)
        print(f"  {origin}: {len(files)} files (qubits: {qubits_list})")
        for fname, _ in sorted(files, key=lambda item: item[1]):
            print(f"    KEEP: {fname}")

    print(f"\n{len(files_to_delete)} files to DELETE:")
    # Show first 20 as examples
    preview_limit = 20
    for file, origin, qubits in files_to_delete[:preview_limit]:
        print(f"  DELETE: {file.name} (origin={origin}, qubits={qubits})")
    if len(files_to_delete) > preview_limit:
        print(f"  ... and {len(files_to_delete) - preview_limit} more files")

    if dry_run:
        print(f"\n{banner}")
        print("DRY RUN COMPLETE - No files were deleted")
        print("To actually delete files, run with dry_run=False")
        print(banner)
        return

    # Delete files
    print(f"\n{banner}")
    print("DELETING FILES...")
    print(banner)

    deleted_count = 0
    failed_count = 0
    for file, origin, qubits in files_to_delete:
        try:
            os.remove(file)
        except OSError as e:
            # Keep going: one undeletable file should not abort the cleanup.
            failed_count += 1
            print(f"  ERROR deleting {file.name}: {e}")
        else:
            deleted_count += 1
            # Throttle progress output: first 10, then every 50th deletion.
            if deleted_count <= 10 or deleted_count % 50 == 0:
                print(f"  Deleted: {file.name}")

    print(f"\nSUCCESS: Deleted {deleted_count} files")
    print(f"SUCCESS: Kept {len(files_to_keep)} files")
    if failed_count:
        # Make failures visible in the summary instead of only in the log above.
        print(f"WARNING: Failed to delete {failed_count} files")


if __name__ == "__main__":
    import sys

    # Pick the first benchmark folder that exists, preferring the reduced set.
    folder = next(
        (candidate for candidate in ('MQTBench_reduced', 'MQTBench')
         if os.path.exists(candidate)),
        None,
    )

    if folder is None:
        print("ERROR: Neither 'MQTBench_reduced' nor 'MQTBench' folder found!")
        print("Please specify the folder path.")
        sys.exit(1)

    print(f"Using folder: {folder}\n")

    # Step 1: preview only, nothing is removed.
    print("STEP 1: DRY RUN")
    filter_mqt_circuits(folder, dry_run=True)

    # Step 2: delete only after an explicit, literal "yes".
    print("\n" + "="*70)
    response = input("\nDo you want to proceed with deletion? (yes/no): ")
    response = response.strip().lower()

    if response != 'yes':
        print("\nDeletion cancelled.")
    else:
        print("\nSTEP 2: ACTUAL DELETION")
        filter_mqt_circuits(folder, dry_run=False)

Using folder: MQTBench_reduced

STEP 1: DRY RUN
MQT CIRCUIT FILTER
Folder: MQTBench_reduced
Mode: DRY RUN (no files will be deleted)

Keeping origins: ['ae_indep', 'dj_indep', 'ghz_indep', 'graphstate_indep', 'qft_indep']
Keeping qubit counts: [2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

Found 3877 .qasm files total

54 files to KEEP:
  ae_indep: 11 files (qubits: [2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
    KEEP: ae_indep_qiskit_2.qasm
    KEEP: ae_indep_qiskit_10.qasm
    KEEP: ae_indep_qiskit_20.qasm
    KEEP: ae_indep_qiskit_30.qasm
    KEEP: ae_indep_qiskit_40.qasm
    KEEP: ae_indep_qiskit_50.qasm
    KEEP: ae_indep_qiskit_60.qasm
    KEEP: ae_indep_qiskit_70.qasm
    KEEP: ae_indep_qiskit_80.qasm
    KEEP: ae_indep_qiskit_90.qasm
    KEEP: ae_indep_qiskit_100.qasm
  dj_indep: 11 files (qubits: [2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
    KEEP: dj_indep_qiskit_2.qasm
    KEEP: dj_indep_qiskit_10.qasm
    KEEP: dj_indep_qiskit_20.qasm
    KEEP: dj_indep_qiskit_30.qasm
 