In [None]:
%run utilities.ipynb
%run algorithm_1.ipynb
%run algorithm_2.ipynb
%run reduction_subroutine.ipynb


In [None]:
# MAIN EXECUTION

import time
import gc
import json

def _new_result(label, N, m0, count):
    """Return the per-record result dict with every field at its default.

    The ``ef_first_fail_*``, ``ef_pairs_checked`` and ``ef_max_*`` fields are
    emitted with these defaults only; nothing in this cell fills them in.
    """
    return {
        "label": label,
        "N": int(N),
        "m0": int(m0),
        "count": count,
        "main_status": "FAIL",
        "fail_stage": "NONE",
        "ef_status": "SKIPPED_INPUT",
        "lt_first_fail": None,
        "lt_components_passed": 1,
        "lt_data": {},
        "ef_first_fail_pair": None,
        "ef_first_fail_side": None,
        "ef_first_fail_orbit_rep": None,
        "ef_first_fail_stab_generated_index": None,
        "ef_first_fail_Gamma_index": None,
        "ef_first_fail_NA_index": None,
        "ef_first_fail_NB_index": None,
        "ef_pairs_checked": 0,
        "ef_max_Gamma_index": None,
        "ef_max_stab_generated_index": None
    }


def _to_json_safe(obj):
    """Recursively convert *obj* into something ``json.dumps`` accepts.

    Dict keys are stringified, tuples become lists, and objects exposing
    ``lift`` / ``_integer_`` are coerced with ``int`` first, then ``float``,
    then ``str``. Anything unrecognised falls back to ``str(obj)``.
    """
    if obj is None:
        return None
    if isinstance(obj, dict):
        return {str(key): _to_json_safe(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_to_json_safe(item) for item in obj]
    # NOTE(review): str(type(obj)) normally looks like "<class '...'>", so the
    # startswith('sage.') test is probably never true; kept so the set of
    # objects taking this branch is unchanged.
    if hasattr(obj, 'lift') or hasattr(obj, '_integer_') or str(type(obj)).startswith('sage.'):
        for cast in (int, float):
            try:
                return cast(obj)
            except Exception:
                # Not a bare except: Ctrl-C / SystemExit must still propagate.
                continue
        return str(obj)
    if hasattr(obj, '_float_'):
        return float(obj)
    if isinstance(obj, (int, float, str, bool)):
        return obj
    return str(obj)


def _evaluate_record(label, N, m0, count, adelic_gens):
    """Run Algorithm 1 and, if it passes, Algorithm 2 on a single record.

    Algorithm 1 (``_algorithm1``) is run on the generators reduced modulo each
    prime power ``l**k`` dividing ``m0``, smallest prime power first, stopping
    at the first failure. Algorithm 2 (``_algorithm2``, the EF_{m0} check) is
    run on the generators reduced modulo ``m0`` only when every Algorithm 1
    check passed.

    Returns the result dict for the record. Any ``Exception`` raised by the
    algorithms is reported on stdout and recorded as ``fail_stage == "ERROR"``
    instead of aborting the batch. ``record_time`` is set on every path.
    """
    started = time.time()
    result = _new_result(label, N, m0, count)

    try:
        lt_passed = True
        passed_modulus = 1

        # Sort prime powers by size to check the cheaper ones first.
        prime_powers = [(Integer(l), Integer(k)) for (l, k) in factor(m0)]
        prime_powers.sort(key=lambda lk: lk[0]**lk[1])

        for l, k in prime_powers:
            n = l**k
            gens_lk = reduction_mod_n(adelic_gens, N, n)

            algo1_data = _algorithm1(gens_lk, l, k, collect_data=True)
            algo1_ok = algo1_data["result"]

            result["lt_data"][f"[{int(l)},{int(k)}]"] = {
                "status": algo1_data["status"],
                "orbit_size": algo1_data["orbit_size"],
                "target_size": algo1_data["target_size"],
                "deficit": algo1_data["deficit"]
            }

            if not algo1_ok:
                lt_passed = False
                result["lt_first_fail"] = [int(l), int(k)]
                break
            passed_modulus *= n

        # Product of the prime powers that passed before any failure.
        result["lt_components_passed"] = int(passed_modulus)

        if not lt_passed:
            result["main_status"] = "FAIL"
            result["fail_stage"] = "LT"
            result["ef_status"] = "SKIPPED_LT_FAIL"
        else:
            gens_m0 = reduction_mod_n(adelic_gens, N, m0)
            if _algorithm2(gens_m0, m0):
                result["main_status"] = "PASS"
                result["fail_stage"] = "NONE"
                result["ef_status"] = "PASS"
            else:
                result["main_status"] = "FAIL"
                result["fail_stage"] = "EF"
                result["ef_status"] = "FAIL"

    except Exception as exc:
        # Keep the batch going, but do not hide why this record failed.
        print(f"  ERROR on record {label}: {type(exc).__name__}: {exc}")
        result["main_status"] = "FAIL"
        result["fail_stage"] = "ERROR"
        result["ef_status"] = "SKIPPED_INPUT"

    result["record_time"] = float(time.time() - started)
    return result


def batch_process_all_files(inpath, outpath):
    """Process every ``*.txt`` file in *inpath* and write JSONL results.

    For each input file ``<stem>.txt`` the records yielded by
    ``parse_count_file`` are evaluated one by one and written, one JSON object
    per line, to ``<outpath>/<stem>-criteria-check.jsonl``. A plain-text timing
    summary is written next to it as ``<stem>-criteria-check.summary.txt``.

    Args:
        inpath: Directory containing the input ``*.txt`` files.
        outpath: Directory for the output files; created (including missing
            parents) if it does not exist.

    Returns:
        None. Results are written to disk and progress is printed.

    Raises:
        KeyError: If a record lacks ``label``, ``N``, ``m0`` or
            ``adelic_gens``; these are read before the per-record error
            handling starts.
    """
    result_dir = Path(inpath)
    output_dir = Path(outpath)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so that the processing order does not depend on the filesystem.
    txt_files = sorted(result_dir.glob("*.txt"))
    print(f"Processing {len(txt_files)} files...")

    for input_path in txt_files:
        print("=" * 50)

        output_path = output_dir / f"{input_path.stem}-criteria-check.jsonl"

        print(f"Processing: {input_path.name}")
        print(f"Output will be saved to: {output_path}")

        start_time = time.time()
        record_count = 0

        # Clear oversized caches between files to prevent memory bloat.
        for cache in (_primitive_vector_cache, _orbit_cache, _stabilizer_cache):
            if len(cache) > 1000:
                cache.clear()

        with open(output_path, 'w', encoding='utf-8') as outfile:
            for rec in parse_count_file(input_path):
                label = rec["label"]
                N = Integer(rec["N"])
                m0 = Integer(rec["m0"])
                count = rec.get("count", "N/A")
                adelic_gens = rec["adelic_gens"]

                record_count += 1

                # No timeout: each record runs for as long as it needs.
                result_data = _to_json_safe(
                    _evaluate_record(label, N, m0, count, adelic_gens)
                )

                # One JSON object per line (JSONL).
                outfile.write(json.dumps(result_data) + '\n')

                if record_count % 10 == 0:
                    print(f"  Processed {record_count} records...")

                if record_count % 50 == 0:
                    gc.collect()

        total_time = time.time() - start_time

        # An input file with no records must not crash the summary.
        if record_count:
            average_text = f"{total_time/record_count:.3f}s"
        else:
            average_text = "n/a"

        summary_path = output_path.with_suffix('.summary.txt')
        with open(summary_path, 'w', encoding='utf-8') as summary_file:
            summary_file.write(f"Processing summary for {input_path.name}\n")
            summary_file.write(f"Total time: {total_time:.2f}s\n")
            summary_file.write(f"Records processed: {record_count}\n")
            summary_file.write(f"Average time per record: {average_text}\n")
            summary_file.write(f"Output saved to: {output_path}\n")
            summary_file.write("Note: No timeout limit - entries run as long as needed\n")

        print("  === SUMMARY ===")
        print(f"  Records processed: {record_count}")
        print(f"  Total time: {total_time:.2f}s")
        print(f"  Average time per record: {average_text}")
        print(f"  Results saved to: {output_path}")
        print(f"  Summary saved to: {summary_path}")
        print()

In [None]:
# Run batch processing
# Prompt for the directory whose *.txt files are read and for the directory
# that receives the output files, then process every input file.
inpath = input("In-path: ") # expected value: ../result/point_count
outpath = input("Out-path: ") # expected value: ../result/criteria_check
batch_process_all_files(inpath, outpath)