In [1]:
import json
from pathlib import Path

from phorest_pipeline.shared.config import DATA_DIR, METADATA_FILENAME

In [2]:
# Show the data directory and manifest filename imported from the pipeline config.
print(DATA_DIR, METADATA_FILENAME, sep="\n")

/mnt/storage/Lisa/GMR/Array/Speed_Test/Ethanol_steps/100_uLmin-1/ethanol_step_50_1/Pos0_Flipped
metadata_manifest.json


In [3]:
def filter_manifest_by_skip(input_filepath: Path, output_filepath: Path, skip_step: int) -> None:
    """
    Write a copy of a manifest in which all but every Nth entry is marked 'skip'.

    Entries are numbered from 1 in file order. An entry whose position is a
    multiple of ``skip_step`` is left untouched, so it keeps whatever
    'processing_status' it already had. Every other entry that is a dict with
    a 'processing_status' key gets that key set to 'skip'. Entries that are
    not dicts, or that lack the key, are written back unchanged but still
    occupy a position in the numbering.

    Problems are reported with ``print`` instead of being raised; in every
    error case the function returns without printing the success summary.

    Args:
        input_filepath (Path): The path to the source manifest JSON file.
            Its top-level value must be a list.
        output_filepath (Path): The path where the filtered JSON file will be
            saved (indented by 4 spaces). Its parent directory must exist.
        skip_step (int): The step value; must be positive. For example, a
            step of 10 leaves every 10th entry untouched and marks the
            others as 'skip'.

    Returns:
        None
    """
    if skip_step <= 0:
        print("Error: 'skip_step' parameter must be a positive integer.")
        return

    # Reading and writing are guarded separately so that a failure while
    # writing (e.g. a missing output directory) is never reported as a
    # missing *input* file.
    try:
        with input_filepath.open('r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: The file '{input_filepath}' was not found.")
        return
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from the file '{input_filepath}'.")
        return
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return

    if not isinstance(data, list):
        print("Error: Input JSON is not a list of entries.")
        return

    processed_count = 0
    # Positions are 1-based so that "every Nth" means the Nth, 2Nth, ... entry.
    for position, entry in enumerate(data, start=1):
        if not (isinstance(entry, dict) and "processing_status" in entry):
            continue
        if position % skip_step == 0:
            # Kept entry: status is deliberately not modified.
            processed_count += 1
        else:
            entry["processing_status"] = "skip"

    try:
        with output_filepath.open('w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)
    except FileNotFoundError:
        print(f"Error: Could not write '{output_filepath}': its parent directory does not exist.")
        return
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return

    print(f"Successfully filtered '{input_filepath.name}'.")
    print(f"{processed_count} entries are marked to be processed.")
    print(f"Output saved to '{output_filepath.name}'")

In [4]:
# The filtered copy is written into DATA_DIR with "_FILTERED" appended to the
# manifest's stem, keeping the original suffix.
input_filepath = Path(DATA_DIR) / METADATA_FILENAME
output_filepath = Path(DATA_DIR) / f"{input_filepath.stem}_FILTERED{input_filepath.suffix}"

# Every 15th manifest entry is left untouched; all others are marked 'skip'.
skip_every = 15

filter_manifest_by_skip(
    input_filepath=input_filepath,
    output_filepath=output_filepath,
    skip_step=skip_every,
)

Successfully filtered 'metadata_manifest.json'.
415 entries are marked to be processed.
Output saved to 'metadata_manifest_FILTERED.json'
