# Motivation

This notebook can be used to find pipeline runs that contain empty evaluation responses even though results were expected. Older versions of Modyn have less robustness in the evaluation handling.

In [None]:
from pathlib import Path

from tqdm import tqdm

from modyn.supervisor.internal.grpc.enums import PipelineStage
from modyn.supervisor.internal.pipeline_executor.models import MultiEvaluationInfo, PipelineLogs, SingleEvaluationInfo

%load_ext autoreload
%autoreload 2

In [None]:
# Root directory that is searched recursively for pipeline logs.
log_dir = Path("/Users/mboether/phd/dynamic-data/sigmod-data/yearbook/debug/logs")

# Path.glob yields entries in filesystem-dependent order; sorting makes the
# listing (and everything derived from it) reproducible across runs.
logfiles = sorted(log_dir.glob("**/pipeline.log"))
logfiles

In [None]:
def metrics_valid(logfile: Path):
    """Report whether every multi-evaluation result in a pipeline log is usable.

    The log file is parsed into ``PipelineLogs`` and every stage run whose id
    equals ``PipelineStage.EVALUATE_MULTI.name`` is inspected.

    Args:
        logfile: Path to a ``pipeline.log`` file containing JSON-serialized
            ``PipelineLogs``.

    Returns:
        ``False`` as soon as one interval result has no metrics although its
        dataset size is non-zero; ``True`` otherwise. An interval with no
        metrics *and* a dataset size of zero only triggers a printed warning.
    """
    pipeline_logs = PipelineLogs.model_validate_json(logfile.read_text())

    for stage_run in pipeline_logs.supervisor_logs.stage_runs:
        # Only the multi-evaluation stage carries the results of interest.
        if stage_run.id != PipelineStage.EVALUATE_MULTI.name:
            continue

        multi_eval = stage_run.info
        assert isinstance(multi_eval, MultiEvaluationInfo)

        for single_eval in multi_eval.interval_results:
            assert isinstance(single_eval, SingleEvaluationInfo)
            results = single_eval.results

            if len(results["metrics"]) > 0:
                continue

            if results["dataset_size"] == 0:
                # Might want to remove this - not sure if needed.
                print(f"Warning: Empty metrics but empty dataset in {logfile}: {single_eval}")
                continue

            # Samples were evaluated but no metrics came back: invalid run.
            return False

    return True

In [None]:
# Collect every log file whose evaluation results fail the validity check;
# tqdm only adds a progress bar around the iteration.
invalid_pipelines = [
    candidate for candidate in tqdm(logfiles) if not metrics_valid(candidate)
]

invalid_pipelines

# Typically, you'd want to delete those directories because they are invalid (see next cell)

In [None]:
# Commented out for safety: the deletion code below is wrapped in a string
# literal, so running this cell as-is removes nothing.
# If the triple quotes are removed, the code collects the parent directory of
# every path in `invalid_pipelines` and deletes each one recursively with
# shutil.rmtree, printing (rather than raising) any failure.

"""
import shutil
parent_dirs = {file_path.parent for file_path in invalid_pipelines}

for directory in parent_dirs:
    try:
        shutil.rmtree(directory)
    except Exception as e:
        print(f"Failed to delete {directory}: {e}")
"""