In [1]:
fld = "/data_1/ATM/data_1/sfm/projects/src_test2/stats/"
txt_file = "2024_03_04_00_16_21_Tapas_raw.txt"

In [2]:
f = open(fld + txt_file, 'r')
raw_output = f.read()

In [29]:
import re

def extract_stats(raw_output):
    stats = {
        "general_info": {"total_images": 0, "matches": 0},
        "calibration": {},
        "epochs": [],
        "warnings": {"total_warnings": 0, "types": []}
    }

    lines = raw_output.split("\n")

    current_epoch = None
    for line in lines:

        # count number of images
        if '"OIS.*tif":' in line:
            stats["general_info"]["total_images"] = int(re.search(r'(\d+) matches', line).group(1))

        # get calibration info
        if "MdPppppF=" in line:
            stats["calibration"] = {
                "MdPppppF": float(re.search(r'MdPppppF= (\d+\.\d+)', line).group(1)),
                "SFE": float(re.search(r'SFE=(-?\d+(?:\.\d+)?)', line).group(1)),
                "FocMm": float(re.search(r'FocMm(\d+\.\d+)', line).group(1)),
                "XSZ": [int(x) for x in re.findall(r'XSZ=\[(\d+),(\d+)\]', line)[0]],
            }

        # get summary of an iteration
        if "--- End Iter" in line:
            match = re.search(r'--- End Iter (\d+) STEP (\d+)', line)
            if match:
                current_epoch = {"epoch": f"{match.group(1)}-{match.group(2)}", "image_processing": [], "statistical_summary": {}, "epoch_stats": {}}
                stats["epochs"].append(current_epoch)

        # get properties of images
        if "RES:" in line and current_epoch is not None:
            image_stats, image_name = parse_image_stats(line)
            if image_name:  # Ensure an image name was actually found
                current_epoch["image_processing"].append({"image": image_name, **image_stats})

        if "Stat on type of point" in line and current_epoch:
            current_epoch["statistical_summary"] = {}

        if "Perc=" in line and current_epoch:
            match = re.search(r'Perc=(\d+\.\d+)% ;  Nb=(\d+) for (\w+)', line)
            if match:
                current_epoch["statistical_summary"][match.group(3)] = {
                    "percentage": float(match.group(1)),
                    "number": int(match.group(2)),
                }
        if "| |" in line and current_epoch:
            key, value = line.split(";;")[0].strip("| | ").split(" = ")
            current_epoch["epoch_stats"][key] = float(value)
    print(stats)

    return stats


def parse_image_stats(line):
    # Split the line by spaces, but first remove the image name part to avoid confusion
    image_name_match = re.search(r'RES:\[(.+?)\]\[C\]', line)
    if not image_name_match:
        return {}, ""
    image_name = image_name_match.group(1)

    # Remove the part of the line before the stats start
    stats_part = line.split(" ", 2)[-1]  # Gets the part after the image name and [C]

    # Now, split the stats part into individual stats assuming key-value pairs
    parts = stats_part.split()
    stats_dict = {}
    i = 0
    while i < len(parts) - 1:  # Ensure there's always a pair to process
        key = parts[i].rstrip(':')  # Clean up the key
        try:
            # Attempt to convert the next part to a float
            value = float(parts[i + 1])
            stats_dict[key] = value
            i += 2  # Move past this key-value pair
        except ValueError:
            # If conversion fails, it's likely not a numeric value; skip this pair
            i += 1  # Just move to the next part, treating it as a potential key

    return stats_dict, image_name


In [30]:
import json
jsons_stats = extract_stats(raw_output)
print(json.dumps(jsons_stats, indent=4))

{
    "general_info": {
        "total_images": 9,
        "matches": 0
    },
    "calibration": {
        "MdPppppF": 1.81514,
        "SFE": -1.0,
        "FocMm": 152.274,
        "XSZ": [
            8800,
            8800
        ]
    },
    "epochs": [
        {
            "epoch": "0-0",
            "image_processing": [
                {
                    "image": "OIS-Reech_CA214732V0027.tif",
                    "Nn": 0.367647,
                    "Of": 544.0,
                    "Mul": 208.0,
                    "Mul-NN": 0.0,
                    "Time": 0.00979686
                },
                {
                    "image": "OIS-Reech_CA214732V0028.tif",
                    "Nn": 1.08696,
                    "Of": 828.0,
                    "Mul": 414.0,
                    "Mul-NN": 0.0,
                    "Time": 0.01389
                },
                {
                    "image": "OIS-Reech_CA214732V0029.tif",
                    "Nn": 0.938967,
         