## Integration in den Prüfprozess



In [1]:
# To acquire full access to the data, connect the Google Drive folder to this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install patchify

Collecting patchify
  Downloading patchify-0.2.3-py3-none-any.whl.metadata (3.0 kB)
Collecting numpy<2,>=1 (from patchify)
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading patchify-0.2.3-py3-none-any.whl (6.6 kB)
Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m105.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, patchify
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.


In [1]:
import os
import glob
import json
import time
import tifffile
import patchify
import cv2
import numpy as np
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict, Optional
from PIL import Image
import torch
from transformers import SamModel, SamConfig, SamProcessor

# Trained checkpoint; its parent directory (".../5e-5") is the root of this run's artifacts.
best_model_save_path = "/content/drive/MyDrive/Colab Notebooks/Bachelor/Plots/Plots_Von_07_09/5e-5/best_model_boxpromt_jitter_20ep_5e5.pt"

image_folder     = "/content/drive/MyDrive/Colab Notebooks/Bachelor/validation_data/images"     # .tif
tps_mask_folder  = "/content/drive/MyDrive/Colab Notebooks/Bachelor/validation_data/tps_layer"  # .png (ROI/TPS)
gt_folder        = "/content/drive/MyDrive/Colab Notebooks/Bachelor/validation_data/gt_masks"   # .png (ground truth for HA)
# Write the inference artifacts next to the checkpoint, i.e. ".../5e-5/inference".
# The former literal ".../5e5/inference" (missing hyphen) named a different directory
# than the one the thickness evaluation reads its CSV files from.
out_folder       = os.path.join(os.path.dirname(best_model_save_path), "inference")
os.makedirs(out_folder, exist_ok=True)

# Masken und Geometrie
def invert_mask(path: str) -> Tuple[np.ndarray, List[np.ndarray]]:
    """Load a grayscale mask and return its filled-contour inverse.

    Args:
        path: File path of the mask, read with ``cv2.IMREAD_GRAYSCALE``.

    Returns:
        ``(inverted, contours)`` where ``inverted`` is a uint8 image that is 0
        inside every external contour found in the mask and 255 everywhere
        else, and ``contours`` is the contour collection returned by
        ``cv2.findContours``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` yields ``None`` for ``path``.
    """
    source = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if source is None:
        raise FileNotFoundError(f"Maske nicht gefunden: {path}")

    outlines, _hierarchy = cv2.findContours(
        source, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    # Start from an all-white canvas and paint the contour interiors black.
    inverted = np.full_like(source, 255, dtype=np.uint8)
    cv2.drawContours(inverted, outlines, -1, 0, thickness=cv2.FILLED)
    return inverted, outlines

def apply_mask(img: np.ndarray, mask: np.ndarray, mode: str = "and") -> np.ndarray:
    """Combine an image with a mask using OpenCV bitwise operations.

    Args:
        img: Image to be masked.
        mask: Mask passed to the OpenCV call.
        mode: ``"and"`` keeps ``img`` where ``mask`` is non-zero
            (``cv2.bitwise_and(img, img, mask=mask)``); ``"or"`` returns
            ``cv2.bitwise_or(img, mask)``.

    Returns:
        The result of the selected OpenCV operation.

    Raises:
        ValueError: If ``mode`` is neither ``"and"`` nor ``"or"``.
    """
    if mode not in ("and", "or"):
        raise ValueError("apply_mask: mode muss 'and' oder 'or' sein")
    if mode == "and":
        return cv2.bitwise_and(img, img, mask=mask)
    return cv2.bitwise_or(img, mask)

def shift_mask_binary(m: np.ndarray, dx: int, dy: int) -> np.ndarray:
    """Translate a 2-D array by ``(dx, dy)`` pixels, filling vacated cells with 0.

    Positive ``dx`` moves content to the right, positive ``dy`` moves it down;
    content pushed past the border is discarded.

    Args:
        m: 2-D input array.
        dx: Horizontal shift in pixels.
        dy: Vertical shift in pixels.

    Returns:
        A new array with the shape and dtype of ``m``.
    """
    rows, cols = m.shape
    shifted = np.zeros_like(m)

    # Destination window inside the output array.
    dst_top, dst_bottom = max(0, dy), min(rows, rows + dy)
    dst_left, dst_right = max(0, dx), min(cols, cols + dx)
    if dst_bottom <= dst_top or dst_right <= dst_left:
        # The shift moves everything out of view.
        return shifted

    # Matching source window inside the input array.
    n_rows = dst_bottom - dst_top
    n_cols = dst_right - dst_left
    src_top = max(0, -dy)
    src_left = max(0, -dx)

    shifted[dst_top:dst_bottom, dst_left:dst_right] = m[
        src_top:src_top + n_rows, src_left:src_left + n_cols
    ]
    return shifted

def pad_to_multiple(a: np.ndarray, multiple: int) -> np.ndarray:
    """Zero-pad the first two axes of ``a`` up to the next multiple of ``multiple``.

    Padding is appended at the bottom and right only; any further axes are
    left untouched. When no padding is needed the input object is returned
    unchanged.

    Args:
        a: Array with at least two dimensions.
        multiple: Target divisor for height and width.

    Returns:
        The padded array, or ``a`` itself if it already fits.
    """
    height, width = a.shape[:2]
    extra_rows = -height % multiple
    extra_cols = -width % multiple
    if extra_rows == 0 and extra_cols == 0:
        return a

    pad_spec = [(0, extra_rows), (0, extra_cols)] + [(0, 0)] * (a.ndim - 2)
    return np.pad(a, pad_spec, mode="constant", constant_values=0)

def ceil_to_multiple(n: int, m: int) -> int:
    """Round ``n`` up to the nearest multiple of ``m``."""
    bumped = n + m - 1
    # Dropping the remainder of the bumped value equals floor-dividing and re-multiplying.
    return bumped - bumped % m

def mask_to_box(mask: np.ndarray, margin: int = 4, min_pixels: int = 1) -> Optional[Tuple[float, float, float, float]]:
    """Compute the bounding box of the positive pixels of a 2-D mask.

    Args:
        mask: 2-D array; every element ``> 0`` counts as foreground.
        margin: Number of pixels the box is grown on each side, clamped to
            the mask borders. Values ``<= 0`` leave the tight box unchanged.
        min_pixels: Minimum number of foreground pixels required for a box.

    Returns:
        ``(x0, y0, x1, y1)`` as floats with inclusive pixel coordinates, or
        ``None`` when the mask has no foreground or fewer than ``min_pixels``
        foreground pixels.
    """
    assert mask.ndim == 2
    H, W = mask.shape
    ys, xs = np.nonzero(mask > 0)
    # An empty foreground can never produce a box. Checking it explicitly keeps
    # min_pixels <= 0 from reaching ys.min() on an empty array (ValueError).
    if ys.size == 0 or ys.size < min_pixels:
        return None
    y0, y1 = int(ys.min()), int(ys.max())
    x0, x1 = int(xs.min()), int(xs.max())
    if margin > 0:
        y0 = max(0, y0 - margin)
        x0 = max(0, x0 - margin)
        y1 = min(H - 1, y1 + margin)
        x1 = min(W - 1, x1 + margin)
    return float(x0), float(y0), float(x1), float(y1)


def make_cosine_window(p: int, eps: float = 1e-3) -> np.ndarray:
    """Build a ``p x p`` separable Hann window with every weight floored at ``eps``.

    The 1-D Hann taper is zero at both ends, so the floor keeps border pixels
    from receiving a zero weight.

    Args:
        p: Side length of the square window.
        eps: Lower bound applied to every weight.

    Returns:
        float32 array of shape ``(p, p)``.
    """
    taper = np.hanning(p).astype(np.float32)
    # Outer product written as a broadcast multiplication.
    window_2d = (taper[:, np.newaxis] * taper[np.newaxis, :]).astype(np.float32)
    return np.maximum(window_2d, eps)

def reconstruct_overlapping(patches: np.ndarray,
                            coords: np.ndarray,
                            H_pad: int,
                            W_pad: int,
                            patch_size: int,
                            window: np.ndarray) -> np.ndarray:
    """Blend overlapping square patches into one image by weighted averaging.

    Each patch is multiplied by ``window`` and accumulated at its top-left
    coordinate; the accumulated sum is then divided by the accumulated
    weights (floored at 1e-6 so uncovered pixels come out as 0).

    Args:
        patches: Iterable of ``patch_size x patch_size`` arrays.
        coords: Matching iterable of ``(y0, x0)`` top-left positions.
        H_pad: Height of the output canvas.
        W_pad: Width of the output canvas.
        patch_size: Side length of every patch.
        window: Per-pixel weights applied to each patch.

    Returns:
        float32 array of shape ``(H_pad, W_pad)``.
    """
    weighted_sum = np.zeros((H_pad, W_pad), dtype=np.float32)
    weight_total = np.zeros((H_pad, W_pad), dtype=np.float32)

    for tile, (top, left) in zip(patches, coords):
        region = (slice(top, top + patch_size), slice(left, left + patch_size))
        weighted_sum[region] += tile.astype(np.float32) * window
        weight_total[region] += window

    return weighted_sum / np.maximum(weight_total, 1e-6)

def reconstruct_overlapping_binary(patches: np.ndarray,
                                   coords: np.ndarray,
                                   H_pad: int,
                                   W_pad: int,
                                   patch_size: int) -> np.ndarray:
    """Merge overlapping patches into one uint8 image by element-wise maximum.

    Args:
        patches: Iterable of ``patch_size x patch_size`` arrays.
        coords: Matching iterable of ``(y0, x0)`` top-left positions.
        H_pad: Height of the output canvas.
        W_pad: Width of the output canvas.
        patch_size: Side length of every patch.

    Returns:
        uint8 array of shape ``(H_pad, W_pad)``; pixels not covered by any
        patch stay 0.
    """
    canvas = np.zeros((H_pad, W_pad), dtype=np.uint8)
    for tile, origin in zip(patches, coords):
        top, left = origin
        # Basic slicing yields a view, so writing into it updates the canvas directly.
        target = canvas[top:top + patch_size, left:left + patch_size]
        np.maximum(target, tile, out=target)
    return canvas

def thickness_per_column(binary_mask_0_255: np.ndarray) -> np.ndarray:
    """Count, for every column, how many pixels are exactly 255.

    Returns:
        int32 array with one count per column (reduction over axis 0).
    """
    is_foreground = binary_mask_0_255 == 255
    return np.count_nonzero(is_foreground, axis=0).astype(np.int32)

def dice_iou(pred_0_255: np.ndarray, gt_0_255: np.ndarray) -> Tuple[float, float]:
    """Compute the Dice coefficient and IoU of two masks.

    Every value ``> 0`` counts as foreground. If both masks are empty, both
    scores are reported as 1.0.

    Args:
        pred_0_255: Predicted mask.
        gt_0_255: Reference mask of the same shape.

    Returns:
        ``(dice, iou)`` as Python floats.
    """
    pred_fg = pred_0_255 > 0
    gt_fg = gt_0_255 > 0

    overlap = np.count_nonzero(pred_fg & gt_fg)
    total = np.count_nonzero(pred_fg) + np.count_nonzero(gt_fg)
    union = total - overlap

    dice = 1.0 if total <= 0 else (2.0 * overlap) / total
    iou = 1.0 if union <= 0 else overlap / union
    return float(dice), float(iou)

def coverage_recall(pred_0_255: np.ndarray, gt_0_255: np.ndarray) -> float:
    """Return the fraction of reference foreground that the prediction covers.

    Every value ``> 0`` counts as foreground. With an empty reference the
    result is 1.0 if the prediction is empty as well and 0.0 otherwise.
    """
    pred_fg = pred_0_255 > 0
    gt_fg = gt_0_255 > 0

    n_gt = np.count_nonzero(gt_fg)
    if n_gt > 0:
        return float(np.count_nonzero(pred_fg & gt_fg) / n_gt)
    return 0.0 if np.any(pred_fg) else 1.0

def basename_noext(p: str) -> str:
    """Return the final path component of ``p`` with its last extension removed."""
    file_name = os.path.split(p)[1]
    root, _extension = os.path.splitext(file_name)
    return root

In [4]:
### TRAINED SAM ###

# Choose the device first so the checkpoint can be mapped onto it while loading.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_config = SamConfig.from_pretrained("facebook/sam-vit-base")
processor    = SamProcessor.from_pretrained("facebook/sam-vit-base")
# Build the architecture from the config only; the weights come from the fine-tuned checkpoint.
my_HA_model  = SamModel(config=model_config)
# map_location: without it, a checkpoint whose tensors were saved on CUDA
# cannot be deserialized on a CPU-only runtime.
my_HA_model.load_state_dict(torch.load(best_model_save_path, map_location=device))

my_HA_model.to(device)
my_HA_model.eval()

# The string below is the disabled alternative that loads the stock (not fine-tuned) SAM.
'''
### SAM UNTRAINIERT ###
model_name = "facebook/sam-vit-base"
# Processor und Standard SAM laden
processor = SamProcessor.from_pretrained(model_name)
sam_model = SamModel.from_pretrained(model_name)
# Gerät wählen und Modell in den Auswertungsmodus setzen
device = "cuda" if torch.cuda.is_available() else "cpu"
sam_model.to(device)
sam_model.eval()
'''

# PARAMETERS

patch_size = 256  # side length of the square patches cut from each image
step       = 128  # stride between neighbouring patches
final_thr  = 0.80  # probability threshold applied to the blended map to obtain the binary prediction
save_viz   = True  # when True, a four-panel overview figure is written per image
shift_px   = 110          # upward shift in pixels used to build the HA strip
margin    = 4            # box margin in pixels (buffer around the tight box)
min_px    = 1            # minimum number of pixels required for a box



# PAIRING
# image <-> TPS layer <-> ground truth, matched by file name without extension

imgs = glob.glob(os.path.join(image_folder, "*.tif"))
imgs.sort()
tps = glob.glob(os.path.join(tps_mask_folder, "*.png"))
tps.sort()
gts = glob.glob(os.path.join(gt_folder, "*.png"))
gts.sort()

# stem -> path lookups; only stems present in all three folders form a triple
img_map = dict(zip(map(basename_noext, imgs), imgs))
tps_map = dict(zip(map(basename_noext, tps), tps))
gt_map  = dict(zip(map(basename_noext, gts), gts))

common = sorted(img_map.keys() & tps_map.keys() & gt_map.keys())
pairs = [tuple(lookup[k] for lookup in (img_map, tps_map, gt_map)) for k in common]
print(f"Gefundene Tripel: {len(pairs)}")

# PROCESSING
# For every (image, TPS mask, ground truth) triple: mask the image, derive the HA
# strip that serves as box prompt, run SAM patch-wise, blend the patch
# probabilities back into a full-size map and write masks, thickness profiles,
# metrics, prompt log, metadata and an optional overview figure.

# Blending window reused for every patch of every image.
win = make_cosine_window(patch_size)

for idx, (img_path, tps_path, gt_path) in enumerate(pairs, 1):
    stem = basename_noext(img_path)
    print(f"[{idx}/{len(pairs)}] {stem}")
    t0 = time.perf_counter()
    try:
        # Load
        large_img = tifffile.imread(img_path)
        if large_img is None or large_img.ndim != 2:
            raise ValueError("Erwarte zweidimensionales Graubild")

        tps_inv, _ = invert_mask(tps_path)            # 0 inside the TPS contours, 255 elsewhere
        img_masked = apply_mask(large_img, tps_inv)   # keep image pixels only where tps_inv is non-zero

        gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)
        if gt is None:
            raise FileNotFoundError(f"GT nicht gefunden: {gt_path}")
        gt_bin = (gt > 0).astype(np.uint8) * 255
        gt_masked = cv2.bitwise_and(gt_bin, tps_inv)  # same restriction for the ground truth

        # Build the HA strip from the shifted zero region: pixels that become part
        # of the zero region after shifting it up by shift_px but were not part of
        # it before, i.e. a band directly above the zero region.
        zero_mask = (img_masked == 0).astype(np.uint8) * 255
        kernel = np.ones((3, 3), np.uint8)
        zero_mask = cv2.morphologyEx(zero_mask, cv2.MORPH_OPEN, kernel)  # remove isolated zero pixels
        m_orig  = (zero_mask > 0).astype(np.uint8)
        m_shift = shift_mask_binary(m_orig, dx=0, dy=-shift_px)
        ha_mask = ((m_shift == 1) & (m_orig == 0)).astype(np.uint8) * 255

        # Patch extraction
        assert img_masked.shape == gt_masked.shape == ha_mask.shape
        H_orig, W_orig = img_masked.shape
        img_pad = pad_to_multiple(img_masked, patch_size)
        gt_pad  = pad_to_multiple(gt_masked,  patch_size)
        ha_pad  = pad_to_multiple(ha_mask,    patch_size)

        img_p = patchify.patchify(img_pad, (patch_size, patch_size), step=step)
        gt_p  = patchify.patchify(gt_pad,  (patch_size, patch_size), step=step)
        ha_p  = patchify.patchify(ha_pad,  (patch_size, patch_size), step=step)

        # Flatten the patch grid and remember every patch's top-left position.
        img_patches, gt_patches, ha_patches, coords = [], [], [], []
        for i in range(img_p.shape[0]):
            for j in range(img_p.shape[1]):
                y0 = i * step
                x0 = j * step
                img_patches.append(img_p[i, j])
                gt_patches.append(gt_p[i, j].astype(np.uint8))
                ha_patches.append(ha_p[i, j].astype(np.uint8))
                coords.append((y0, x0))
        img_patches = np.array(img_patches)
        gt_patches  = np.array(gt_patches)
        ha_patches  = np.array(ha_patches)
        coords      = np.array(coords, dtype=np.int32)

        # Inference with SAM, prompted with one box per patch derived from the HA strip
        N, H, W = img_patches.shape
        probs = np.zeros((N, H, W), dtype=np.float32)

        prompt_log = {
            "promptart": "box",
            "quelle": "ha",
            "parameter": {"margin": margin, "min_pixels": min_px,
                          "patch_size": patch_size, "step": step,
                          "shift_px": shift_px},
            "prompts": []
        }

        with torch.no_grad():
            for k in range(N):
                img_patch = img_patches[k]
                ha_patch  = ha_patches[k]

                box = mask_to_box(ha_patch, margin=margin, min_pixels=min_px)

                # Fallback box when the patch contains no HA region:
                # a thin full-width band around the vertical centre.
                if box is None:
                    h, w = img_patch.shape
                    cy = max(0, min(h - 1, h // 2))
                    band = 2
                    y0b = max(0, cy - band)
                    y1b = min(h - 1, cy + band)
                    box = [0, y0b, w - 1, y1b]

                x0, y0, x1, y1 = box
                # NOTE(review): assumes img_patch has a dtype that PIL converts to RGB
                # meaningfully (e.g. uint8) — confirm for 16-bit TIFF input.
                pil_img = Image.fromarray(img_patch).convert("RGB")

                inputs = processor(
                    images=pil_img,
                    input_boxes=[[[x0, y0, x1, y1]]],
                    return_tensors="pt"
                )
                inputs = {kk: vv.to(device) for kk, vv in inputs.items()}

                #out = sam_model(**inputs, multimask_output=False) # for the stock (not fine-tuned) SAM
                out = my_HA_model(**inputs, multimask_output=False) # for the fine-tuned SAM
                # Must match the (H, W) patch shape, otherwise the assignment below fails.
                p_map = torch.sigmoid(out.pred_masks[0, 0, 0])
                probs[k] = p_map.cpu().numpy()

                # Log the prompt box in full-image coordinates.
                gy, gx = int(coords[k][0]), int(coords[k][1])
                box_global = [float(x0 + gx), float(y0 + gy), float(x1 + gx), float(y1 + gy)]
                prompt_log["prompts"].append({
                    "patch_index": int(k),
                    "patch_top_left": [gy, gx],
                    "box_global_xyxy": box_global
                })

        # Reconstruction
        H_pad = ceil_to_multiple(H_orig, patch_size)
        W_pad = ceil_to_multiple(W_orig, patch_size)

        full_prob = reconstruct_overlapping(probs, coords, H_pad, W_pad, patch_size, win)
        full_pred = (full_prob > final_thr).astype(np.uint8) * 255
        full_img  = reconstruct_overlapping(img_patches.astype(np.float32), coords, H_pad, W_pad, patch_size, win)
        full_gt   = reconstruct_overlapping_binary(gt_patches, coords, H_pad, W_pad, patch_size)

        # Crop back to the original size
        full_img_c  = full_img[:H_orig, :W_orig]
        full_prob_c = full_prob[:H_orig, :W_orig]
        full_pred_c = full_pred[:H_orig, :W_orig]
        full_gt_c   = full_gt[:H_orig, :W_orig]

        # Save artifacts
        prob_u8 = np.rint(np.clip(full_prob_c * 255.0, 0, 255)).astype(np.uint8)
        prob_dir = os.path.join(out_folder, "prob_maps")
        os.makedirs(prob_dir, exist_ok=True)
        out_prob = os.path.join(prob_dir, f"{stem}_prob.png")

        pred_dir = os.path.join(out_folder, "pred_masks")
        os.makedirs(pred_dir, exist_ok=True)
        out_pred = os.path.join(pred_dir, f"{stem}_pred.png")

        gt_dir = os.path.join(out_folder, "gt_mask")
        os.makedirs(gt_dir, exist_ok=True)
        out_gt = os.path.join(gt_dir, f"{stem}_gt.png")

        cv2.imwrite(out_prob, prob_u8)
        cv2.imwrite(out_pred, full_pred_c)
        cv2.imwrite(out_gt,   full_gt_c)

        # Thickness profiles (foreground pixel count per image column)
        th_pred = thickness_per_column(full_pred_c)
        th_gt   = thickness_per_column(full_gt_c)

        thickness_pred_dir = os.path.join(out_folder, "thickness_pred")
        os.makedirs(thickness_pred_dir, exist_ok=True)
        out_th_pred = os.path.join(thickness_pred_dir, f"{stem}_thickness_pred.csv")

        thickness_gt_dir = os.path.join(out_folder, "thickness_gt")
        os.makedirs(thickness_gt_dir, exist_ok=True)
        out_th_gt   = os.path.join(thickness_gt_dir, f"{stem}_thickness_gt.csv")

        idxs = np.arange(W_orig, dtype=np.int32)
        np.savetxt(out_th_pred, np.c_[idxs, th_pred], delimiter=",",
                   header="spaltenindex,dicke_pixel", fmt="%d", comments="")
        np.savetxt(out_th_gt,   np.c_[idxs, th_gt],   delimiter=",",
                   header="spaltenindex,dicke_pixel", fmt="%d", comments="")

        # Per-image metrics
        dice, iou = dice_iou(full_pred_c, full_gt_c)
        diff = th_pred.astype(np.int32) - th_gt.astype(np.int32)
        mae  = float(np.mean(np.abs(diff))) if diff.size else 0.0
        bias = float(np.mean(diff)) if diff.size else 0.0
        mabs = int(np.max(np.abs(diff))) if diff.size else 0
        cover = coverage_recall(full_pred_c, full_gt_c)
        # Runtime covers everything up to this point; the JSON and figure writes below are excluded.
        t_ms = (time.perf_counter() - t0) * 1000.0

        metrics_dir = os.path.join(out_folder, "metrics")
        os.makedirs(metrics_dir, exist_ok=True)

        metrics = {
            "dice": dice,
            "iou": iou,
            "mae_dicke": mae,
            "bias_dicke": bias,
            "max_abs_err": mabs,
            "abdeckung": cover,
            "laufzeit_ms": float(round(t_ms, 1))
        }
        with open(os.path.join(metrics_dir, f"{stem}_metrics.json"), "w") as f:
            json.dump(metrics, f, indent=2)

        # Prompt log
        prompts_dir = os.path.join(out_folder, "prompts")
        os.makedirs(prompts_dir, exist_ok=True)
        with open(os.path.join(prompts_dir, f"{stem}_prompt_protokoll.json"), "w") as f:
            json.dump(prompt_log, f, indent=2)

        # Image metadata
        metadata_dir = os.path.join(out_folder, "metadata")
        os.makedirs(metadata_dir, exist_ok=True)
        metadata = {
            "bildkennung": stem,
            "patchgroesse": int(patch_size),
            "schrittweite": int(step),
            # Overlap fraction of neighbouring patches. step / patch_size is the
            # stride fraction and equals the overlap only when step == patch_size / 2.
            "ueberlappung": float((patch_size - step) / patch_size),
            "schwellenwert": float(final_thr),
            "verwendete_nachbearbeitung": "keine",
            "prompt_quelle": "ha",
            "ha_shift_px": int(shift_px),
            "blending": "cosine_window_hanning_eps1e-3",
            "prob_map_skalierung": "round(prob*255)"
        }
        with open(os.path.join(metadata_dir, f"{stem}_bildmetadaten.json"), "w") as f:
            json.dump(metadata, f, indent=2)

        # Visualisation
        if save_viz:
            viz_dir = os.path.join(out_folder, "viz")
            os.makedirs(viz_dir, exist_ok=True)
            fig, axes = plt.subplots(1, 4, figsize=(16, 4))
            axes[0].imshow(full_img_c, cmap="gray"); axes[0].set_title("Bild"); axes[0].axis("off")
            axes[1].imshow(full_gt_c, cmap="gray");  axes[1].set_title("GT"); axes[1].axis("off")
            axes[2].imshow(prob_u8, vmin=0, vmax=255); axes[2].set_title("Wahrscheinlichkeit"); axes[2].axis("off")
            axes[3].imshow(full_pred_c, cmap="gray"); axes[3].set_title("Vorhersage"); axes[3].axis("off")
            plt.tight_layout()
            plt.savefig(os.path.join(viz_dir, f"{stem}_viz.png"), dpi=150)
            plt.close(fig)  # release the figure so memory does not grow across images

        print(f"{stem}: prob, pred, profile und JSON gespeichert")

        if device == "cuda":
            torch.cuda.empty_cache()

    except Exception as e:
        # Best effort per image: report the failure and continue with the next triple.
        print(f"Fehler bei {stem}: {e}")

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Gefundene Tripel: 21
[1/21] 2181_1_3 (5)
2181_1_3 (5): prob, pred, profile und JSON gespeichert
[2/21] 2181_2_1 (4)
2181_2_1 (4): prob, pred, profile und JSON gespeichert
[3/21] 2181_3_1 (3)
2181_3_1 (3): prob, pred, profile und JSON gespeichert
[4/21] 2181_3_2 (2)
2181_3_2 (2): prob, pred, profile und JSON gespeichert
[5/21] 2181_3_2 (4)
2181_3_2 (4): prob, pred, profile und JSON gespeichert
[6/21] 2181_3_3 (1)
2181_3_3 (1): prob, pred, profile und JSON gespeichert
[7/21] 2303_01 (4)
2303_01 (4): prob, pred, profile und JSON gespeichert
[8/21] 2303_02 (8)
2303_02 (8): prob, pred, profile und JSON gespeichert
[9/21] 2303_04 (3)
2303_04 (3): prob, pred, profile und JSON gespeichert
[10/21] 2303_05 (4)
2303_05 (4): prob, pred, profile und JSON gespeichert
[11/21] 2303_06 (2)
2303_06 (2): prob, pred, profile und JSON gespeichert
[12/21] 2303_06 (4)
2303_06 (4): prob, pred, profile und JSON gespeichert
[13/21] 2303_08 (2)
2303_08 (2): prob, pred, profile und JSON gespeichert
[14/21] 2303_0

# Berechnung des Vorhersagefehlers

In [5]:
from pathlib import Path
import re
import math
import numpy as np
import pandas as pd
from scipy import stats

# Paths
# base_dir is the inference output directory; the two sub-folders hold the
# per-column thickness CSV files of the prediction and of the ground truth.
# Note: pred_dir and gt_dir rebind names that the inference loop also assigns
# (there as str paths of other sub-folders).
base_dir = Path("/content/drive/MyDrive/Colab Notebooks/Bachelor/Plots/Plots_Von_07_09/5e-5/inference")
pred_dir = base_dir / "thickness_pred"
gt_dir   = base_dir / "thickness_gt"

# ---------- Helfer ----------

def has_header(csv_path):
    """Guess whether the first line of a CSV file is a header row.

    The first line counts as a header as soon as it contains at least one
    letter (ASCII or German umlaut). An empty file therefore has no header.
    """
    with open(csv_path, 'r', encoding='utf-8', errors='ignore') as handle:
        first_line = handle.readline()
    return re.search(r'[A-Za-zÄÖÜäöü]', first_line) is not None

def read_thickness_csv(csv_path):
    """Read a two-column thickness CSV into a normalised DataFrame.

    The file is read with its header row when ``has_header`` detects one and
    at least two columns are present; the first two columns are then renamed
    to ``spaltenindex`` and ``dicke_pixel``. Otherwise the file is read
    without a header under those two names. Both columns are converted to
    numbers (a ``,`` in a value is replaced by ``.`` first; unparsable values
    become NaN), duplicate indices are dropped and the rows are sorted by
    ``spaltenindex``.

    Returns:
        DataFrame with the columns ``spaltenindex`` and ``dicke_pixel`` and a
        fresh 0..n-1 index.
    """
    names = ["spaltenindex", "dicke_pixel"]

    table = None
    if has_header(csv_path):
        candidate = pd.read_csv(csv_path)
        first_two = list(candidate.columns)[:2]
        if len(first_two) == 2:
            # Take the first two columns, whatever they are called, under the standard names.
            table = candidate.rename(columns=dict(zip(first_two, names)))[names]
    if table is None:
        # No header detected, or a header with fewer than two columns.
        table = pd.read_csv(csv_path, header=None, names=names)

    # Normalise decimal commas and coerce to numeric values.
    for col in names:
        as_text = table[col].astype(str).str.replace(',', '.', regex=False)
        table[col] = pd.to_numeric(as_text, errors='coerce')

    # One row per column index, in ascending order.
    deduplicated = table.drop_duplicates(subset=["spaltenindex"])
    return deduplicated.sort_values("spaltenindex").reset_index(drop=True)

def clean_stem(p: Path):
    """Return the file stem of ``p`` without a trailing ``_pred`` or ``_gt`` (case-insensitive)."""
    return re.sub(r'_(pred|gt)$', '', p.stem, flags=re.IGNORECASE)

def compute_metrics(errors: np.ndarray, total_cols: int):
    """Summarise per-column thickness errors.

    Args:
        errors: 1-D array of errors ``e(j) = d_pred - d_ref``; non-finite
            entries are ignored.
        total_cols: Total number of columns before filtering, used as the
            denominator of the coverage.

    Returns:
        Dict with the keys ``W`` (number of finite errors), ``total_columns``,
        ``coverage`` (``W / total_cols``, NaN if ``total_cols <= 0``), ``MAE``,
        ``Bias`` (mean error), ``Std`` (sample standard deviation, ddof=1),
        ``CI95_lower`` / ``CI95_upper`` (t-based 95 % confidence interval of
        the bias) and ``MaxAbsError``. Statistics that cannot be computed
        (no finite error, or a single one for Std/CI) are NaN.
    """
    errors = np.asarray(errors, dtype=float)
    finite = np.isfinite(errors)
    W = int(np.sum(finite))
    # Computed up front so that W == 0 reports 0.0 coverage (no column was
    # usable) instead of NaN whenever the number of columns is known.
    coverage = float(W / total_cols) if total_cols > 0 else np.nan

    if W == 0:
        return dict(W=0, total_columns=int(total_cols), coverage=coverage,
                    MAE=np.nan, Bias=np.nan, Std=np.nan,
                    CI95_lower=np.nan, CI95_upper=np.nan,
                    MaxAbsError=np.nan)

    e = errors[finite]
    mae = float(np.mean(np.abs(e)))
    bias = float(np.mean(e))
    std = float(np.std(e, ddof=1)) if W > 1 else np.nan

    if W > 1 and np.isfinite(std):
        # Two-sided 95 % interval for the mean error, Student t with W-1 degrees of freedom.
        tcrit = stats.t.ppf(0.975, df=W-1)
        half_width = tcrit * std / math.sqrt(W)
        ci_lo = bias - half_width
        ci_hi = bias + half_width
    else:
        ci_lo = np.nan
        ci_hi = np.nan

    max_abs = float(np.max(np.abs(e)))

    return dict(W=W, total_columns=int(total_cols), coverage=coverage,
                MAE=mae, Bias=bias, Std=std,
                CI95_lower=ci_lo, CI95_upper=ci_hi,
                MaxAbsError=max_abs)

# ---------- Dateien paaren ----------

# Collect the thickness CSV files of both folders and index them by cleaned stem.
pred_files = [*pred_dir.glob("*.csv")]
gt_files   = [*gt_dir.glob("*.csv")]

pred_map = dict((clean_stem(p), p) for p in pred_files)
gt_map   = dict((clean_stem(p), p) for p in gt_files)

# Stems present on both sides are evaluated; one-sided stems are only reported.
common_stems = sorted(pred_map.keys() & gt_map.keys())
missing_pred = sorted(gt_map.keys() - pred_map.keys())
missing_gt   = sorted(pred_map.keys() - gt_map.keys())

if len(missing_pred) > 0:
    print("Warnung: Es fehlen _pred Dateien für:", missing_pred)
if len(missing_gt) > 0:
    print("Warnung: Es fehlen _gt Dateien für:", missing_gt)

# ---------- Metriken je Paar und gesamt ----------

rows = []        # one metrics dict per file pair, plus the pooled summary at the end
all_errors = []  # per-file error vectors, pooled for the overall summary

for stem in common_stems:
    df_pred = read_thickness_csv(pred_map[stem]).rename(columns={"dicke_pixel": "d_pred"})
    df_gt   = read_thickness_csv(gt_map[stem]).rename(columns={"dicke_pixel": "d_ref"})

    # Outer join on the column index so that columns present on one side only
    # still count towards the total.
    merged = df_pred[["spaltenindex", "d_pred"]].merge(
        df_gt[["spaltenindex", "d_ref"]],
        on="spaltenindex", how="outer", sort=True,
    )

    # Total number of columns before filtering
    total_cols = merged["spaltenindex"].nunique()

    # Valid pairs: both thickness values are finite
    valid = np.isfinite(merged["d_pred"]) & np.isfinite(merged["d_ref"])

    # Signed error: prediction minus reference
    merged_valid = merged.loc[valid].assign(e=lambda frame: frame["d_pred"] - frame["d_ref"])

    # Per-file metrics
    metrics = {**compute_metrics(merged_valid["e"].to_numpy(), total_cols), "datei_stamm": stem}
    rows.append(metrics)

    # Kept for the pooled summary
    all_errors.append(merged_valid["e"].to_numpy())

# Pooled summary over all file pairs
if all_errors:
    all_errors_vec = np.concatenate(all_errors)
    total_cols_sum = sum([row["total_columns"] for row in rows])  # column total over all files
    overall = compute_metrics(all_errors_vec, total_cols_sum)
    overall["datei_stamm"] = "ALLE_DATEIEN"
    rows.append(overall)

# ---------- Ergebnis als Tabelle ----------

cols_order = ["datei_stamm", "W", "total_columns", "coverage",
              "MAE", "Bias", "Std", "CI95_lower", "CI95_upper", "MaxAbsError"]

# Passing columns= selects and orders the dict keys and, unlike indexing the
# finished frame with cols_order, also works when rows is empty (no file pair
# found) instead of raising a KeyError.
result_df = pd.DataFrame(rows, columns=cols_order)
if result_df.empty:
    print("Warnung: Keine gemeinsamen Dateipaare gefunden; Zusammenfassung ist leer.")

# tidy up and round the floating-point metrics
num_cols = ["coverage", "MAE", "Bias", "Std", "CI95_lower", "CI95_upper", "MaxAbsError"]
result_df[num_cols] = result_df[num_cols].astype(float).round(6)

# save
out_path = base_dir / "thickness_metrics_summary_2.csv"
result_df.to_csv(out_path, index=False)
print(f"Fertig. Zusammenfassung gespeichert unter:\n{out_path}")

# show the first rows
result_df.head(10)

Fertig. Zusammenfassung gespeichert unter:
/content/drive/MyDrive/Colab Notebooks/Bachelor/Plots/Plots_Von_07_09/5e-5/inference/thickness_metrics_summary_2.csv


Unnamed: 0,datei_stamm,W,total_columns,coverage,MAE,Bias,Std,CI95_lower,CI95_upper,MaxAbsError
0,2181_1_3 (5)_thickness,2560,2560,1.0,7.125781,-4.936719,9.963312,-5.322852,-4.550585,52.0
1,2181_2_1 (4)_thickness,2560,2560,1.0,27.264062,-20.889062,29.889274,-22.047437,-19.730688,94.0
2,2181_3_1 (3)_thickness,2560,2560,1.0,16.079297,-13.417578,22.657211,-14.29567,-12.539486,99.0
3,2181_3_2 (2)_thickness,2560,2560,1.0,11.932422,-11.648047,13.640701,-12.176699,-11.119394,76.0
4,2181_3_2 (4)_thickness,2560,2560,1.0,35.31875,-35.150781,32.17255,-36.397645,-33.903917,134.0
5,2181_3_3 (1)_thickness,2560,2560,1.0,19.166016,-16.945703,23.57315,-17.859293,-16.032113,102.0
6,2303_01 (4)_thickness,2560,2560,1.0,5.215625,1.336719,8.472094,1.008378,1.665059,65.0
7,2303_02 (8)_thickness,2560,2560,1.0,3.588672,-2.006641,5.08278,-2.203626,-1.809655,101.0
8,2303_04 (3)_thickness,2560,2560,1.0,4.366016,-2.095703,7.340583,-2.380191,-1.811215,74.0
9,2303_05 (4)_thickness,2560,2560,1.0,5.265234,-4.572266,5.649549,-4.791217,-4.353314,46.0
