In [1]:
import os
import re
import pandas as pd
from pathlib import Path
from typing import List, Dict

# --- Adjust paths ---
BASE = Path(".")  # current working directory
labels_path = BASE / "labels_per_id.csv"
search_dir = BASE / "fixations"  # folder containing the fixation CSV files

# --- Fixed categories; the list order is the bit order of the label bit string ---
CATEGORIES = ["meme", "ort", "person", "politik", "text"]

# --- Helper functions ---
fname_re = re.compile(r"^(P\d{3})_id(\d{3})_([a-zA-Z0-9\-]+)\.csv$")

def parse_filename(name: str):
    """Split a fixation file name into its three components.

    The expected shape is ``P<3 digits>_id<3 digits>_<category>.csv`` where
    the category consists of letters, digits and hyphens only (no underscore).

    Args:
        name: Bare file name, without any directory part.

    Returns:
        A ``(participant, id_str, cat)`` tuple of strings, e.g.
        ``("P000", "001", "meme")``, or ``None`` when the name does not match.
    """
    found = fname_re.match(name)
    return found.groups() if found else None

def detect_label_columns(df: pd.DataFrame, categories: List[str]) -> List[str]:
    """Return the categories that exist as columns of ``df``.

    Args:
        df: Frame whose column labels are inspected.
        categories: Category names to look for (exact match).

    Returns:
        The matching categories, in the order given by ``categories``.
    """
    present = []
    for category in categories:
        if category in df.columns:
            present.append(category)
    return present

def detect_weight_columns(df: pd.DataFrame, categories: List[str]) -> Dict[str, str]:
    """Map each category to the column of ``df`` that looks like its weight.

    Matching is case-insensitive on both the column name and the category.
    A column qualifies for a category when its lower-cased name either

    * contains both ``"weight"`` and the category, or
    * starts with ``w`` + optional ``_``/``-`` + category, or ends with
      category + optional ``_``/``-`` + ``w`` (e.g. ``w_ort``, ``person_w``).

    The first qualifying column in column order wins.

    Args:
        df: Frame whose column labels are inspected.
        categories: Category names to find weight columns for.

    Returns:
        ``{category: original column label}`` for every category that has a
        matching column; categories without a match are omitted.
    """
    # Lower-cased name -> original label. ``str()`` keeps non-string labels
    # (e.g. integer headers) from crashing the scan.
    lower_cols = {str(col).lower(): col for col in df.columns}

    mapping: Dict[str, str] = {}
    for cat in categories:
        # Column names are compared lower-cased, so the category must be too;
        # escape it so regex metacharacters in a category are taken literally.
        needle = str(cat).lower()
        escaped = re.escape(needle)
        short_form = re.compile(rf"(^w[_\-]*{escaped}|{escaped}[_\-]*w$)")

        for lowered, original in lower_cols.items():
            if ("weight" in lowered and needle in lowered) or short_form.search(lowered):
                mapping[cat] = original
                break
    return mapping

def _is_truthy_label(val) -> bool:
    """Decide whether a single label cell counts as "set"."""
    if pd.isna(val):
        return False
    if isinstance(val, str):
        text = val.strip().lower()
        if text in ("", "0", "false"):
            return False
        try:
            # Numeric strings such as "0.0" or "00" are zero, hence unset.
            return float(text) != 0.0
        except ValueError:
            # Any other non-empty text counts as set.
            return True
    return bool(val)


def to_bitstring(row: pd.Series, label_cols: List[str], categories: List[str]) -> str:
    """Encode the label cells of one row as a string of ``0``/``1`` characters.

    One character is produced per entry of ``categories``, in that order.
    A category yields ``1`` only when it is listed in ``label_cols`` and the
    row's value for it is set. Treated as not set: missing values (NaN/None),
    numeric zero, ``False``, and strings that are empty, ``"false"`` (any
    case, surrounding whitespace ignored) or parse to numeric zero.

    Args:
        row: One row of the labels table, indexed by column name.
        label_cols: Categories that actually exist as columns in the table.
        categories: All categories; defines length and order of the result.

    Returns:
        The bit string, e.g. ``"10001"``.
    """
    available = set(label_cols)
    return "".join(
        "1" if cat in available and _is_truthy_label(row[cat]) else "0"
        for cat in categories
    )

def normalize_id(v):
    """Normalise an ID value to a zero-padded three-character string.

    The last run of digits in ``str(v)`` is used (at most its final three
    digits), so ``"P001_id012"`` and ``12`` both give ``"012"``.

    Args:
        v: Raw ID cell; typically an int, float or string.

    Returns:
        The three-digit ID string, or ``None`` when ``v`` contains no digit
        (this includes NaN).
    """
    # A numeric ID column containing missing values is read as float, and
    # str(12.0) == "12.0" would make the trailing "0" the last digit run.
    if isinstance(v, float) and v.is_integer():
        v = int(v)

    m = re.search(r"(\d{1,3})(?!.*\d)", str(v))
    if m is None:
        return None
    return f"{int(m.group(1)):03d}"

# --- Load labels ---
labels_df = pd.read_csv(labels_path)

# Locate the ID column: a well-known column name takes precedence ...
id_col_candidates = ["id", "image_id", "img_id", "stim_id", "stimulus_id", "stimulus", "file_id"]
id_col = None
for candidate in id_col_candidates:
    if candidate in labels_df.columns:
        id_col = candidate
        break
else:
    # ... otherwise take the first column holding a value shaped like "P###_id###".
    for column in labels_df.columns:
        looks_like_id = labels_df[column].astype(str).str.match(r"^P\d{3}_id\d{3}")
        if looks_like_id.any():
            id_col = column
            break

if id_col is None:
    raise ValueError("Keine ID-Spalte in labels_per_id.csv gefunden.")

# Three-digit ID string used to join labels with the fixation file names.
labels_df["id3"] = labels_df[id_col].apply(normalize_id)

# Detect which label and weight columns the table provides.
label_cols = detect_label_columns(labels_df, CATEGORIES)
weight_cols_map = detect_weight_columns(labels_df, CATEGORIES)

# One bit string per row, bits ordered as in CATEGORIES.
labels_df["bits"] = labels_df.apply(to_bitstring, axis=1, args=(label_cols, CATEGORIES))

# Keep only what the fixation step needs: ID, bits and each weight column once.
merge_cols = ["id3", "bits", *set(weight_cols_map.values())]
labels_small = labels_df.loc[:, merge_cols].copy()

# --- Process fixation files (rewritten and renamed in place) ---
# For every file named P###_id###_<category>.csv in search_dir:
#   1. look up the label row for the file's three-digit ID,
#   2. add one constant column "weight_<category>" per detected weight column,
#   3. overwrite the CSV and append "_<bits>" to its file name.
# Files without a matching label row are reported as SKIPPED and left untouched.

# One label row per ID (first occurrence wins); rows without a usable ID are dropped.
labels_by_id = (
    labels_small.dropna(subset=["id3"])
    .drop_duplicates(subset="id3")
    .set_index("id3")
)

processed = []
# Materialise the listing before renaming anything so the loop never iterates
# a directory scan that is being modified; sorting makes the report order stable.
for csv_path in sorted(search_dir.glob("P*_id*_[a-zA-Z0-9-]*.csv")):
    name = csv_path.name
    parsed = parse_filename(name)
    if not parsed:
        # The glob is looser than the regex (e.g. it also matches files that
        # already carry a bit suffix); those are ignored.
        continue
    participant, id3, cat = parsed

    if id3 not in labels_by_id.index:
        processed.append({"original": name, "status": "SKIPPED"})
        continue

    r = labels_by_id.loc[id3]
    bits = r["bits"]

    # Load the CSV and broadcast each weight into its own column.
    df_fix = pd.read_csv(csv_path)
    for cat_name, w_col in weight_cols_map.items():
        df_fix[f"weight_{cat_name}"] = r[w_col]

    # Overwrite the file in place.
    df_fix.to_csv(csv_path, index=False)

    # Rename the file so that it carries the bit string.
    new_name = f"{csv_path.stem}_{bits}.csv"
    new_path = csv_path.with_name(new_name)
    # replace() overwrites an existing target on every platform; rename()
    # raises on Windows in that case, after the source was already rewritten.
    csv_path.replace(new_path)

    processed.append({
        "original": name,
        "new_name": new_name,
        "status": "OK",
        "path": str(new_path)
    })

# --- Result overview ---
report_df = pd.DataFrame(processed)
print(report_df)


                 original                   new_name status  \
0     P000_id001_meme.csv  P000_id001_meme_10000.csv     OK   
1     P000_id002_meme.csv  P000_id002_meme_10000.csv     OK   
2     P000_id003_meme.csv  P000_id003_meme_10000.csv     OK   
3     P000_id004_meme.csv  P000_id004_meme_10000.csv     OK   
4     P000_id005_meme.csv  P000_id005_meme_10000.csv     OK   
...                   ...                        ...    ...   
7357  P054_id149_text.csv  P054_id149_text_10001.csv     OK   
7358  P054_id150_text.csv  P054_id150_text_10001.csv     OK   
7359  P054_id151_text.csv  P054_id151_text_10011.csv     OK   
7360  P054_id152_text.csv  P054_id152_text_10011.csv     OK   
7361  P054_id153_text.csv  P054_id153_text_10011.csv     OK   

                                     path  
0     fixations\P000_id001_meme_10000.csv  
1     fixations\P000_id002_meme_10000.csv  
2     fixations\P000_id003_meme_10000.csv  
3     fixations\P000_id004_meme_10000.csv  
4     fixations\P000_id

In [2]:
import pandas as pd
from pathlib import Path

# Folder containing the fixation CSVs
fixations_dir = Path("fixations")

# For every CSV: keep each "weight_*" value in the first data row only and
# blank it in all other rows, then write the file back in place.
for csv_path in fixations_dir.glob("*.csv"):
    df = pd.read_csv(csv_path)

    # Every column whose name starts with "weight_"
    weight_cols = [name for name in df.columns if name.startswith("weight_")]

    if not weight_cols:
        print(f"keine weight-Spalten in: {csv_path.name}")
        continue

    if not df.empty:
        for col in weight_cols:
            first_value = df.loc[0, col]  # original value from the first row
            df[col] = None                # clear the whole column
            df.loc[0, col] = first_value  # restore the first row only

    # Overwrite the file
    df.to_csv(csv_path, index=False)
    print(f"angepasst: {csv_path.name}")


angepasst: P000_id001_meme_10000.csv
angepasst: P000_id002_meme_10000.csv
angepasst: P000_id003_meme_10000.csv
angepasst: P000_id004_meme_10000.csv
angepasst: P000_id005_meme_10000.csv
angepasst: P000_id006_meme_10000.csv
angepasst: P000_id007_meme_10000.csv
angepasst: P000_id008_meme_10000.csv
angepasst: P000_id009_meme_10000.csv
angepasst: P000_id010_meme_10000.csv
angepasst: P000_id011_ort_01001.csv
angepasst: P000_id012_ort_01001.csv
angepasst: P000_id013_ort_01001.csv
angepasst: P000_id014_ort_01000.csv
angepasst: P000_id015_ort_01001.csv
angepasst: P000_id016_ort_01001.csv
angepasst: P000_id017_ort_01001.csv
angepasst: P000_id018_ort_01000.csv
angepasst: P000_id019_ort_01000.csv
angepasst: P000_id020_ort_01000.csv
angepasst: P000_id021_ort_01000.csv
angepasst: P000_id022_ort_01000.csv
angepasst: P000_id023_ort_01000.csv
angepasst: P000_id024_ort_01000.csv
angepasst: P000_id025_ort_01000.csv
angepasst: P000_id026_ort_01000.csv
angepasst: P000_id027_ort_01000.csv
angepasst: P000_id