In [28]:
# In[1]: Benötigte Pakete importieren
import pandas as pd
from pathlib import Path

# Pfad zum raw-Ordner
raw_folder = Path('processed/fixations')

# Name der Datei, die du einlesen möchtest (ohne Extension)
file_name = 'P000_id007_meme_ncc_processed_fixations'  # z.B. 'P035_id146_kategorie'
file_path = raw_folder / file_name
    
# In[2]: CSV einlesen
# Wenn deine Dateien keine .csv-Endung haben, füge sie hier explizit an:
df = pd.read_csv(f"{file_path}.csv", sep=',', header=0, encoding='utf-8')

# Oder, falls die Dateien wirklich keine Extension haben, so:
# df = pd.read_csv(file_path, sep=',', header=0, encoding='utf-8')

# In[3]: Auf die wichtigsten Spalten kürzen
# wichtige_spalten = ['Spalte1', 'Spalte2', 'Spalte3']  # ersetze durch deine Spaltennamen
# df_kurz = df[wichtige_spalten]

# In[4]: Ergebnisse anzeigen
print("Original-Shape:", df.shape)
df

Original-Shape: (11, 5)


Unnamed: 0,start_time,end_time,duration,x,y
0,0.0,183.041,183.041,340.222958,211.222904
1,216.298,332.787,116.489,433.662105,230.239079
2,399.352,665.511,266.159,472.597729,630.843735
3,732.209,1397.784,665.575,446.367709,140.055741
4,1414.424,1697.296,282.872,447.146005,135.982642
5,1763.8,3278.095,1514.295,484.248045,612.793803
6,3294.717,3461.057,166.34,456.666158,610.237996
7,3527.672,4209.873,682.201,460.208914,173.656518
8,4343.04,4559.347,216.307,462.617343,190.007488
9,4659.181,4975.416,316.235,466.507027,710.451884


In [3]:
import os
import glob
import ast

import numpy as np
import pandas as pd

def parse_point(s):
    try:
        x, y = ast.literal_eval(s)
        return float(x), float(y)
    except Exception:
        return np.nan, np.nan

def process_file(file_path, out_dir):
    df = pd.read_csv(file_path)
    df['millisecond'] = (df['system_time_stamp'] - df['system_time_stamp'].iloc[0]) / 1000

    # 1) Erst in numeric casten, dann auf plausiblen Bereich maskieren
    df['left_pupil_diameter']  = pd.to_numeric(df['left_pupil_diameter'],  errors='coerce')
    df['right_pupil_diameter'] = pd.to_numeric(df['right_pupil_diameter'], errors='coerce')
    df['left_pupil_diameter']  = df['left_pupil_diameter'].mask(lambda x: (x < 1.5) | (x > 8), np.nan)
    df['right_pupil_diameter'] = df['right_pupil_diameter'].mask(lambda x: (x < 1.5) | (x > 8), np.nan)

    # 2) Gaze‑Parsing
    left_pts  = df['left_gaze_point_on_display_area'] .apply(parse_point)
    right_pts = df['right_gaze_point_on_display_area'].apply(parse_point)
    df[['left_x','left_y']]   = pd.DataFrame(left_pts.tolist(),  index=df.index)
    df[['right_x','right_y']] = pd.DataFrame(right_pts.tolist(), index=df.index)

    # 3) Validity‑Flags (1 = gültig)
    lv = df['left_gaze_point_validity']  != 0
    rv = df['right_gaze_point_validity'] != 0

    df['x'] = np.where(lv & rv,
                       (df['left_x'] + df['right_x']) / 2,
                       np.where(lv, df['left_x'],
                                np.where(rv, df['right_x'], np.nan)))
    df['y'] = np.where(lv & rv,
                       (df['left_y'] + df['right_y']) / 2,
                       np.where(lv, df['left_y'],
                                np.where(rv, df['right_y'], np.nan)))

    # 4) Skalierung auf 800×800 und Clamping
    df['x'] = (df['x'] * 800).clip(0, 800)
    df['y'] = (df['y'] * 800).clip(0, 800)

    # 5) Pupil-Size kombinieren und erneut plausibelkeitsprüfen
    lv_p = df['left_pupil_diameter'].notna()
    rv_p = df['right_pupil_diameter'].notna()
    df['pupil_size'] = np.where(lv_p & rv_p,
                                (df['left_pupil_diameter'] + df['right_pupil_diameter']) / 2,
                                np.where(lv_p, df['left_pupil_diameter'],
                                         np.where(rv_p, df['right_pupil_diameter'], np.nan)))
    # Optional: noch einmal maskieren, falls Mittelwert aus zwei Außenseitern entsteht
    df['pupil_size'] = df['pupil_size'].mask(lambda x: (x < 1.5) | (x > 8), np.nan)

    # 6) Interpolation und Drop
    df[['x','y','pupil_size']] = df[['x','y','pupil_size']].interpolate()
    df_clean = df.dropna(subset=['x','y','pupil_size'])

    # 7) Speichern
    base = os.path.splitext(os.path.basename(file_path))[0]
    out_path = os.path.join(out_dir, f"{base}_processed.csv")
    df_clean[['millisecond','x','y','pupil_size']].to_csv(out_path, index=False)
    print(f"→ {os.path.basename(out_path)}")

def run_processing(folder):
    processed_dir = os.path.join(folder, "processed")
    os.makedirs(processed_dir, exist_ok=True)
    for fp in glob.glob(os.path.join(folder, "*.csv")):
        process_file(fp, processed_dir)

# === Notebook-Aufruf ===
folder = "raw"  # Pfad zu deinen Roh-CSV-Dateien
run_processing(folder)


→ P000_id001_meme_ncc_processed.csv
→ P000_id002_meme_ncc_processed.csv
→ P000_id003_meme_ncc_processed.csv
→ P000_id004_meme_ncc_processed.csv
→ P000_id005_meme_ncc_processed.csv
→ P000_id006_meme_ncc_processed.csv
→ P000_id007_meme_ncc_processed.csv
→ P000_id008_meme_ncc_processed.csv
→ P000_id009_meme_ncc_processed.csv
→ P000_id010_meme_ncc_processed.csv
→ P000_id011_ort_processed.csv
→ P000_id012_ort_processed.csv
→ P000_id013_ort_processed.csv
→ P000_id014_ort_processed.csv
→ P000_id015_ort_processed.csv
→ P000_id016_ort_processed.csv
→ P000_id017_ort_processed.csv
→ P000_id018_ort_processed.csv
→ P000_id019_ort_processed.csv
→ P000_id020_ort_processed.csv
→ P000_id021_ort_processed.csv
→ P000_id022_ort_processed.csv
→ P000_id023_ort_processed.csv
→ P000_id024_ort_processed.csv
→ P000_id025_ort_processed.csv
→ P000_id026_ort_processed.csv
→ P000_id027_ort_processed.csv
→ P000_id028_ort_processed.csv
→ P000_id029_ort_processed.csv
→ P000_id030_ort_processed.csv
→ P000_id031_ort_pro

KeyError: 'system_time_stamp'

In [3]:
import os
import glob

import numpy as np
import pandas as pd
from pandas.errors import EmptyDataError

def detect_fixations_with_pupil(df, dispersion_threshold=25, duration_threshold=100):
    """
    I-DT Algorithmus zur Fixationsdetektion mit mittlerer Pupillengröße.
    Erwartet Spalten: 'millisecond', 'x', 'y', 'pupil_size' (bereits plausibilisiert).
    Gibt DataFrame mit Spalten:
      ['start_time','end_time','duration','x','y','avg_pupil_size']
    """
    fixations = []
    x = df['x'].values
    y = df['y'].values
    p = df['pupil_size'].values
    t = df['millisecond'].values

    n = len(df)
    start = 0
    while start < n:
        end = start + 1
        # Fenster erweitern solange Dispersion ≤ threshold
        while end < n:
            win_x = x[start:end+1]
            win_y = y[start:end+1]
            if (win_x.max() - win_x.min() <= dispersion_threshold and
                win_y.max() - win_y.min() <= dispersion_threshold):
                end += 1
            else:
                break
        duration = t[end-1] - t[start]
        if duration >= duration_threshold:
            avg_pupil = np.nanmean(p[start:end])
            fixations.append({
                'start_time':     t[start],
                'end_time':       t[end-1],
                'duration':       duration,
                'x':              win_x.mean(),
                'y':              win_y.mean(),
                'avg_pupil_size': avg_pupil
            })
            start = end
        else:
            start += 1

    return pd.DataFrame(fixations)

def process_fixations_with_normalized_pupil(processed_folder, output_folder,
                                            dispersion_threshold=25, duration_threshold=100):
    """
    Liest alle *_processed.csv (mit plausibilisierter pupil_size) aus processed_folder ein,
    berechnet Fixationen inkl. avg_pupil_size, normalisiert avg_pupil_size pro Datei,
    und speichert als *_fixations.csv in output_folder.
    """
    os.makedirs(output_folder, exist_ok=True)
    pattern = os.path.join(processed_folder, "*_processed.csv")
    for path in glob.glob(pattern):
        subj = os.path.splitext(os.path.basename(path))[0].replace("_processed", "")
        try:
            df = pd.read_csv(path)
        except EmptyDataError:
            print(f"Skipping {subj}: empty or invalid processed file")
            continue
        if df.empty:
            print(f"Skipping {subj}: no data")
            continue

        # Fixation detection
        fix_df = detect_fixations_with_pupil(df, dispersion_threshold, duration_threshold)
        if fix_df.empty:
            print(f"No fixations for {subj}")
            continue

        # Normalisierung der avg_pupil_size (Z-Score pro Proband)
        mean_p = fix_df['avg_pupil_size'].mean()
        std_p = fix_df['avg_pupil_size'].std()
        fix_df['pupil_size_norm'] = (fix_df['avg_pupil_size'] - mean_p) / std_p

        # Speichern
        out_csv = os.path.join(output_folder, f"{subj}_fixations.csv")
        fix_df.to_csv(out_csv, index=False)
        print(f"Saved fixations with normalized pupil size: {os.path.basename(out_csv)}")

# ==== Notebook-Aufruf ====
processed_folder   = "processed"                # Ordner mit *_processed.csv
fixations_folder   = "processed/fixations"      # Zielordner für *_fixations.csv
process_fixations_with_normalized_pupil(processed_folder, fixations_folder)


Saved fixations with normalized pupil size: P000_id001_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id002_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id003_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id004_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id005_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id006_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id007_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id008_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id009_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id010_meme_ncc_fixations.csv
Saved fixations with normalized pupil size: P000_id011_ort_fixations.csv
Saved fixations with normalized pupil size: P000_id012_ort_fixations.csv
Saved fixations with normalized pupil size: P000_id013_ort_fixations.csv
S

In [2]:
import os
import glob

def delete_double_processed(folder_path):
    """
    Löscht alle Dateien im Ordner, deren Name vor 'processed' zwei Unterstriche enthält,
    z. B. 'P025_id057_person__processed.csv' oder '…__processed.txt'.
    """
    pattern = os.path.join(folder_path, "*__processed*")
    for file_path in glob.glob(pattern):
        try:
            os.remove(file_path)
            print(f"Deleted: {os.path.basename(file_path)}")
        except OSError as e:
            print(f"Error deleting {os.path.basename(file_path)}: {e}")

# === Anwendung im Notebook ===
folder = "raw/processed"  # <-- anpassen!
delete_double_processed(folder)


Deleted: P001_id001_meme_ncc__processed.csv
Deleted: P001_id002_meme_ncc__processed.csv
Deleted: P001_id003_meme_ncc__processed.csv
Deleted: P001_id004_meme_ncc__processed.csv
Deleted: P001_id005_meme_ncc__processed.csv
Deleted: P001_id006_meme_ncc__processed.csv
Deleted: P001_id007_meme_ncc__processed.csv
Deleted: P001_id008_meme_ncc__processed.csv
Deleted: P001_id009_meme_ncc__processed.csv
Deleted: P001_id010_meme_ncc__processed.csv
Deleted: P001_id011_ort__processed.csv
Deleted: P001_id012_ort__processed.csv
Deleted: P001_id013_ort__processed.csv
Deleted: P001_id014_ort__processed.csv
Deleted: P001_id015_ort__processed.csv
Deleted: P001_id016_ort__processed.csv
Deleted: P001_id017_ort__processed.csv
Deleted: P001_id018_ort__processed.csv
Deleted: P001_id019_ort__processed.csv
Deleted: P001_id020_ort__processed.csv
Deleted: P001_id021_ort__processed.csv
Deleted: P001_id022_ort__processed.csv
Deleted: P001_id023_ort__processed.csv
Deleted: P001_id024_ort__processed.csv
Deleted: P001_