Dorothee MG
Working on Ingrid's analyses.
EDA (electrodermal activity) data.

formalities. 

In [None]:
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import neurokit2 as nk
import numpy as np
import os
import glob

# subjects: identifiers "Subject01" ... "Subject82" (two-digit, zero-padded)
subjects = [f"Subject{number:02d}" for number in range(1, 83)]

In [None]:

summary_list = []

sampling_rate = 256  # Hz
data_dir = "data"
output_dir = "data_cleaned"
os.makedirs(output_dir, exist_ok=True)

# For every subject: load the baseline skin-conductance recording, keep the
# first two minutes, clean the signal with NeuroKit2 and write the result to
# `output_dir` as "<subject>_EDA_cleaned_baseline.csv".
for subject in subjects:
    print(f"\n## PROCESSING {subject} ##")

    eda_file = f"{data_dir}/{subject}_SC_baseline.xlsx"

    if not os.path.exists(eda_file):
        continue  # no baseline for at least subject #08

    # load the recording for THIS subject; without this read `eda_df` is either
    # undefined (NameError) or left over from a previous run of the notebook.
    # The marker column is read as text, as in the AUC cell further down.
    eda_df = pd.read_excel(eda_file, dtype={"Marqueurs": str})

    # normalize column naming
    eda_df.columns = [c.strip() for c in eda_df.columns]  # remove leading/trailing spaces
    rename_map = {}
    if "Time" in eda_df.columns:
        rename_map["Time"] = "time"
    # the amplitude column is exported under one of two headers;
    # "Skin Conductance" takes precedence when both are present
    if "Skin Conductance" in eda_df.columns:
        rename_map["Skin Conductance"] = "amplitude"
    elif "Signal" in eda_df.columns:
        rename_map["Signal"] = "amplitude"
    if "Marqueurs" in eda_df.columns:
        rename_map["Marqueurs"] = "markers"
    eda_df.rename(columns=rename_map, inplace=True)

    # trim to 2 first minutes of recording (time <= 120)
    eda_df = eda_df[eda_df["time"] <= 120].copy()

    # clean EDA signal
    eda_cleaned = nk.eda_clean(eda_df["amplitude"], sampling_rate=sampling_rate)

    # save cleaned data (one row per retained sample: time + cleaned amplitude)
    out_csv = os.path.join(output_dir, f"{subject}_EDA_cleaned_baseline.csv")
    pd.DataFrame({"time": eda_df["time"], "EDA_Clean": eda_cleaned}).to_csv(out_csv, index=False)


In [None]:


# settings

output_file = "EDA_baseline_AUC.csv"
segment_len = 6 # seconds per segment
fs = 256 # sampling rate

# function to compute the area under the curve (AUC)

def compute_auc_metrics(df, duration=120):
    """Compute trapezoidal AUC metrics of the amplitude signal.

    Only rows with ``time <= duration`` are used. The retained samples are cut
    into consecutive, non-overlapping segments of ``segment_len * fs`` samples
    (module-level settings, read at call time). Segmentation is by sample
    position, not by time stamp, so it relies on the recording being sampled
    at ``fs`` without gaps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"time"`` and ``"amplitude"``.
    duration : float, optional
        Length of the analysed window, in the unit of the ``"time"`` column
        (default 120, i.e. the first 2 min of recording).

    Returns
    -------
    auc_segments : list of float
        One AUC per segment (``int(duration / segment_len)`` entries). A
        segment with fewer than two samples (recording shorter than the
        window) is reported as NaN rather than 0, so that missing data cannot
        be mistaken for a flat signal.
    auc_total : float
        AUC over the whole retained window.
    auc_pos : float
        AUC of the signal with negative values set to 0.
    auc_neg : float
        AUC of the signal with positive values set to 0.
    """
    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # fall back to np.trapz only on older NumPy versions.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz

    df = df[df["time"] <= duration]  # keep only the analysed window
    time = df["time"].to_numpy(dtype=float)
    amplitude = df["amplitude"].to_numpy(dtype=float)

    seg_samples = int(segment_len * fs)  # samples per segment
    n_segments = int(duration / segment_len)

    auc_segments = []
    for i in range(n_segments):
        window = slice(i * seg_samples, (i + 1) * seg_samples)
        seg_time = time[window]
        if seg_time.size < 2:
            # no interval to integrate over: mark as missing
            auc_segments.append(float("nan"))
            continue
        auc_segments.append(trapezoid(amplitude[window], seg_time))

    # total AUC
    auc_total = trapezoid(amplitude, time)

    # positive and negative AUCs
    auc_pos = trapezoid(np.clip(amplitude, 0, None), time)
    auc_neg = trapezoid(np.clip(amplitude, None, 0), time)

    return auc_segments, auc_total, auc_pos, auc_neg

# Compute the AUC metrics of every subject's baseline recording and collect
# them as one row per subject.
results = []

for subject in subjects:
    print(f"PROCESSING {subject}") # tracking
    eda_file = f"data/{subject}_SC_baseline.xlsx"

    # subjects without a baseline file are skipped (at least subject #08)
    if not os.path.exists(eda_file):
        continue

    # read the raw recording; the marker column is kept as text
    df = pd.read_excel(eda_file, dtype={"Marqueurs": str})
    df.columns = [header.strip() for header in df.columns]  # drop surrounding spaces

    # harmonise the headers that are present in this file
    rename_map = {
        old: new
        for old, new in (("Time", "time"), ("Marqueurs", "markers"))
        if old in df.columns
    }
    # amplitude is exported as "Skin Conductance" or, failing that, "Signal"
    amplitude_header = next(
        (header for header in ("Skin Conductance", "Signal") if header in df.columns),
        None,
    )
    if amplitude_header is not None:
        rename_map[amplitude_header] = "amplitude"
    df = df.rename(columns=rename_map)

    auc_segments, auc_total, auc_pos, auc_neg = compute_auc_metrics(df)

    # one flat record per subject: global metrics first, then the
    # per-segment AUCs numbered from 01
    res = {
        "subject": subject,
        "AUC_total": auc_total,
        "AUC_positive": auc_pos,
        "AUC_negative": auc_neg,
    }
    res.update(
        {f"AUC_seg{number:02d}": value for number, value in enumerate(auc_segments, 1)}
    )

    results.append(res)

### save results: one CSV row per processed subject

summary_df = pd.DataFrame(results)
summary_df.to_csv(output_file, index=False)