Dorothee MG
Working on Ingrid's analyses.
EDA data. 

Formalities: library imports and the list of subject IDs.

In [None]:
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import neurokit2 as nk
import numpy as np
import os

# Subject IDs "Subject01" .. "Subject82" (zero-padded to two digits).
subjects = [f"Subject{number:02d}" for number in range(1, 83)]

In [None]:

# Per-subject EDA pipeline: load the recording, plot the raw signal, clean it,
# compute quality metrics, plot the cleaned signal with stimulus-onset markers,
# and save the cleaned signal. A quality summary across subjects is written at
# the end.

summary_list = []  # one dict of quality metrics per subject

sampling_rate = 256  # I previously established the sampling rate

# Output folders; created up front so savefig / to_csv cannot fail on a
# missing directory.
raw_fig_dir = "figures_raw_EDA"
final_fig_dir = "figures_final_EDA"
cleaned_dir = "data_cleaned"
for out_dir in (raw_fig_dir, final_fig_dir, cleaned_dir):
    os.makedirs(out_dir, exist_ok=True)

for subject in subjects:

    print(f' ## PROCESSING {subject} ##\n')  # tracking

    eda_file = f"data/{subject}_SC.xlsx"  # name of the file
    # "Marqueurs" is forced to str so the marker labels can be matched as text
    eda_df = pd.read_excel(eda_file, dtype={"Marqueurs": str})
    eda_df.rename(
        columns={"Time": "time", "Skin Conductance": "amplitude", "Marqueurs": "markers"},
        inplace=True,
    )

    # plot the raw data
    plt.figure(figsize=(12, 4))
    plt.plot(eda_df['time'], eda_df['amplitude'])
    plt.title(f"raw EDA signal - {subject}")  # name the subject in the title
    plt.xlabel("time (s)")
    plt.ylabel("skin Conductance (µS)")
    plt.tight_layout()
    out_png = os.path.join(raw_fig_dir, f"{subject}_raw_EDA.png")  # save it
    plt.savefig(out_png, dpi=300, bbox_inches="tight")
    plt.close()

    # clean the data
    eda_cleaned = nk.eda_clean(eda_df['amplitude'], sampling_rate=sampling_rate)
    # eda_process expects the RAW signal and performs the cleaning itself.
    # Feeding it the already-cleaned signal filtered the data twice and made
    # its "EDA_Raw" column a copy of the cleaned signal.
    signals, info = nk.eda_process(eda_df['amplitude'], sampling_rate=sampling_rate)

    # plot raw and cleaned data
    qc_fig = plt.figure(figsize=(12, 6))
    plt.subplot(2, 1, 1)
    plt.plot(signals["EDA_Raw"], label="Raw EDA", alpha=0.5)
    plt.plot(signals["EDA_Clean"], label="Cleaned EDA")
    plt.legend()
    plt.title("EDA signal and cleaning")

    plt.subplot(2, 1, 2)
    plt.plot(signals["EDA_Tonic"], label="Tonic")
    plt.plot(signals["EDA_Phasic"], label="Phasic")
    is_peak = signals["SCR_Peaks"] == 1
    plt.scatter(signals.index[is_peak],
                signals["EDA_Phasic"][is_peak],
                color="red", label="SCR Peaks")
    plt.legend()
    plt.title("Tonic & Phasic Components + Peaks")

    plt.tight_layout()
    plt.show()
    plt.close(qc_fig)  # release the figure so they do not pile up over the loop

    # QUALITY METRICS ------------------------------------------------------------------------

    # signal range
    min_val = round(eda_cleaned.min(), 2)
    max_val = round(eda_cleaned.max(), 2)
    signal_range = f"{min_val}-{max_val}"

    # abrupt spikes: % of sample-to-sample jumps larger than 1 (signal units)
    diffs = np.abs(np.diff(eda_cleaned))
    spike_ratio = np.sum(diffs > 1) / len(diffs) * 100  # %
    abrupt_spikes = round(spike_ratio, 2)

    # flatlines: % of samples whose trailing 5 s window has a std below threshold
    flat_threshold = 0.005
    window = sampling_rate * 5  # 5sec window
    rolling_std = pd.Series(eda_cleaned).rolling(window).std()
    # the first window-1 values are NaN and therefore never count as flat
    flat_ratio = np.sum(rolling_std < flat_threshold) / len(rolling_std) * 100  # %
    flatlines = round(flat_ratio, 2)

    # SCR detection: number of detected peaks per minute
    num_peaks = len(info["SCR_Peaks"])
    # NOTE(review): uses the last timestamp as the duration, i.e. assumes the
    # "time" column is in seconds and starts at 0 - confirm against the data.
    duration_min = eda_df["time"].iloc[-1] / 60
    scrs_per_min = round(num_peaks / duration_min, 2)

    # store results
    summary_list.append({
        "subject": subject,
        "min_val": min_val,
        "max_val": max_val,
        "signal_range": signal_range,
        "abrupt_spikes": abrupt_spikes,
        "flatlines": flatlines,
        "scr_detection_per_min": scrs_per_min})

    # plot cleaned signal
    fig, ax = plt.subplots(figsize=(20, 5))
    ax.plot(eda_df["time"], eda_cleaned, label="EDA cleaned")

    # identify the markers (when the stimulus appears)
    df_debut_markers = eda_df[eda_df["markers"].str.startswith("Debut", na=False)].copy()
    df_debut_markers["emotion"] = df_debut_markers["markers"].str.replace("Debut_", "", regex=False)

    # Number repeated presentations of the same emotion in order of appearance
    # (<emotion>_1, <emotion>_2, ...). The previous list comprehension never
    # updated its counter, so every label ended in "_1".
    occurrence = df_debut_markers.groupby("emotion").cumcount() + 1
    df_debut_markers["label"] = df_debut_markers["emotion"] + "_" + occurrence.astype(str)

    # y position of the labels, just below the top of the axes
    label_y = ax.get_ylim()[1] * 0.98
    for _, row in df_debut_markers.iterrows():
        ax.axvline(x=row["time"], color="red", linestyle="--", alpha=0.8, linewidth=0.5)
        ax.text(row["time"], label_y, row["label"], color="red", fontsize=6,
                rotation=90, ha="center", va="top", alpha=0.8)

    ax.set_xlabel("time (s)")
    ax.set_ylabel("skin Conductance (µS)")
    ax.set_title(f"EDA - {subject}")
    ax.legend()

    plt.tight_layout()
    out_final = os.path.join(final_fig_dir, f"{subject}_cleaned_EDA.png")  # save the plot
    plt.savefig(out_final, dpi=300, bbox_inches="tight")
    plt.close()

    # save the cleaned signal
    out_csv = os.path.join(cleaned_dir, f"{subject}_EDA_cleaned.csv")
    pd.DataFrame({"time": eda_df["time"], "EDA_Clean": eda_cleaned}).to_csv(out_csv, index=False)

# quality summary across participants
summary_df = pd.DataFrame(summary_list)
summary_df.to_excel("EDA_notes_nettoyage.xlsx", index=False)
