# Calculating alpha coherence across all channels for the 60 chosen epochs per subject 

In [None]:
import os
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import make_interp_spline

import mne
from mne_connectivity import spectral_connectivity_epochs

In [None]:
# Load the pickled mapping from subject id to the indices of that subject's
# pre-selected top epochs.
# NOTE: pickle.load executes arbitrary code from the file; only use it on
# files you produced yourself.
with open("E:/ChristianMusaeus/top_epochs_per_subject.pkl", "rb") as f:
    top_epochs_per_subject = pickle.load(f)

data_dir = "G:/ChristianMusaeus/Preprocessed_setfiles/"
fmin, fmax = 8, 13  # alpha band limits (Hz)
subject_coherence = {}  # subject id -> mean positive off-diagonal alpha coherence

subject_ids = list(top_epochs_per_subject.keys())
for subject_id in subject_ids:
    print(f"Processing coherence for {subject_id}...")

    # Any failure for one subject is reported and the loop moves on, so a
    # single bad file does not abort the whole run.
    try:
        set_path = os.path.join(data_dir, f"{subject_id}_epoched.set")
        epochs = mne.io.read_epochs_eeglab(set_path, verbose='ERROR')

        # Pull out only the pre-selected epochs as a raw array.
        selected_indices = top_epochs_per_subject[subject_id]
        all_data = epochs.get_data()
        selected_data = all_data[selected_indices]

        # Quick sanity diagnostics on the selected data.
        print("Shape of selected data:", selected_data.shape)
        print("NaNs in data:", np.isnan(selected_data).sum())
        print("Avg signal amplitude:", np.abs(selected_data).mean())

        # Re-wrap the array as epochs, reusing the original info and start time.
        selected_epochs = mne.EpochsArray(selected_data, epochs.info, tmin=epochs.tmin)

        connectivity_kwargs = dict(
            method='coh',
            mode='fourier',
            fmin=fmin,
            fmax=fmax,
            sfreq=selected_epochs.info['sfreq'],
            faverage=True,  # average over the frequencies inside the band
            verbose=False,
        )
        con = spectral_connectivity_epochs(selected_epochs, **connectivity_kwargs)

        coh_matrix = np.squeeze(con.get_data(output='dense'))

        # Boolean mask that is True everywhere except on the diagonal.
        mask = np.ones_like(coh_matrix, dtype=bool)
        np.fill_diagonal(mask, False)

        # Keep strictly positive off-diagonal entries only.
        off_diagonal = coh_matrix[mask]
        nonzero_values = off_diagonal[off_diagonal > 0]

        if nonzero_values.size == 0:
            print(f" No nonzero coherence for subject {subject_id}, skipping")
            continue

        mean_coherence = nonzero_values.mean()
        print(" Mean alpha coherence:", mean_coherence)

        subject_coherence[subject_id] = mean_coherence

    except Exception as exc:
        print(f" Error processing {subject_id}: {exc}")


### Plots the coherence values for each subject

In [None]:
# Subject-level metadata; ids are cast to str so they compare equal to the
# string ids used on the coherence side.
metadata = pd.read_csv("metadata_time_filtered.csv")
metadata["subject_id"] = metadata["subject_id"].astype(str)

# One row per subject: the id (from the dict keys) and its mean coherence.
coh_df = (
    pd.DataFrame.from_dict(subject_coherence, orient='index', columns=["mean_coherence"])
    .rename_axis("subject_id")
    .reset_index()
)
coh_df['subject_id'] = coh_df['subject_id'].astype(str)

# Join coherence with metadata, drop rows lacking age or coherence, and
# compute the average coherence for every distinct age.
merged = coh_df.merge(metadata, on="subject_id")
merged = merged.dropna(subset=["age", "mean_coherence"])
age_grouped_coherence = merged.groupby('age', as_index=False)['mean_coherence'].mean()

# Scatter of every subject's mean coherence against age.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=merged, x="age", y="mean_coherence", alpha=0.6, ax=ax)
ax.set_xlabel("Age")
ax.set_ylabel("Mean Alpha Coherence (8–13 Hz)")
ax.set_title("Mean Alpha Coherence across Age")
ax.set_xticks(np.arange(0, 95, 10))
ax.grid(True)
fig.tight_layout()
plt.show()

### Plots the mean coherence values of all subjects with a specific age

In [None]:
# Scatter of the per-age average coherence (one point per distinct age).
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=age_grouped_coherence, x="age", y="mean_coherence", alpha=0.6, ax=ax)
ax.set_xlabel("Age")
ax.set_ylabel("Mean Alpha Coherence (8–13 Hz)")
ax.set_title("Mean Alpha Coherence across Age")
ax.grid(True)
ax.set_xticks(np.arange(0, 95, 10))
fig.tight_layout()
plt.show()

### Plots the mean coherence values of all subjects with a specific age, after removing values that differ from the mean by more than 3 standard deviations, and confidence intervals

In [None]:
# Outlier rule: keep subjects whose mean coherence lies within
# mean ± 3 standard deviations of the whole sample (bounds inclusive).
coherence_values = merged['mean_coherence']
mean_coh = coherence_values.mean()
std_coh = coherence_values.std()

lower_bound = mean_coh - 3 * std_coh
upper_bound = mean_coh + 3 * std_coh

filtered = merged[coherence_values.between(lower_bound, upper_bound)]

# Average the remaining subjects per age.
age_grouped = filtered.groupby('age', as_index=False)['mean_coherence'].mean()

# Scatter of the outlier-filtered per-age averages.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=age_grouped, x='age', y='mean_coherence', ax=ax)
ax.set_xlabel('Age')
ax.set_ylabel('Average Alpha Coherence (8–13 Hz)')
ax.set_title('Average Alpha Coherence across Age')
ax.grid(True)
ax.set_xticks(np.arange(0, 95, 10))
fig.tight_layout()
plt.show()


### Plots the connected, smoothed scatterplot after outlier removal

In [None]:
# Per-subject table built from the coherence dict: one (id, value) row each.
df_real_coh = pd.DataFrame(
    list(subject_coherence.items()),
    columns=["subject_id", "real_coherence"],
)

# Attach each subject's age; ids are strings on both sides so the keys match.
metadata["subject_id"] = metadata["subject_id"].astype(str)
df_real_coh["subject_id"] = df_real_coh["subject_id"].astype(str)
merged_real = df_real_coh.merge(metadata[["subject_id", "age"]], on="subject_id", how="inner")

# Keep only subjects within mean ± 3 standard deviations (bounds inclusive).
real_values = merged_real["real_coherence"]
mean_coh = real_values.mean()
std_coh = real_values.std()
lower_bound = mean_coh - 3 * std_coh
upper_bound = mean_coh + 3 * std_coh
filtered_real = merged_real[real_values.between(lower_bound, upper_bound)]

# Per-age mean, standard deviation and subject count of the filtered data.
grouped_real = (
    filtered_real.groupby("age")["real_coherence"]
    .agg(mean_coherence="mean", std_coherence="std", n="count")
    .reset_index()
)

# 95% interval as mean ± 1.96 * standard error of the mean.
grouped_real["sem"] = grouped_real["std_coherence"] / np.sqrt(grouped_real["n"])
half_width = 1.96 * grouped_real["sem"]
grouped_real["ci_upper"] = grouped_real["mean_coherence"] + half_width
grouped_real["ci_lower"] = grouped_real["mean_coherence"] - half_width

grouped_real_sorted = grouped_real.sort_values("age")

# Pull the plotting columns out as arrays.
x, y, ci_upper, ci_lower = (
    grouped_real_sorted[column].values
    for column in ("age", "mean_coherence", "ci_upper", "ci_lower")
)

# Rows with any NaN/Inf are dropped before fitting; in particular an age with
# an undefined standard deviation has NaN interval bounds and is excluded.
valid_mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(ci_upper) & np.isfinite(ci_lower)

x_clean, y_clean = x[valid_mask], y[valid_mask]
ci_upper_clean, ci_lower_clean = ci_upper[valid_mask], ci_lower[valid_mask]

# Degree-5 interpolating splines through the per-age mean and both interval
# bounds, evaluated on a dense grid of 300 ages.
x_smooth = np.linspace(x_clean.min(), x_clean.max(), 300)

mean_spline, upper_spline, lower_spline = (
    make_interp_spline(x_clean, values, k=5)
    for values in (y_clean, ci_upper_clean, ci_lower_clean)
)

y_smooth = mean_spline(x_smooth)
ci_upper_smooth = upper_spline(x_smooth)
ci_lower_smooth = lower_spline(x_smooth)

# Per-age points, the spline through them, and the shaded interval band.
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(x_clean, y_clean, alpha=0.7, color='blue', label="Mean per Age", s=25)
ax.plot(x_smooth, y_smooth, color="blue", linewidth=1.5, label="Smoothed Mean")
ax.fill_between(x_smooth, ci_lower_smooth, ci_upper_smooth, color="lightblue", alpha=0.4, label="95% CI")

ax.set_xlabel("Age")
ax.set_ylabel("Average Alpha Coherence (8–13 Hz)")
ax.set_title("Average Alpha Coherence across Age")
ax.grid(True)
ax.legend()
ax.set_xticks(np.arange(0, 95, 10))
fig.tight_layout()
plt.show()


### Plots sex-specific coherence against age 

In [None]:
# Per-subject table built from the coherence dict: one (id, value) row each.
df_real_coh = pd.DataFrame(
    list(subject_coherence.items()),
    columns=["subject_id", "real_coherence"],
)

# Attach age and sex; ids are strings on both sides so the keys match.
metadata["subject_id"] = metadata["subject_id"].astype(str)
df_real_coh["subject_id"] = df_real_coh["subject_id"].astype(str)
merged_real = df_real_coh.merge(metadata[["subject_id", "age", "sex"]], on="subject_id", how="inner")

# Line/marker colour and legend text for each group.
gender_colors = {"female": "blue", "male": "green"}
gender_labels = {"female": "Female", "male": "Male"}

fig, ax = plt.subplots(figsize=(14, 7))

# Lower-cased once so the group match below is case-insensitive.
sex_lower = merged_real["sex"].str.lower()

for gender in ["female", "male"]:
    data_gender = merged_real[sex_lower == gender]
    color = gender_colors[gender]

    # Outlier bounds are computed within the group: mean ± 3 standard deviations.
    group_values = data_gender["real_coherence"]
    mean_coh = group_values.mean()
    std_coh = group_values.std()
    lower_bound = mean_coh - 3 * std_coh
    upper_bound = mean_coh + 3 * std_coh
    filtered = data_gender[group_values.between(lower_bound, upper_bound)]

    # Per-age mean, standard deviation and subject count.
    grouped = (
        filtered.groupby("age")["real_coherence"]
        .agg(mean_coherence="mean", std_coherence="std", n="count")
        .reset_index()
    )

    # 95% interval as mean ± 1.96 * standard error of the mean.
    grouped["sem"] = grouped["std_coherence"] / np.sqrt(grouped["n"])
    half_width = 1.96 * grouped["sem"]
    grouped["ci_upper"] = grouped["mean_coherence"] + half_width
    grouped["ci_lower"] = grouped["mean_coherence"] - half_width

    grouped_sorted = grouped.sort_values("age")
    x, y, ci_upper, ci_lower = (
        grouped_sorted[column].values
        for column in ("age", "mean_coherence", "ci_upper", "ci_lower")
    )

    # Drop ages where any of the four series is NaN or infinite.
    valid_mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(ci_upper) & np.isfinite(ci_lower)
    x_clean, y_clean = x[valid_mask], y[valid_mask]
    ci_upper_clean, ci_lower_clean = ci_upper[valid_mask], ci_lower[valid_mask]

    # Degree-5 interpolating splines evaluated on 300 evenly spaced ages.
    x_smooth = np.linspace(x_clean.min(), x_clean.max(), 300)
    mean_spline, upper_spline, lower_spline = (
        make_interp_spline(x_clean, values, k=5)
        for values in (y_clean, ci_upper_clean, ci_lower_clean)
    )
    y_smooth = mean_spline(x_smooth)
    ci_upper_smooth = upper_spline(x_smooth)
    ci_lower_smooth = lower_spline(x_smooth)

    # Points and band are kept out of the legend; only the line is labelled.
    ax.scatter(x_clean, y_clean, alpha=0.6, color=color, s=25, label="_nolegend_")
    ax.fill_between(x_smooth, ci_lower_smooth, ci_upper_smooth, color=color, alpha=0.2, label="_nolegend_")
    ax.plot(x_smooth, y_smooth, color=color, linewidth=1.5, label=gender_labels[gender])

ax.set_xlabel("Age")
ax.set_ylabel("Average Alpha Coherence (8–13 Hz)")
ax.set_title("Average Alpha Coherence across Age by Sex")
ax.grid(True)
ax.legend()
ax.set_xticks(np.arange(0, 95, 10))
fig.tight_layout()
plt.show()


# Calculates imaginary part of coherence 

In [None]:
data_dir = "G:/ChristianMusaeus/Preprocessed_setfiles/"
fmin, fmax = 8, 13  # Alpha band (Hz)

# subject id -> mean of the positive off-diagonal imaginary-coherence values
subject_coherence_imaginary = {}

for subject_id, selected_indices in top_epochs_per_subject.items():
    print(f"Processing coherence for {subject_id}...")

    # Any failure for one subject is reported and the loop moves on, so a
    # single bad file does not abort the whole run.
    try:
        set_path = os.path.join(data_dir, f"{subject_id}_epoched.set")
        epochs = mne.io.read_epochs_eeglab(set_path, verbose='ERROR')

        # Use the top-epoch indices to extract the data array directly.
        selected_data = epochs.get_data()[selected_indices]
        print("Shape of slected data:", selected_data.shape)
        print(np.isnan(selected_data).sum())
        print("Average signal amplitude:", np.mean(np.abs(selected_data)))

        # Create a new EpochsArray with just the selected data and original info.
        selected_epochs = mne.EpochsArray(selected_data, epochs.info, tmin=epochs.tmin)

        # Imaginary part of coherency, averaged over the band defined above
        # (the band limits come from fmin/fmax instead of repeated literals).
        con_imaginary = spectral_connectivity_epochs(
            selected_epochs,
            method='imcoh',
            mode='fourier',
            fmin=fmin,
            fmax=fmax,
            sfreq=selected_epochs.info['sfreq'],
            faverage=True,
            verbose=False,
        )

        coh_matrix = np.squeeze(con_imaginary.get_data(output='dense'))

        mask = np.ones(coh_matrix.shape, dtype=bool)
        np.fill_diagonal(mask, False)  # exclude diagonal

        # NOTE(review): 'imcoh' is a signed measure, so the `> 0` filter below
        # discards negative values as well as zero entries. Confirm that
        # averaging only the positive values is intended; taking np.abs of the
        # off-diagonal values may be what is wanted instead.
        nonzero_values = coh_matrix[mask]
        nonzero_values = nonzero_values[nonzero_values > 0]

        if len(nonzero_values) == 0:
            # The message previously formatted an undefined name `i`, which
            # raised NameError and was reported as a processing error.
            print(f"No nonzero coherence for subject {subject_id}, skipping")
            continue

        mean_imaginary_coherence = np.mean(nonzero_values)
        print("Mean imaginary alpha coherence", mean_imaginary_coherence)

        subject_coherence_imaginary[subject_id] = mean_imaginary_coherence

    except Exception as e:
        print(f"Error processing {subject_id}: {e}")


### Plots imaginary coherence values for each subject across age

In [None]:
# Subject-level metadata; ids are cast to str so they compare equal to the
# string ids used on the coherence side.
metadata = pd.read_csv("metadata_time_filtered.csv")
metadata["subject_id"] = metadata["subject_id"].astype(str)

# Convert the imaginary-coherence dict to a DataFrame with one row per subject.
coh_df_imaginary = pd.DataFrame.from_dict(subject_coherence_imaginary, orient='index', columns=["mean_imaginary_coherence"])
coh_df_imaginary.index.name = "subject_id"
coh_df_imaginary.reset_index(inplace=True)
# Cast this table's OWN ids to str. The ids were previously copied from
# coh_df (the ordinary-coherence table), which attaches values to the wrong
# subject whenever the two result dicts differ in subjects or order.
coh_df_imaginary['subject_id'] = coh_df_imaginary['subject_id'].astype(str)

# Merge with metadata on subject_id, drop rows lacking age or coherence, and
# compute the average imaginary coherence for every distinct age.
merged_imaginary = pd.merge(coh_df_imaginary, metadata, on="subject_id")
merged_imaginary = merged_imaginary.dropna(subset=["age", "mean_imaginary_coherence"])
age_grouped_imaginary = merged_imaginary.groupby('age')['mean_imaginary_coherence'].mean().reset_index()

# Scatter of every subject's mean imaginary coherence against age.
plt.figure(figsize=(8, 5))
sns.scatterplot(data=merged_imaginary, x="age", y="mean_imaginary_coherence", alpha=0.6)
plt.xlabel("Age")
plt.ylabel("Mean Imaginary Alpha Coherence (8–13 Hz)")
plt.title("Mean Imaginary Alpha Coherence across Age")
plt.xticks(np.arange(0, 95, 10))
plt.grid(True)
plt.tight_layout()
plt.show()

### Plots imaginary coherence mean values for each age

In [None]:
# Scatter of the per-age average imaginary coherence (one point per age).
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=age_grouped_imaginary, x="age", y="mean_imaginary_coherence", alpha=0.6, ax=ax)
ax.set_xlabel("Age")
ax.set_ylabel("Mean Imaginary Alpha Coherence (8–13 Hz)")
ax.set_title("Mean Imaginary Alpha Coherence across Age")
ax.grid(True)
ax.set_xticks(np.arange(0, 95, 10))
fig.tight_layout()
plt.show()

### Removes outliers that are more than 3 standard deviations off the mean and plots mean imaginary coherence for each age

In [None]:
# Outlier rule: keep subjects whose mean imaginary coherence lies within
# mean ± 3 standard deviations of the whole sample (bounds inclusive).
imaginary_values = merged_imaginary['mean_imaginary_coherence']
mean_coh_imaginary = imaginary_values.mean()
std_coh_imaginary = imaginary_values.std()

lower_bound_imaginary = mean_coh_imaginary - 3 * std_coh_imaginary
upper_bound_imaginary = mean_coh_imaginary + 3 * std_coh_imaginary

filtered_imaginary = merged_imaginary[
    imaginary_values.between(lower_bound_imaginary, upper_bound_imaginary)
]

# Average the remaining subjects per age. This rebinds age_grouped_imaginary,
# replacing the unfiltered per-age table of the same name.
age_grouped_imaginary = filtered_imaginary.groupby('age', as_index=False)['mean_imaginary_coherence'].mean()

# Scatter of the outlier-filtered per-age averages.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=age_grouped_imaginary, x='age', y='mean_imaginary_coherence', ax=ax)
ax.set_xlabel('Age')
ax.set_ylabel('Average Imaginary Alpha Coherence (8–13 Hz)')
ax.set_title('Average Imaginary Alpha Coherence across Age')
ax.grid(True)
ax.set_xticks(np.arange(0, 95, 10))
fig.tight_layout()
plt.show()


### Plots the connected, smoothed scatterplot after outlier removal

In [None]:
# Per-subject table built from the imaginary-coherence dict.
df_imag_coh = pd.DataFrame(
    list(subject_coherence_imaginary.items()),
    columns=["subject_id", "imaginary_coherence"],
)

# Attach each subject's age; ids are strings on both sides so the keys match.
metadata["subject_id"] = metadata["subject_id"].astype(str)
df_imag_coh["subject_id"] = df_imag_coh["subject_id"].astype(str)
merged_imag = df_imag_coh.merge(metadata[["subject_id", "age"]], on="subject_id", how="inner")

# Keep only subjects within mean ± 3 standard deviations (bounds inclusive).
imag_values = merged_imag["imaginary_coherence"]
mean_coh_imag = imag_values.mean()
std_coh_imag = imag_values.std()
lower_bound_imag = mean_coh_imag - 3 * std_coh_imag
upper_bound_imag = mean_coh_imag + 3 * std_coh_imag
filtered_imag = merged_imag[imag_values.between(lower_bound_imag, upper_bound_imag)]

# Per-age mean, standard deviation and subject count of the filtered data.
grouped_imag = (
    filtered_imag.groupby("age")["imaginary_coherence"]
    .agg(mean_coherence="mean", std_coherence="std", n="count")
    .reset_index()
)

# 95% interval as mean ± 1.96 * standard error of the mean.
grouped_imag["sem"] = grouped_imag["std_coherence"] / np.sqrt(grouped_imag["n"])
half_width = 1.96 * grouped_imag["sem"]
grouped_imag["ci_upper"] = grouped_imag["mean_coherence"] + half_width
grouped_imag["ci_lower"] = grouped_imag["mean_coherence"] - half_width

grouped_imag_sorted = grouped_imag.sort_values("age")

# Pull the plotting columns out as arrays.
x, y, ci_upper, ci_lower = (
    grouped_imag_sorted[column].values
    for column in ("age", "mean_coherence", "ci_upper", "ci_lower")
)

# Drop ages where any of the four series is NaN or infinite.
valid_mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(ci_upper) & np.isfinite(ci_lower)

x_clean, y_clean = x[valid_mask], y[valid_mask]
ci_upper_clean, ci_lower_clean = ci_upper[valid_mask], ci_lower[valid_mask]

# Degree-5 interpolating splines through the per-age mean and both interval
# bounds, evaluated on a dense grid of 300 ages.
x_smooth = np.linspace(x_clean.min(), x_clean.max(), 300)

mean_spline, upper_spline, lower_spline = (
    make_interp_spline(x_clean, values, k=5)
    for values in (y_clean, ci_upper_clean, ci_lower_clean)
)

y_smooth = mean_spline(x_smooth)
ci_upper_smooth = upper_spline(x_smooth)
ci_lower_smooth = lower_spline(x_smooth)

# Per-age points, the spline through them, and the shaded interval band.
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(x_clean, y_clean, alpha=0.7, color='blue', label="Mean per Age", s=25)
ax.plot(x_smooth, y_smooth, color="blue", linewidth=1.5, label="Smoothed Mean")
ax.fill_between(x_smooth, ci_lower_smooth, ci_upper_smooth, color="lightblue", alpha=0.4, label="95% CI")

ax.set_xlabel("Age")
ax.set_ylabel("Average Imaginary Alpha Coherence (8–13 Hz)")
ax.set_title("Average Imaginary Alpha Coherence across Age")
ax.grid(True)
ax.legend()
ax.set_xticks(np.arange(0, 95, 10))
fig.tight_layout()
plt.show()


### Plots gender specific mean imaginary coherence across age 

In [None]:
# Per-subject table built from the imaginary-coherence dict.
df_imag_coh = pd.DataFrame(
    list(subject_coherence_imaginary.items()),
    columns=["subject_id", "imaginary_coherence"],
)

# Attach age and sex; ids are strings on both sides so the keys match.
metadata["subject_id"] = metadata["subject_id"].astype(str)
df_imag_coh["subject_id"] = df_imag_coh["subject_id"].astype(str)
merged_imag = df_imag_coh.merge(metadata[["subject_id", "age", "sex"]], on="subject_id", how="inner")

# Line/marker colour and legend text for each group.
gender_colors = {"female": "blue", "male": "green"}
gender_labels = {"female": "Female", "male": "Male"}

fig, ax = plt.subplots(figsize=(14, 7))

# Lower-cased once so the group match below is case-insensitive.
sex_lower = merged_imag["sex"].str.lower()

for sex in ["female", "male"]:
    data_gender = merged_imag[sex_lower == sex]
    color = gender_colors[sex]

    # Outlier bounds are computed within the group: mean ± 3 standard deviations.
    group_values = data_gender["imaginary_coherence"]
    mean_coh = group_values.mean()
    std_coh = group_values.std()
    lower_bound = mean_coh - 3 * std_coh
    upper_bound = mean_coh + 3 * std_coh
    filtered = data_gender[group_values.between(lower_bound, upper_bound)]

    # Per-age mean, standard deviation and subject count.
    grouped = (
        filtered.groupby("age")["imaginary_coherence"]
        .agg(mean_coherence="mean", std_coherence="std", n="count")
        .reset_index()
    )

    # 95% interval as mean ± 1.96 * standard error of the mean.
    grouped["sem"] = grouped["std_coherence"] / np.sqrt(grouped["n"])
    half_width = 1.96 * grouped["sem"]
    grouped["ci_upper"] = grouped["mean_coherence"] + half_width
    grouped["ci_lower"] = grouped["mean_coherence"] - half_width

    grouped_sorted = grouped.sort_values("age")
    x, y, ci_upper, ci_lower = (
        grouped_sorted[column].values
        for column in ("age", "mean_coherence", "ci_upper", "ci_lower")
    )

    # Drop ages where any of the four series is NaN or infinite.
    valid_mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(ci_upper) & np.isfinite(ci_lower)
    x_clean, y_clean = x[valid_mask], y[valid_mask]
    ci_upper_clean, ci_lower_clean = ci_upper[valid_mask], ci_lower[valid_mask]

    # Degree-5 interpolating splines evaluated on 300 evenly spaced ages.
    x_smooth = np.linspace(x_clean.min(), x_clean.max(), 300)
    mean_spline, upper_spline, lower_spline = (
        make_interp_spline(x_clean, values, k=5)
        for values in (y_clean, ci_upper_clean, ci_lower_clean)
    )
    y_smooth = mean_spline(x_smooth)
    ci_upper_smooth = upper_spline(x_smooth)
    ci_lower_smooth = lower_spline(x_smooth)

    # Points and band carry no label, so only the line appears in the legend.
    ax.scatter(x_clean, y_clean, alpha=0.6, color=color, s=25)
    ax.fill_between(x_smooth, ci_lower_smooth, ci_upper_smooth, color=color, alpha=0.2)
    ax.plot(x_smooth, y_smooth, color=color, linewidth=1.5, label=gender_labels[sex])

ax.set_xlabel("Age")
ax.set_ylabel("Average Imaginary Alpha Coherence (8–13 Hz)")
ax.set_title("Average Imaginary Alpha Coherence across Age by Sex")
ax.grid(True)
ax.legend()
ax.set_xticks(np.arange(0, 95, 10))
fig.tight_layout()
plt.show()
