This script extracts alpha band parameters (individual alpha peak frequency (IAF), alpha power (broadband, lower, and upper), peak width, peak start, and peak end) from .mat data files.
* The alpha band parameters were calculated using a custom Matlab script:
    * 'adapted_el_a1_a2_bandpower_non_detrended_.m' to calculate parameters on non-detrended PSD
    * each of the parameters was saved to a separate .mat file

The second part of the script visualizes the parameters and compares them between groups.

In [None]:
import numpy as np
import pandas as pd
import scipy.io as spio
from openpyxl import Workbook
import seaborn as sns
import matplotlib.pyplot as plt

## Loading and Preparing the Data

In [None]:
# Locations of the per-parameter .mat files, keyed by parameter name.
# Fill in every path before running the cells below; each file is expected
# to contain a variable named after its key.
file_paths = dict(
    iaf='',
    broadpow='',
    lowerpow='',
    upperpow='',
    peakwidth='',
    peakstart='',
    peakend='',
)

In [None]:
# Read every parameter matrix out of its .mat file. The variable stored in
# each file is looked up under the same name as the parameter key.
data = {
    name: spio.loadmat(path)[name]
    for name, path in file_paths.items()
}

In [None]:
# Load the subject IDs and the channel labels.
# Fill in the two .mat paths below; only the first row of each stored
# array ('ID_all' and 'chan_label') is kept.
subj_id_contents = spio.loadmat('')
subj_id_mat = subj_id_contents['ID_all'][0]

channels_contents = spio.loadmat('')
channels_mat = channels_contents['chan_label'][0]

In [None]:
def _first_as_str(cell):
    """Return the first element of a loaded MATLAB cell entry as a string."""
    return str(cell[0])


# Turn the loaded subject-ID and channel-label entries into plain string lists
names = list(map(_first_as_str, subj_id_mat))
channels = list(map(_first_as_str, channels_mat))

In [None]:
# Subject-ID ranges and the group label assigned to each range.
# This mapping is the single source of truth for the binning below: both the
# bin edges and the labels handed to pd.cut are derived from it, so the two
# cannot drift apart. The ranges must be listed in ascending order and be
# contiguous (each upper bound equals the next lower bound).
group_mapping = {
    (0, 100): 'OC',
    (100, 200): 'IUD',
    (200, 300): 'NCF',
    (300, 400): 'NCL',
    (400, float('inf')): 'M',
}

# Bin edges: the lower bound of every range plus the upper bound of the last one
group_bins = [lower for lower, _ in group_mapping] + [list(group_mapping)[-1][1]]
group_labels = list(group_mapping.values())

# Numeric subject IDs: keep only the digit characters of each subject name.
# This is identical for every parameter, so it is computed once up front.
subject_ids = [int(''.join(filter(str.isdigit, name))) for name in names]

# Create a dictionary to store one DataFrame per parameter
param_data = {}

for param_name in file_paths:
    # Rows are subjects (indexed by numeric ID), columns are channels
    param_df = pd.DataFrame(data[param_name], columns=channels, index=subject_ids)

    # Move the subject IDs out of the index into a regular 'id' column
    param_df = param_df.reset_index().rename(columns={'index': 'id'})

    # Add the 'group' column right after 'id'.
    # NOTE(review): pd.cut closes intervals on the right by default, so an ID of
    # exactly 100/200/300/400 falls into the lower group and an ID of 0 gets no
    # group (NaN) - confirm this matches the intended ID scheme.
    param_df.insert(1, 'group', pd.cut(param_df['id'], bins=group_bins, labels=group_labels))

    # Store the DataFrame in the dictionary with its parameter name as the key
    param_data[param_name] = param_df

### Subjects for whom no alpha peak was detected in more than 50% of the channels are excluded from the analysis.

In [None]:
iaf_nan = []
nan_threshold = 31  # subjects with more missing values than this are excluded
subjects_exclude = []

# Count the missing values in every row (one row per subject) of the IAF table.
# NOTE(review): the count runs over all columns of the row, not only the
# channel columns - confirm that 'id' and 'group' can never be missing.
iaf_table = param_data['iaf']
nan_per_subject = iaf_table.isnull().sum(axis=1)

for subject_id, nan_count in zip(iaf_table['id'], nan_per_subject):
    if nan_count <= nan_threshold:
        continue
    subjects_exclude.append(subject_id)
    print(f"Subject ID: {subject_id}, NaN count: {nan_count}")

In [None]:
# Build the filtered tables: for every parameter keep only the rows whose
# subject ID is not listed in subjects_exclude.
filtered_data = {
    name: table.loc[~table['id'].isin(subjects_exclude)]
    for name, table in param_data.items()
}

### Extracting data from specified channel(s)

In [None]:
# Parameters to extract for the chosen channel
parameters = ['iaf', 'broadpow', 'lowerpow', 'upperpow', 'peakwidth']

# Channel of interest (must match one of the channel column names)
channel = ''

# For every parameter keep just the subject ID, the group and the chosen channel
selected_columns = ['id', 'group', channel]
data_channel = {
    param: filtered_data[param][selected_columns]
    for param in parameters
}

In [None]:
# Write the selected channel's values to a single workbook,
# using one sheet per parameter.
output_path = f'parameters_{channel}_group.xlsx'
with pd.ExcelWriter(output_path) as workbook:
    for sheet, table in data_channel.items():
        table.to_excel(workbook, sheet_name=sheet, index=False)

### Calculating Median Parameter Values (each subject)

In [None]:
# Create a dictionary to store the per-subject median DataFrame of each parameter
median_subj_data = {}

# For each parameter, compute every subject's median across all channel columns
for param_name, param_df in filtered_data.items():
    # Take an explicit copy of the first two columns ('id' and 'group').
    # Adding a column to a bare iloc slice of the filtered frame is chained
    # assignment and triggers pandas' SettingWithCopyWarning.
    id_group_median = param_df.iloc[:, 0:2].copy()

    # Row-wise median over the channel columns (everything after 'id' and 'group')
    id_group_median[f'median_{param_name}'] = param_df.iloc[:, 2:].median(axis=1)

    median_subj_data[param_name] = id_group_median

In [None]:
# Write the per-subject medians to a single workbook, one sheet per parameter
subject_medians_path = 'median_subj_group.xlsx'
with pd.ExcelWriter(subject_medians_path) as workbook:
    for sheet, table in median_subj_data.items():
        table.to_excel(workbook, sheet_name=sheet, index=False)

In [None]:
# Create a dictionary to store the per-group, per-channel medians of each parameter
median_channel_data = {}

# Iterate through the filtered_data dictionary for each parameter
for param_name, param_df in filtered_data.items():
    # Remove the 'id' column so that only 'group' and the channels remain
    param_ch_id_filtered = param_df.drop('id', axis=1)

    # Median of every channel within each group. 'group' is categorical, so
    # `observed` is passed explicitly: observed=False keeps a row for every
    # group label even when no subject falls into it, and avoids depending on
    # the pandas default, which differs between versions.
    median_ch = param_ch_id_filtered.groupby('group', observed=False).median()

    # Turn the group labels from the index back into a regular 'group' column
    median_ch = median_ch.reset_index()

    # Make sure the 'group' column has category dtype
    median_ch['group'] = median_ch['group'].astype('category')

    # Store the median DataFrame under its parameter name
    median_channel_data[param_name] = median_ch

In [None]:
# Write the per-group channel medians to a single workbook, one sheet per parameter
group_medians_path = 'median_chan_group.xlsx'
with pd.ExcelWriter(group_medians_path) as workbook:
    for sheet, table in median_channel_data.items():
        table.to_excel(workbook, sheet_name=sheet, index=False)

# VISUALIZATION

### Function to plot parameters:

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_violin_with_scatter(param_name, median_subj_data, title=None, x_label=None, y_label=None):
    """
    Draw a per-group violin plot of one parameter with the individual points on top.

    Args:
        param_name (str): Key into ``median_subj_data`` (e.g., 'iaf', 'broadpow', 'peakwidth').
            The plotted column is ``median_<param_name>``.
        median_subj_data (dict): Maps parameter names to DataFrames that hold a 'group'
            column and a ``median_<param_name>`` column.
        title (str, optional): Plot title; defaults to 'Median <PARAM_NAME> Value'.
        x_label (str, optional): x-axis label; defaults to 'Group'.
        y_label (str, optional): y-axis label; defaults to 'Median <PARAM_NAME> Value'.

    An empty string for title, x_label or y_label is treated like None.
    The figure is shown immediately; nothing is returned.
    """
    frame = median_subj_data[param_name]
    value_column = f'median_{param_name}'

    # New 12x8 inch figure for this plot
    plt.figure(figsize=(12, 8))

    # Distribution per group, then the individual subjects as red jittered points
    sns.violinplot(x='group', y=value_column, data=frame)
    sns.stripplot(x='group', y=value_column, data=frame, color='red', jitter=True, size=5)

    # Custom texts win; otherwise fall back to the defaults
    plt.title(title or f'Median {param_name.upper()} Value')
    plt.xlabel(x_label or 'Group')
    plt.ylabel(y_label or f'Median {param_name.upper()} Value')

    plt.show()


In [None]:
def plot_boxplot_with_scatter(param_name, median_subj_data, title=None, x_label=None, y_label=None):
    """
    Draw a per-group boxplot of one parameter with the individual points on top.

    Args:
        param_name (str): Key into ``median_subj_data`` (e.g., 'iaf', 'broadpow', 'peakwidth').
            The plotted column is ``median_<param_name>``.
        median_subj_data (dict): Maps parameter names to DataFrames that hold a 'group'
            column and a ``median_<param_name>`` column.
        title (str, optional): Plot title; defaults to 'Median <PARAM_NAME>'.
        x_label (str, optional): x-axis label; defaults to 'Group'.
        y_label (str, optional): y-axis label; defaults to 'Median <PARAM_NAME> Value'.

    An empty string for title, x_label or y_label is treated like None.
    The figure is shown immediately; nothing is returned.
    """
    frame = median_subj_data[param_name]
    value_column = f'median_{param_name}'

    # New 12x8 inch figure for this plot
    plt.figure(figsize=(12, 8))

    # Boxes without outlier markers, since every subject is drawn as a red
    # jittered point by the strip plot anyway
    sns.boxplot(x='group', y=value_column, data=frame, width=0.5, showfliers=False)
    sns.stripplot(x='group', y=value_column, data=frame, color='red', jitter=True, size=5)

    # Custom texts win; otherwise fall back to the defaults
    plt.title(title or f'Median {param_name.upper()}')
    plt.xlabel(x_label or 'Group')
    plt.ylabel(y_label or f'Median {param_name.upper()} Value')

    plt.show()


## IAF

In [None]:
# Per-subject median IAF by group (default title)
plot_boxplot_with_scatter('iaf', median_subj_data, y_label='IAF (Hz)')

## Alpha power

In [None]:
# Per-subject median broadband alpha power by group
plot_boxplot_with_scatter(
    'broadpow',
    median_subj_data,
    title='Median alpha power',
    y_label='Alpha power (µV²)',
)

## Lower alpha power

In [None]:
# Per-subject median lower alpha power by group
plot_boxplot_with_scatter(
    'lowerpow',
    median_subj_data,
    title='Median lower alpha power',
    y_label='Alpha1 power (µV²)',
)

## Upper alpha power

In [None]:
# Per-subject median upper alpha power by group
plot_boxplot_with_scatter(
    'upperpow',
    median_subj_data,
    title='Median upper alpha power',
    y_label='Alpha2 power (µV²)',
)

## Alpha width

In [None]:
# Per-subject median alpha peak width by group
# (title typo fixed: 'Meadian' -> 'Median')
plot_boxplot_with_scatter(
    'peakwidth',
    median_subj_data,
    title='Median alpha peak width',
    y_label='Peak width (Hz)',
)