* This script is designed to extract and analyze aperiodic parameters from FOOOF-fitted PSD data.
* It includes data validation, where data with poor model fit (r-squared < 0.7) is excluded.
* The script processes data stored in MATLAB files, performs analysis, and saves the results in Excel and MATLAB formats.

In [None]:
# Importing Necessary Libraries
import os
import numpy as np
import pandas as pd
import scipy.io as spio
from openpyxl import Workbook

## Loading the FOOOF-fitted PSD data

In [None]:
# Specify the directory where the FOOOF parameter MATLAB data files are located
data_dir = ''

# os.scandir yields entries in arbitrary order, so sort the names: the row
# order of every per-subject table and matrix built from file_list is then
# reproducible. The `with` block closes the directory iterator promptly.
with os.scandir(data_dir) as entries:
    file_list = sorted(entry.name for entry in entries if entry.is_file())

## Identifying Low R-Squared Values
This section does the following:
* Extract subjects and channels where the model fit is below the threshold (r-squared < 0.7).
* Store details of these low r-squared values for inspection.
* Read channel information from a CSV file and merge with the low r-squared data.
* Calculate the number of low r-squared channels per subject.
* Save the data into an Excel file with separate sheets:
    * For all IDs and channels with r-squared lower than 0.7
    * Number of channels with low r-squared per subject

In [None]:
# One [subject, r-squared, 1-based channel number] row per poorly fitted channel
low_rsq = []

# Channels whose model fit falls below this r-squared value are flagged
rsq_threshold = 0.7

# Iterate over each MATLAB data file
for file_name in file_list:
    # Load the MATLAB data file
    mat_data = spio.loadmat(os.path.join(data_dir, file_name))
    ap_params = mat_data['ap_params']  # Aperiodic parameters: offset, exponent
    r_squared = mat_data['r_squared']

    # Channel names are the field names of ap_params; the subject name is the
    # part of the file name that precedes '_parameters'
    channel_names = ap_params.dtype.names
    subject_name = file_name.split('_parameters')[0]

    # Iterate over each channel
    for ch in range(len(channel_names)):
        rsq = r_squared[0][0][ch][0][0]
        if rsq < rsq_threshold:
            # Channel numbers are reported 1-based to match 'Channel_Number' below
            low_rsq.append([subject_name, rsq, ch + 1])

# Convert the list to a DataFrame
low_rsq_df = pd.DataFrame(low_rsq, columns=['Subject_ID', 'R_Squared', 'Channel_Number'])

# Read channel information and merge with low r-squared data
channels = pd.read_csv('')  # Update the path accordingly
# Number the rows 1..N from the table itself instead of assuming exactly 62
# channels, which raised a length-mismatch error for any other montage.
# NOTE(review): assumes the CSV rows are in the same order as the channels in
# the MATLAB files - confirm.
channels['Channel_Number'] = range(1, len(channels) + 1)

# Attach the channel details to every flagged channel
low_rsq_df = low_rsq_df.merge(channels, on='Channel_Number', how='left')
low_rsq_df.head()

In [None]:
# Count how many flagged (low r-squared) channels each subject has
low_rsq_subjects = (
    low_rsq_df['Subject_ID']
    .value_counts()
    .rename_axis('Subject_ID')
    .reset_index(name='Low_Rsq_Channel_Count')
)
low_rsq_subjects.head()

In [None]:
# Map each sheet name to the table that is written into it
data_frames = dict(
    Low_Rsq_Details=low_rsq_df,
    Low_Rsq_Count_Per_Subject=low_rsq_subjects,
    Channel_Details=channels,
)

# Write every table to its own sheet of a single Excel workbook
excel_file_name = 'low_rsq.xlsx'
with pd.ExcelWriter(excel_file_name) as writer:
    for sheet_name, data_frame in data_frames.items():
        data_frame.to_excel(writer, index=False, sheet_name=sheet_name)

## Extracting Slope and Offset Parameters (Each Subject)
This part of the script aims to:
* Calculate median and mean slope and offset values for each subject.
* Exclude specific subjects and channels as defined.
* Save the results in Excel and MATLAB formats.

In [None]:
# Subject/channel pairs to leave out of the analysis: one dict per exclusion,
# compared against the subject name ('name') and the channel name ('channel')
excluded_channels = [dict(name='', channel='')]

**For Excel format**

In [None]:
# Build the per-subject summary tables (median and mean slope/offset) that the
# following cells split by ID and export to Excel.
#
# Previously this cell only processed whatever `ap_params` / `r_squared` were
# left over from an earlier cell, iterated the channel-information table
# instead of the channel names, referenced `name` before it was defined, and
# never created `median_mean_slope` / `median_mean_offset`.
slope_rows = []
offset_rows = []

# Iterate over each MATLAB data file
for file_name in file_list:
    # Load the MATLAB data file
    mat_data = spio.loadmat(os.path.join(data_dir, file_name))
    ap_params = mat_data['ap_params']
    r_squared = mat_data['r_squared']

    # Channel names come from the data file itself; the subject name is the
    # part of the file name that precedes '_parameters'
    channel_names = ap_params.dtype.names
    name = file_name.split('_parameters')[0]

    # Per-channel values for this subject (NaN marks an excluded channel)
    slope = []
    offset = []

    # Iterate over each channel in the data file
    for ch, channel in enumerate(channel_names):
        rsq = r_squared[0][0][ch][0][0]
        off_n_slope = ap_params[0][0][ch][0]  # [offset, slope]

        # A channel is dropped when it is listed for this subject or when its
        # model fit is below the r-squared threshold
        is_excluded = any(
            excluded['name'] == name and excluded['channel'] == channel
            for excluded in excluded_channels
        )
        if is_excluded or rsq < rsq_threshold:
            slope_value = np.nan
            offset_value = np.nan
        else:
            slope_value = off_n_slope[1]
            offset_value = off_n_slope[0]

        offset.append(offset_value)
        slope.append(slope_value)

    # NaN-aware statistics so that excluded channels are ignored
    median_slope = np.nanmedian(slope)
    mean_slope = np.nanmean(slope)
    median_offset = np.nanmedian(offset)
    mean_offset = np.nanmean(offset)

    slope_rows.append([name, median_slope, mean_slope])
    offset_rows.append([name, median_offset, mean_offset])

median_mean_slope = pd.DataFrame(slope_rows, columns=['ID', 'median_slope', 'mean_slope'])
median_mean_offset = pd.DataFrame(offset_rows, columns=['ID', 'median_offset', 'mean_offset'])

Function to split 'ID' column into 'group' and 'id'

In [None]:
def split_id_column(df, id_column):
    """Split an ID column of the form '<group>_<id>' into 'group' and 'id'.

    Args:
        df: DataFrame that contains ``id_column``.
        id_column: Name of the string column to split.

    Returns:
        A new DataFrame without ``id_column`` and with the string columns
        'group' and 'id' added. The input DataFrame is left unmodified.

    Raises:
        ValueError: If ``id_column`` is not a column of ``df``.
    """
    if id_column not in df.columns:
        raise ValueError(f"The specified id_column '{id_column}' does not exist in the DataFrame.")

    # Split on the first underscore only, so an ID containing further
    # underscores no longer breaks the two-column assignment. Reindexing
    # guarantees exactly two result columns even when no ID has an underscore
    # (or the frame is empty); the missing part is then left as NaN.
    parts = df[id_column].str.split('_', n=1, expand=True).reindex(columns=[0, 1])

    # drop() returns a new frame, so the caller's DataFrame is not mutated
    result = df.drop(columns=id_column)
    result['group'] = parts[0]
    result['id'] = parts[1]

    return result

In [None]:
# Replace the combined 'ID' column of both summary tables with 'group' and 'id'
median_mean_slope, median_mean_offset = (
    split_id_column(summary, id_column='ID')
    for summary in (median_mean_slope, median_mean_offset)
)
median_mean_slope.head()

In [None]:
# Export the per-subject slope and offset summaries, one sheet per table
with pd.ExcelWriter('slope_offset_median_mean.xlsx') as writer:
    for sheet_name, table in (
        ('median_mean_slope', median_mean_slope),
        ('median_mean_offset', median_mean_offset),
    ):
        table.to_excel(writer, index=False, sheet_name=sheet_name)

**For Matlab matrix format**

In [None]:
# Per-subject summary tables: one row per data file
median_mean_slope = pd.DataFrame(columns=['ID', 'median_slope', 'mean_slope'])
median_mean_offset = pd.DataFrame(columns=['ID', 'median_offset', 'mean_offset'])

# Subject-by-channel matrices for the slope and offset values. They start out
# filled with NaN rather than np.empty's uninitialised memory, so any cell the
# loop does not write (e.g. a file with fewer channels) reads as missing
# instead of as an arbitrary number.
# NOTE(review): the column count is taken from `channels` as it exists before
# this cell runs - confirm that it matches the channel count of the data files.
slopes_matrix = np.full((len(file_list), len(channels)), np.nan)
offsets_matrix = np.full((len(file_list), len(channels)), np.nan)

# Iterate over each MATLAB data file
for i, file_name in enumerate(file_list):
    # Load the MATLAB data file
    mat_data = spio.loadmat(os.path.join(data_dir, file_name))
    ap_params = mat_data['ap_params']
    r_squared = mat_data['r_squared']

    # Channel names come from the data file itself; the subject name is the
    # part of the file name that precedes '_parameters'.
    # `channels` is rebound here because later cells use it as column labels.
    channels = ap_params.dtype.names
    name = file_name.split('_parameters')[0]

    # Per-channel values for this subject (NaN marks an excluded channel)
    slope = []
    offset = []

    # Iterate over each channel in the data file
    for ch, channel in enumerate(channels):
        rsq = r_squared[0][0][ch][0][0]
        off_n_slope = ap_params[0][0][ch][0]  # [offset, slope]

        # A channel is dropped when it is listed for this subject or when its
        # model fit is below the r-squared threshold
        is_excluded = any(
            excluded['name'] == name and excluded['channel'] == channel
            for excluded in excluded_channels
        )
        if is_excluded or rsq < rsq_threshold:
            slope_value = np.nan
            offset_value = np.nan
        else:
            slope_value = off_n_slope[1]
            offset_value = off_n_slope[0]

        # Record the value both in the per-subject lists and in the matrices
        offset.append(offset_value)
        slope.append(slope_value)
        slopes_matrix[i, ch] = slope_value
        offsets_matrix[i, ch] = offset_value

    # NaN-aware statistics so that excluded channels are ignored
    median_slope = np.nanmedian(slope)
    mean_slope = np.nanmean(slope)
    median_offset = np.nanmedian(offset)
    mean_offset = np.nanmean(offset)

    # Update the median_mean_slope DataFrame with the subject's ID and slope values
    median_mean_slope.at[i, 'ID'] = name
    median_mean_slope.at[i, 'median_slope'] = median_slope
    median_mean_slope.at[i, 'mean_slope'] = mean_slope

    # Update the median_mean_offset DataFrame with the subject's ID and offset values
    median_mean_offset.at[i, 'ID'] = name
    median_mean_offset.at[i, 'median_offset'] = median_offset
    median_mean_offset.at[i, 'mean_offset'] = mean_offset

In [None]:
# Save the slope and offset matrices to a MATLAB file; each keyword below
# becomes a variable of that name in the .mat file
data = dict(
    slopes_matrix=slopes_matrix,
    offsets_matrix=offsets_matrix,
)

spio.savemat('slope_off_matrix.mat', data)

## Median and Mean Values by Group and Channel
* The following section creates DataFrames for each subject and channel, categorizing them into groups based on subject ID.
* It calculates median values of the slopes and offsets for each group and channel.
* The results are saved in an Excel file with multiple sheets.

In [None]:
# Build the subject-by-channel tables of slope and offset values. Each row is
# labelled with the integer formed by the digits of the file's subject name
# (the part of the file name before '_parameters').
slope_ch_id, offset_ch_id = (
    pd.DataFrame(
        values,
        columns=channels,
        index=[
            int(''.join(char for char in file.split('_parameters')[0] if char.isdigit()))
            for file in file_list
        ],
    )
    for values in (slopes_matrix, offsets_matrix)
)

In [None]:
# Move the subject number out of the index into an explicit 'id' column
slope_ch_id = slope_ch_id.reset_index().rename(columns={'index': 'id'})
offset_ch_id = offset_ch_id.reset_index().rename(columns={'index': 'id'})

# ID range -> group label
group_mapping = {
    (0, 100): 'OC',
    (100, 200): 'IUD',
    (200, 300): 'NCF',
    (300, 400): 'NCL',
    (400, float('inf')): 'M',
}

# Derive the bin edges and labels for pd.cut from the mapping:
# edges  -> [0, 100, 200, 300, 400, inf]
# labels -> ['OC', 'IUD', 'NCF', 'NCL', 'M']
bin_edges = sorted({edge for bounds in group_mapping for edge in bounds})
bin_labels = list(group_mapping.values())

# NOTE(review): pd.cut uses right-closed intervals by default, so an ID of
# exactly 100 is labelled 'OC' and an ID of 0 gets no group - confirm that
# this matches the subject numbering scheme.

# Insert the 'group' column right after 'id' in the slope table
slope_groups = pd.cut(slope_ch_id['id'], bins=bin_edges, labels=bin_labels)
slope_ch_id.insert(1, 'group', slope_groups)

# Insert the 'group' column right after 'id' in the offset table
offset_groups = pd.cut(offset_ch_id['id'], bins=bin_edges, labels=bin_labels)
offset_ch_id.insert(1, 'group', offset_groups)

Median slope and offset values for each group and channel

In [None]:
# Subject numbers: the digits of each file's subject name (the part of the
# file name before '_parameters') read as one integer. Computed once and
# shared by both tables.
subject_ids = [
    int(''.join(filter(str.isdigit, file_name.split('_parameters')[0])))
    for file_name in file_list
]

# Create DataFrames with slope and offset values, indexed by subject ID
slope_df = pd.DataFrame(slopes_matrix, columns=channels, index=subject_ids)
offset_df = pd.DataFrame(offsets_matrix, columns=channels, index=subject_ids)

# Move the subject number out of the index into an explicit 'ID' column
slope_df = slope_df.reset_index().rename(columns={'index': 'ID'})
offset_df = offset_df.reset_index().rename(columns={'index': 'ID'})

# Define groups based on subject IDs
# NOTE(review): pd.cut uses right-closed intervals by default, so an ID of
# exactly 100 is labelled 'OC' and an ID of 0 gets no group - confirm that
# this matches the subject numbering scheme.
group_labels = ['OC', 'IUD', 'NCF', 'NCL', 'M']
group_bins = [0, 100, 200, 300, 400, float('inf')]

# Assign groups to DataFrames
slope_df.insert(1, 'Group', pd.cut(slope_df['ID'], bins=group_bins, labels=group_labels))
offset_df.insert(1, 'Group', pd.cut(offset_df['ID'], bins=group_bins, labels=group_labels))

# Median per group and channel. 'ID' is dropped first because the median of
# subject numbers is meaningless. observed=False is passed explicitly so that
# every group keeps its row (NaN when it has no subjects) regardless of the
# pandas version's default. The result is indexed by 'Group'.
median_slope = slope_df.drop(columns='ID').groupby('Group', observed=False).median()
median_offset = offset_df.drop(columns='ID').groupby('Group', observed=False).median()

In [None]:
# Save the per-subject tables and the per-group medians to one Excel workbook
excel_file = 'Slope_offset.xlsx'
with pd.ExcelWriter(excel_file) as writer:
    # Per-subject tables carry 'ID' and 'Group' as ordinary columns, so their
    # default integer index is not written
    slope_df.to_excel(writer, sheet_name='Slope_Subject_Channel', index=False)
    offset_df.to_excel(writer, sheet_name='Offset_Subject_Channel', index=False)
    # The grouped medians keep the group label in their index; it must be
    # written, otherwise the rows in the sheet cannot be told apart
    median_slope.to_excel(writer, sheet_name='Median_Slope_by_Group_Channel', index=True)
    median_offset.to_excel(writer, sheet_name='Median_Offset_by_Group_Channel', index=True)