In [24]:
import os
import numpy as np
import pandas as pd
import scipy.io as spio
from openpyxl import Workbook

In [25]:
# Specify the directory where the MATLAB data files are located
data_dir = '/Users/linagladutyte/Documents/LABES/RAMYBE/FOOF/fooof_params/'
file_list = [file.name for file in os.scandir(data_dir) if file.is_file()]

In [26]:
# Initialize an empty list to store low_rsq values
low_rsq = []

In [27]:
#Extracting subjects and channels with low rsq
#THRESHOLD: 0.7 (determined by visual inspection)

In [28]:
# Iterate over each MATLAB data file
for file_name in file_list:
    # Load the MATLAB data file
    mat_data = spio.loadmat(os.path.join(data_dir, file_name))
    ap_params = mat_data['ap_params'] # Aperiodic Parameters: offset, exponent
    r_squared = mat_data['r_squared']

    # Get the channel names and subject name from the file name
    channel = ap_params.dtype.names
    name = file_name.split('_parameters')[0]

       # Iterate over each channel in the data file
    for ch in range(len(channel)):
        rsq = r_squared[0][0][ch][0][0]
        
        # Check if rsq is below the threshold
        if rsq < 0.7:
            # Add the low_rsq information to the list
            low_rsq.append([name, rsq, ch+1])

In [29]:
# Create a pandas DataFrame to store the results
low_rsq_table = pd.DataFrame(low_rsq, columns = ['id', 'rsq', 'channel_number'])

In [30]:
low_rsq_table.head()

Unnamed: 0,id,rsq,channel_number
0,UID_118,0.606576,45
1,UID_118,0.617038,58
2,OC_14,0.646972,26
3,V_604,0.651457,3
4,V_604,0.499785,18


In [31]:
# Read channels from CSV file
channels = pd.read_csv('/Users/linagladutyte/Documents/LABES/RAMYBE/FOOF/channels.csv')
#add channel numbers
channels['channel_number'] = range(1, 63)
channels.head()

Unnamed: 0,channel_name,channel_number
0,Fp1,1
1,Fpz,2
2,Fp2,3
3,F7,4
4,F3,5


In [32]:
low_rsq_table = low_rsq_table.merge(channels, how='left', on='channel_number')

In [33]:
low_rsq_table.head()

Unnamed: 0,id,rsq,channel_number,channel_name
0,UID_118,0.606576,45,C6
1,UID_118,0.617038,58,FT8
2,OC_14,0.646972,26,P8
3,V_604,0.651457,3,Fp2
4,V_604,0.499785,18,CP5


In [34]:
# Calculate the counts of low_rsq per person
people_low_rsq = low_rsq_table['id'].value_counts().reset_index()
people_low_rsq.columns = ['people_low_rsq', 'chan_per_person']

In [35]:
people_low_rsq.head()

Unnamed: 0,people_low_rsq,chan_per_person
0,V_619,7
1,OC_6,5
2,NCF_229,4
3,UID_112,3
4,UID_118,2


In [36]:
# Save the data to an Excel file: one sheet for all IDs and channels with r2<0.7, another with number of channels per person
data_frames = [
    ('low_rsq_table', low_rsq_table),
    ('people_low_rsq', people_low_rsq),
    ('channels', channels),
]
with pd.ExcelWriter('low_rsq_2023_07_12.xlsx') as writer:
    for sheet_name, data_frame in data_frames:
        data_frame.to_excel(writer, sheet_name=sheet_name, index=False)

In [None]:
# V619: 7 channels with low rsq
# V619: additional channel to remove: 30 (O2)

In [None]:
#EXTRACTING SLOPE

In [16]:
# Create an empty DataFrame to store the mean slope values
median_mean_slope = pd.DataFrame(columns=['ID', 'median_slope', 'mean_slope'])
median_mean_offset = pd.DataFrame(columns=['ID', 'median_offset', 'mean_offset'])

# Specify the subjects and channels to exclude
excluded_channels = [{'name': 'V_619', 'channel': 'channel_30'}]

# Initialize an empty matrix to store the slope and offset values
slopes_matrix = np.empty((len(file_list), len(channels)))
offsets_matrix = np.empty((len(file_list), len(channels)))

# Iterate over each MATLAB data file
for i, file_name in enumerate(file_list):
    # Load the MATLAB data file
    mat_data = spio.loadmat(os.path.join(data_dir, file_name))
    ap_params = mat_data['ap_params']
    r_squared = mat_data['r_squared']

    # Get the channel names and subject name from the file name
    channels = ap_params.dtype.names
    name = file_name.split('_parameters')[0]

    # Create empty lists to store values
    slope = []
    offset = []

    # Iterate over each channel in the data file
    for ch, channel in enumerate(channels):
        rsq = r_squared[0][0][ch][0][0]
        off_n_slope = ap_params[0][0][ch][0]

        # Check if the channel should be excluded for the specific subject
        if (any(excluded['name'] == name and excluded['channel'] == channel for excluded in excluded_channels) or rsq < 0.7):
            slope_value = np.nan
            offset_value = np.nan
        else:
            slope_value = off_n_slope[1]
            offset_value = off_n_slope[0]

        # Append the offset value to the list
        offset.append(offset_value)
        # Append the slope value to the list
        slope.append(slope_value)
        # Store the slope value in the slopes matrix
        slopes_matrix[i, ch] = slope_value
        # Store the slope value in the slopes matrix
        offsets_matrix[i, ch] = offset_value

    # Calculate the median slope value
    median_slope = np.nanmedian(slope, axis=1)
    # Calculate the mean slope value
    mean_slope = np.nanmean(slope, axis=1)
    # Calculate the median offset value
    median_offset = np.nanmedian(offset, axis=1)
    # Calculate the mean offset value
    mean_offset = np.nanmean(offset)

    # Update the median_mean_slope DataFrame with the subject's ID and slope values
    median_mean_slope.at[i, 'ID'] = name
    median_mean_slope.at[i, 'median_slope'] = median_slope
    median_mean_slope.at[i, 'mean_slope'] = mean_slope
    
    # Update the median_mean_offset DataFrame with the subject's ID and offset values
    median_mean_offset.at[i, 'ID'] = name
    median_mean_offset.at[i, 'median_offset'] = median_offset
    median_mean_offset.at[i, 'mean_offset'] = mean_offset

In [None]:
# Save median and mean slope and offset to excel 
with pd.ExcelWriter('slope_off_median_mean.xlsx') as writer:
    median_mean_slope.to_excel(writer, sheet_name='median_mean_slope', index=False)
    median_mean_offset.to_excel(writer, sheet_name='median_mean_offset', index=False)

In [17]:
median_mean_slope.head()

Unnamed: 0,ID,median_slope,mean_slope
0,NCF_207,1.223921,1.192372
1,NCF_212,1.600608,1.564022
2,UID_118,0.630236,0.576662
3,NCG_337,1.002135,0.936301
4,UID_110,1.314761,1.235825


In [18]:
# Save slope and offset matrixes to matlab structure

data = {'slopes_matrix': slopes_matrix,
        'offsets_matrix': offsets_matrix}

spio.savemat('slope_off_matrix.mat', data)

In [19]:
# Create the DataFrame with slope values and modify the index values
slope_ch_id = pd.DataFrame(slopes_matrix, columns=channels, index=[int(''.join(filter(str.isdigit, name.split('_parameters')[0]))) for name in file_list])

# Create the DataFrame with offset values and modify the index values
offset_ch_id = pd.DataFrame(offsets_matrix, columns=channels, index=[int(''.join(filter(str.isdigit, name.split('_parameters')[0]))) for name in file_list])


In [20]:
#VALUES FOR EACH SUBJECT AND CHANNEL 
# Reset the index and rename the column
slope_ch_id = slope_ch_id.reset_index().rename(columns={'index': 'id'})

offset_ch_id = offset_ch_id.reset_index().rename(columns={'index': 'id'})

# Create a mapping dictionary for the 'group' column
group_mapping = {
    (0, 100): 'OC',
    (100, 200): 'IUD',
    (200, 300): 'NCF',
    (300, 400): 'NCL',
    (400, float('inf')): 'M',
}

# Add the 'group' column based on ID values for slope_ch_id DataFrame
slope_ch_id.insert(1, 'group', pd.cut(slope_ch_id['id'], bins=[0, 100, 200, 300, 400, float('inf')], labels=['OC', 'IUD', 'NCF', 'NCL', 'M']))

# Add the 'group' column based on ID values for offset_ch_id DataFrame
offset_ch_id.insert(1, 'group', pd.cut(offset_ch_id['id'], bins=[0, 100, 200, 300, 400, float('inf')], labels=['OC', 'IUD', 'NCF', 'NCL', 'M']))

In [21]:
#MEDIAN VALUES FOR EACH SUBJECT 
id_group_median_slope = slope_ch_id.iloc[:, 0:2]
id_group_median_slope['median_slope'] = slope_ch_id.iloc[:, 2:65].median(axis=1)
id_group_median_offset = slope_ch_id.iloc[:, 0:2]
id_group_median_offset['median_offset'] = offset_ch_id.iloc[:, 2:65].median(axis=1)

In [22]:
# Remove the 'id' column from slope_ch_id and offset_ch_id
slope_ch_id_filtered = slope_ch_id.drop('id', axis=1)
offset_ch_id_filtered = offset_ch_id.drop('id', axis=1)

# Calculate the median slope values for each channel based on the 'group' column
median_slope_ch = slope_ch_id_filtered.groupby('group').median()

# Calculate the median offset values for each channel based on the 'group' column
median_offset_ch = offset_ch_id_filtered.groupby('group').median()

# Reset the index and rename the columns
median_slope_ch = median_slope_ch.reset_index().rename(columns={'index': 'group'})
median_offset_ch = median_offset_ch.reset_index().rename(columns={'index': 'group'})

# Convert the 'group' column to category type
median_slope_ch['group'] = median_slope_ch['group'].astype('category')
median_offset_ch['group'] = median_offset_ch['group'].astype('category')

In [40]:
data_frames = [
    ('slope_each_ch_sbj', slope_ch_id),
    ('median_slope_groups', median_slope_ch),
    ('id_group_median_slope', id_group_median_slope),
    ('offset_each_ch_sbj', offset_ch_id),
    ('offset_slope_groups', median_offset_ch),
    ('id_group_median_offset', id_group_median_offset),
]

with pd.ExcelWriter('slope_off_ch_groups.xlsx') as writer:
    for sheet_name, data_frame in data_frames:
        data_frame.to_excel(writer, sheet_name=sheet_name, index=False)
