##  Extracting rsq values for each channel and subject + mean values for each subject with/without knee parameter

In [None]:
import os
import numpy as np
import pandas as pd
import scipy.io as spio
from openpyxl import Workbook

In [None]:
# Directory that holds the per-subject MATLAB data files (fill in before running).
data_dir = ''
# os.scandir yields entries in arbitrary order and also returns hidden OS
# files such as '.DS_Store'. Skip dot-files and sort the names so that the
# subject order (and the row order of everything built from file_list) is
# the same on every run. The `with` block closes the directory iterator.
with os.scandir(data_dir) as dir_entries:
    file_list = sorted(
        entry.name
        for entry in dir_entries
        if entry.is_file() and not entry.name.startswith('.')
    )

In [None]:
# Accumulators filled by the extraction loop below, one row per channel:
# low_rsq receives only the below-threshold channels, all_rsq receives all of them.
low_rsq, all_rsq = [], []

In [None]:
# file_list.remove('.DS_Store')

* Extracting subjects and channels with low rsq
* THRESHOLD: 0.7 (determined by visual inspection)

**EXTRACTING RSQ & LOW RSQ VALUES**

In [None]:
# Collect the r-squared of every subject/channel and flag the poor fits.

# Channels whose r-squared is below this value are recorded in low_rsq.
RSQ_THRESHOLD = 0.7

# Empty the accumulators in place so that re-running this cell does not
# append a second copy of every row.
low_rsq.clear()
all_rsq.clear()

for file_name in file_list:
    # Load one subject's MATLAB data file
    mat_data = spio.loadmat(os.path.join(data_dir, file_name))
    ap_params = mat_data['ap_params']  # Aperiodic Parameters: offset, exponent
    r_squared = mat_data['r_squared']

    # The field names of ap_params give the channels; the subject name is
    # the part of the file name before '_parameters'.
    channel = ap_params.dtype.names
    name = file_name.split('_parameters')[0]

    # r_squared is indexed by position, so walk the channel positions;
    # channel numbers reported in the tables are 1-based.
    for ch in range(len(channel)):
        rsq = r_squared[0][0][ch][0][0]

        all_rsq.append([name, rsq, ch + 1])

        if rsq < RSQ_THRESHOLD:
            low_rsq.append([name, rsq, ch + 1])

In [None]:
# Tabulate the below-threshold entries: one row per [id, rsq, channel_number]
low_rsq_table = pd.DataFrame(
    data=low_rsq,
    columns=['id', 'rsq', 'channel_number'],
)

In [None]:
# low_rsq_table.head()

In [None]:
# Channel table from the CSV file (fill in the path before running).
# Assumes one row per channel, in the same order as the channels in the
# MATLAB files — TODO confirm against the data.
channels = pd.read_csv('')
# Number the rows 1..N so they can be matched on the 1-based
# 'channel_number' column. The count is taken from the table itself
# instead of a fixed 62, so a CSV with a different number of rows does
# not fail with a length-mismatch error.
channels['channel_number'] = range(1, len(channels) + 1)
# channels.head()

In [None]:
# Attach the channel table columns to each low-rsq row; the left join keeps
# every row of low_rsq_table even when no channel_number matches.
low_rsq_table = pd.merge(low_rsq_table, channels, on='channel_number', how='left')

In [None]:
# low_rsq_table.head()

In [None]:
# Number of low-rsq channels recorded for each subject id
low_rsq_counts = low_rsq_table['id'].value_counts()
people_low_rsq = (
    low_rsq_counts
    .rename_axis('people_low_rsq')
    .reset_index(name='chan_per_person')
)

In [None]:
# Show the per-subject count of low-rsq channels as the cell output
people_low_rsq

### **LOW RSQ**:

**Without knee**:
* V619: 7 channels with low rsq
* V619: additional channel to remove: 30 (O2)

**With knee**:
* NCF219: 7 chans with low rsq
* V619: 6 chans with low rsq

In [None]:
# Table of every collected rsq value, regardless of threshold
rsq_columns = ['id', 'rsq', 'channel_number']
all_rsq_df = pd.DataFrame(data=all_rsq, columns=rsq_columns)
# Preview the first rows as the cell output
all_rsq_df.head()

**To save low rsq if needed**

In [None]:
# Export to one Excel workbook with a sheet per table: the low-rsq rows
# (r2 < 0.7), the number of low-rsq channels per person, and the channel table.
sheet_labels = ['low_rsq_table', 'people_low_rsq', 'channels']
sheet_tables = [low_rsq_table, people_low_rsq, channels]
data_frames = list(zip(sheet_labels, sheet_tables))

low_rsq_output_path = 'low_rsq_with_knee_2023_12_18.xlsx'
with pd.ExcelWriter(low_rsq_output_path) as writer:
    for label, table in data_frames:
        table.to_excel(writer, index=False, sheet_name=label)

# EXTRACTING PARAMETERS
The other parameters (slope and offset) are extracted as well, in case they are needed later.

### **MEAN RSQ + SD FOR EACH SUBJECT**

In [None]:
# Per-subject summaries of the fit across channels:
# median/mean slope, median/mean offset, and mean/SD of r-squared.

# Subject/channel pairs to leave out; they are stored as NaN so that the
# nan-aware statistics below ignore them.
# NOTE(review): 'name' must equal the file-name prefix before '_parameters'
# and 'channel' must equal a field name of ap_params; otherwise nothing is
# excluded and no error is raised — confirm both spellings against the data.
excluded_channels = [{'name': 'V_619', 'channel': 'channel_30'}]
excluded_pairs = {(entry['name'], entry['channel']) for entry in excluded_channels}

# Subject x channel matrices for slope, offset and rsq. They are NaN-filled
# (not np.empty) so that a cell the loop never writes, e.g. for a file with
# fewer channels, is NaN instead of uninitialised memory.
# The column count comes from `channels` as it is defined before this cell.
matrix_shape = (len(file_list), len(channels))
slopes_matrix = np.full(matrix_shape, np.nan)
offsets_matrix = np.full(matrix_shape, np.nan)
rsq_matrix = np.full(matrix_shape, np.nan)

# Summary rows are gathered in lists and converted to DataFrames once,
# after the loop, instead of growing each DataFrame one row at a time.
slope_rows = []
offset_rows = []
rsq_rows = []

# Iterate over each MATLAB data file
for i, file_name in enumerate(file_list):
    mat_data = spio.loadmat(os.path.join(data_dir, file_name))
    ap_params = mat_data['ap_params']
    r_squared = mat_data['r_squared']

    # NOTE: this rebinds `channels` (until now the channel table read from
    # the CSV) to the tuple of ap_params field names. The later cells that
    # build slope_ch_id / offset_ch_id use it as column labels, so the name
    # is kept; re-run the CSV cell before using the channel table again.
    channels = ap_params.dtype.names
    name = file_name.split('_parameters')[0]

    # Per-subject values, one entry per channel
    slope_values = []
    offset_values = []
    rsq_values = []

    for ch, channel in enumerate(channels):
        rsq_value = r_squared[0][0][ch][0][0]
        off_n_slope = ap_params[0][0][ch][0]  # [offset, slope]

        if (name, channel) in excluded_pairs:
            # Excluded channel: contributes NaN to every statistic
            slope_value = np.nan
            offset_value = np.nan
            rsq_value = np.nan
        else:
            slope_value = off_n_slope[1]
            offset_value = off_n_slope[0]

        offset_values.append(offset_value)
        slope_values.append(slope_value)
        rsq_values.append(rsq_value)

        slopes_matrix[i, ch] = slope_value
        offsets_matrix[i, ch] = offset_value
        rsq_matrix[i, ch] = rsq_value

    # NaN-aware statistics across this subject's channels
    median_slope = np.nanmedian(slope_values)
    mean_slope = np.nanmean(slope_values)
    median_offset = np.nanmedian(offset_values)
    mean_offset = np.nanmean(offset_values)
    mean_rsq = np.nanmean(rsq_values)
    std_rsq = np.nanstd(rsq_values)

    slope_rows.append([name, median_slope, mean_slope])
    offset_rows.append([name, median_offset, mean_offset])
    rsq_rows.append([name, mean_rsq, std_rsq])

# One row per subject, in file_list order
median_mean_slope = pd.DataFrame(slope_rows, columns=['ID', 'median_slope', 'mean_slope'])
median_mean_offset = pd.DataFrame(offset_rows, columns=['ID', 'median_offset', 'mean_offset'])
rsq_mean_sd = pd.DataFrame(rsq_rows, columns=['ID', 'mean_rsq', 'std_rsq'])

In [None]:
# Write the per-subject mean and SD of rsq to a single-sheet Excel workbook
rsq_output_path = 'rsq_mean_sd_for_each_subj_WITH_KNEE.xlsx'
with pd.ExcelWriter(rsq_output_path) as rsq_writer:
    rsq_mean_sd.to_excel(rsq_writer, index=False, sheet_name='WITH_KNEE')

### To save other parameters if needed

In [None]:
# Write the slope and offset summaries to one workbook, one sheet per table
summary_sheets = {
    'median_mean_slope': median_mean_slope,
    'median_mean_offset': median_mean_offset,
}
with pd.ExcelWriter('slope_off_median_mean.xlsx') as writer:
    for sheet_label, summary_table in summary_sheets.items():
        summary_table.to_excel(writer, index=False, sheet_name=sheet_label)

In [None]:
# Export the slope and offset matrices as two variables of one .mat file
data = dict(slopes_matrix=slopes_matrix, offsets_matrix=offsets_matrix)
spio.savemat('slope_off_matrix.mat', mdict=data)

In [None]:
# Numeric row labels: the digits of each file-name prefix (the part before
# '_parameters'), joined together and converted to int.
subject_ids = [
    int(''.join(char for char in fname.split('_parameters')[0] if char.isdigit()))
    for fname in file_list
]

# Slope values labelled by numeric subject id (rows) and `channels` (columns)
slope_ch_id = pd.DataFrame(data=slopes_matrix, index=subject_ids, columns=channels)

# Offset values with the same row and column labels
offset_ch_id = pd.DataFrame(data=offsets_matrix, index=subject_ids, columns=channels)
