In [1]:
import os
import numpy as np
import pandas as pd
import scipy.io as spio
from openpyxl import Workbook

In [2]:
# List the .mat inputs through the same relative folder that the loading cell
# reads from, so the notebook does not depend on one user's absolute home path.
os.listdir('alpha_peaks_mat')

['alpha_peaks_matslope_mtx_05.mat',
 'alpha_peaks_matall_IDs_174.mat',
 'alpha_peaks_matalpha_pow_mtx_05.mat',
 'alpha_peaks_matchannel_locations_62.mat',
 'alpha_peaks_matchannel_labels_62.mat',
 'alpha_peaks_matalpha_params_findpeaks_detrend_3_40_7_13_threshold_05.mat',
 'alpha_peaks_matalpha_freq_mtx_05.mat',
 'alpha_peaks_matoffset_mtx_05.mat']

In [4]:
# Read the MATLAB .mat exports and keep only the variable needed from each file.
def _mat_variable(path, key):
    """Load the .mat file at `path` and return the variable stored under `key`."""
    contents = spio.loadmat(path)
    return contents[key]


# Matrices that the later cells turn into the IAF and power tables.
iaf_mat = _mat_variable('alpha_peaks_mat/alpha_peaks_matalpha_freq_mtx_05.mat', 'iaf')
power_mat = _mat_variable('alpha_peaks_mat/alpha_peaks_matalpha_pow_mtx_05.mat', 'broadpow')

# Subject IDs and channel labels: only the first element ([0]) of each variable is kept.
subj_id_mat = _mat_variable('alpha_peaks_mat/alpha_peaks_matall_IDs_174.mat', 'ID_all')[0]
channels_mat = _mat_variable('alpha_peaks_mat/alpha_peaks_matchannel_labels_62.mat', 'chan_label')[0]

In [6]:
# Every entry of subj_id_mat wraps one subject name; unwrap its first element.
names = [entry[0] for entry in subj_id_mat]

In [7]:
# Every entry of channels_mat wraps one channel label; unwrap its first element.
channels = [entry[0] for entry in channels_mat]

In [8]:
# Materialise the loaded matrices as numpy arrays.
iaf, power = np.array(iaf_mat), np.array(power_mat)

# Numeric subject IDs: keep only the digit characters of each name and parse
# them as a single integer. Computed once and shared by both tables.
subject_ids = [int(''.join(ch for ch in name if ch.isdigit())) for name in names]

# IAF and power tables: rows are labelled by subject ID, columns by channel label.
iaf_ch_id = pd.DataFrame(iaf, index=subject_ids, columns=channels)
power_ch_id = pd.DataFrame(power, index=subject_ids, columns=channels)

In [9]:
# VALUES FOR EACH SUBJECT AND CHANNEL

# Single source of truth for the ID-range -> group assignment.
# Each key is treated as a half-open range [lower, upper), so an ID that sits
# exactly on a boundary (100, 200, 300, 400) belongs to the range it starts.
group_mapping = {
    (0, 100): 'OC',
    (100, 200): 'IUD',
    (200, 300): 'NCF',
    (300, 400): 'NCL',
    (400, float('inf')): 'M',
}

# Bin edges and labels are derived from group_mapping instead of being typed
# out a second time, so the two can never drift apart.
group_bins = [lower for lower, _upper in group_mapping] + [list(group_mapping)[-1][1]]
group_labels = list(group_mapping.values())


def _with_id_and_group(table):
    """Return `table` with the subject ID as an 'id' column and a 'group' column after it.

    The index is moved into an 'id' column and 'group' is inserted at
    position 1. A table that already has an 'id' column is returned unchanged,
    so re-running this cell does not duplicate columns or raise on insert.
    """
    if 'id' in table.columns:
        return table
    labelled = table.reset_index().rename(columns={'index': 'id'})
    # right=False makes the bins left-closed, [lower, upper). pd.cut's default
    # is right-closed, which would put ID 100 into 'OC' and leave ID 0 unlabelled.
    groups = pd.cut(labelled['id'], bins=group_bins, labels=group_labels, right=False)
    labelled.insert(1, 'group', groups)
    return labelled


iaf_ch_id = _with_id_and_group(iaf_ch_id)
power_ch_id = _with_id_and_group(power_ch_id)

In [10]:
# Exclude subjects whose IAF row holds more than `nan_threshold` missing values.
nan_threshold = 31

# Missing values per subject, counted across the whole row in one vectorized
# pass (same count as row.isnull().sum() for each row).
iaf_nan_counts = iaf_ch_id.isnull().sum(axis=1)
iaf_keep = iaf_nan_counts <= nan_threshold

# Boolean indexing keeps the original row labels and column dtypes; rebuilding
# the frame from iterrows() rows would re-infer every column from generic
# objects (so 'group' would come back as plain strings, not a categorical).
filtered_iaf = iaf_ch_id.loc[iaf_keep].copy()

# (subject id, NaN count) for every excluded subject, in table order.
iaf_nan = list(zip(iaf_ch_id.loc[~iaf_keep, 'id'].tolist(),
                   iaf_nan_counts[~iaf_keep].tolist()))

# The header is built from nan_threshold so it cannot disagree with the filter.
print(f"Subjects with more than {nan_threshold} NaN values:")
for subject, nan_count in iaf_nan:
    print(f"Subject ID: {subject}, NaN count: {nan_count}")


Subjects with more than 31 NaN values:
Subject ID: 221, NaN count: 39
Subject ID: 13, NaN count: 58
Subject ID: 118, NaN count: 52
Subject ID: 122, NaN count: 62


In [11]:
# Exclude subjects whose power row holds more than `nan_threshold` missing values.
nan_threshold = 31

# Missing values per subject, counted across the whole row in one vectorized
# pass (same count as row.isnull().sum() for each row).
power_nan_counts = power_ch_id.isnull().sum(axis=1)
power_keep = power_nan_counts <= nan_threshold

# Boolean indexing keeps the original row labels and column dtypes; rebuilding
# the frame from iterrows() rows would re-infer every column from generic
# objects (so 'group' would come back as plain strings, not a categorical).
filtered_power = power_ch_id.loc[power_keep].copy()

# (subject id, NaN count) for every excluded subject, in table order.
# Stored under its own name so the IAF exclusion list is not overwritten.
power_nan = list(zip(power_ch_id.loc[~power_keep, 'id'].tolist(),
                     power_nan_counts[~power_keep].tolist()))

# The header is built from nan_threshold so it cannot disagree with the filter.
print(f"Subjects with more than {nan_threshold} NaN values:")
for subject, nan_count in power_nan:
    print(f"Subject ID: {subject}, NaN count: {nan_count}")

Subjects with more than 31 NaN values:
Subject ID: 221, NaN count: 39
Subject ID: 13, NaN count: 58
Subject ID: 118, NaN count: 52
Subject ID: 122, NaN count: 62


In [12]:
# MEDIAN VALUES FOR EACH SUBJECT
# Columns 0-1 are 'id' and 'group'; every column from position 2 onwards is a
# channel, so the open-ended slice 2: replaces the hard-coded 2:65 bound.
# .assign() returns a new frame, so the median column is never written into a
# slice of the filtered table (which triggers SettingWithCopyWarning).
id_group_median_iaf = filtered_iaf.iloc[:, :2].assign(
    median_iaf=filtered_iaf.iloc[:, 2:].median(axis=1)
)

id_group_median_power = filtered_power.iloc[:, :2].assign(
    median_power=filtered_power.iloc[:, 2:].median(axis=1)
)

In [152]:
# Drop the subject 'id' so that only 'group' and the channel columns remain.
iaf_ch_id_filtered = filtered_iaf.drop(columns='id')
power_ch_id_filtered = filtered_power.drop(columns='id')

# Per-group median of every channel. reset_index() turns the group labels back
# into a regular 'group' column (it already carries that name, so no rename is
# needed), which is then stored as a categorical.
median_iaf_ch = (
    iaf_ch_id_filtered
    .groupby('group')
    .median()
    .reset_index()
    .astype({'group': 'category'})
)
median_power_ch = (
    power_ch_id_filtered
    .groupby('group')
    .median()
    .reset_index()
    .astype({'group': 'category'})
)

In [154]:
# Sheet name -> table, in the order the sheets are written to the workbook.
# NOTE(review): the two '*_removed_subj' tables are the ones whose 'id' column
# was dropped, so those sheets carry no subject identifier - confirm intended.
# NOTE(review): 'power_slope_groups' holds the per-group power medians; the
# sheet name may be a leftover from another analysis - confirm before renaming.
sheets = {
    'iaf_each_ch_sbj_original': iaf_ch_id,
    'iaf_each_ch_sbj_removed_subj': iaf_ch_id_filtered,
    'median_iaf_groups': median_iaf_ch,
    'id_group_median_iaf': id_group_median_iaf,
    'power_each_ch_sbj_original': power_ch_id,
    'power_each_ch_sbj_removed_subj': power_ch_id_filtered,
    'power_slope_groups': median_power_ch,
    'id_group_median_power': id_group_median_power,
}
data_frames = list(sheets.items())

# One worksheet per table; the row index is not written.
with pd.ExcelWriter('iaf_power_ch_groups.xlsx') as workbook:
    for title, table in data_frames:
        table.to_excel(workbook, sheet_name=title, index=False)
