# OTKA1 post analysis

In [None]:
import pickle
import re
import os
from pathlib import Path

import mne
import numpy as np
import pandas as pd
from mne.time_frequency import psd_array_welch

############### Constants ###############
# Define brain patches and frequency boundaries.
## Channel names
# A single cleaned recording is read here only to obtain its channel list.
epochs = mne.read_epochs('data/clean_data/sub-01_ses-01_task-baseline1_proc-clean_epo.fif')
# `ch_names` is a copy with the five listed channels removed; copying first keeps
# `epochs.ch_names` (and therefore `all_channels`) complete, because
# `list.remove` mutates the list in place.
ch_names = epochs.ch_names.copy()
for dropped_channel in ['M1', 'M2', 'EOG1', 'EOG2', 'ECG']:
    ch_names.remove(dropped_channel)
# Full channel list, including the channels removed from `ch_names` above.
all_channels = epochs.ch_names

# Electrode groups ("patches"): patch label -> names of the channels in it.
# Insertion order (LF ... OZ) is the same as before.
ba_patches = dict(
    LF=['Fp1', 'F3', 'F7', 'AF3', 'F1', 'F5'],
    LC=['C3', 'T7', 'FC1', 'FC3', 'FC5', 'C1', 'C5', 'FT7'],
    LP=['P3', 'P7', 'CP1', 'CP3', 'CP5', 'TP7', 'P1', 'P5'],
    LO=['O1', 'PO3'],
    RF=['Fp2', 'F4', 'F8', 'AF4', 'F2', 'F6'],
    RC=['C4', 'T8', 'FC2', 'FC4', 'FC6', 'C2', 'C6', 'FT8'],
    RP=['P4', 'P8', 'CP2', 'CP4', 'CP6', 'TP8', 'P2', 'P6'],
    RO=['O2', 'PO4'],
    FZ=['Fpz', 'Fz'],
    OZ=['POz', 'Oz', 'Iz'],
)

# Electrode groups as positions: patch label -> index of each member channel
# in `all_channels` (raises ValueError if a channel name is not in that list).
ba_patches_ind = {
    patch: [all_channels.index(channel) for channel in members]
    for patch, members in ba_patches.items()
}

# Frequency indices: band name -> (start, stop) positions along the frequency
# axis of a PSD array. The code in this file applies them as the slice
# `start:stop`.
# NOTE(review): a slice excludes its end position, so bin `stop` itself
# (e.g. 23 for delta, while theta starts at 24) belongs to no band - confirm
# that this is intended.
freq = {
    'delta': (0, 23),
    'theta': (24, 55),
    'alpha': (56, 95),
    'beta': (96, 232),
    'gamma': (233, 330),
}

############ Helper Functions ############
def calculate_psd(epochs,
                  eeg_dir='data/clean_data/',
                  save_path='data/psds_dict.pkl',
                  save=False,
                  pick_channels=False,
                  ch_indices=None):
    """Compute unaggregated Welch PSDs for the cleaned recordings in a directory.

    Every ``sub-*.fif`` file in ``eeg_dir`` whose name matches
    ``sub-<subject>_ses-01_task-<task>_proc-clean_epo`` is read, except the
    ``baseline2`` task and any task containing ``induction``. The epochs of a
    recording are stacked with ``np.hstack`` into one 2-D array before the PSD
    is estimated (assuming ``get_data()`` returns epochs x channels x samples,
    this joins the epochs end-to-end per channel).

    Parameters
    ----------
    epochs : object
        Unused. Kept only so existing calls keep working; each recording is
        read from ``eeg_dir`` instead.
    eeg_dir : str or Path
        Directory that holds the ``*.fif`` epoch files.
    save_path : str or Path
        Where the pickled result is written when ``save`` is true.
    save : bool
        Pickle the result to ``save_path``.
    pick_channels : bool
        Restrict the data to ``ch_indices`` before computing the PSD.
    ch_indices : list of int, optional
        Row indices to keep; required when ``pick_channels`` is true.

    Returns
    -------
    dict
        ``'<subject>_<task>'`` -> PSD array as returned by
        ``psd_array_welch(..., average=None)``, plus the key ``'freqs'`` holding
        the frequency vector of the last processed recording.

    Raises
    ------
    ValueError
        If ``pick_channels`` is true but ``ch_indices`` is None (indexing with
        None would silently add an axis instead of selecting channels).
    FileNotFoundError
        If no recording in ``eeg_dir`` was processed, so there is nothing to
        return or cache.
    """
    if pick_channels and ch_indices is None:
        raise ValueError('ch_indices must be provided when pick_channels=True')

    name_pattern = re.compile(r'sub-(.*)_ses-01_task-(.*)_proc-clean_epo.*')
    psds_dict = {}
    freqs = None
    for path in sorted(Path(eeg_dir).glob('sub-*.fif')):
        match = name_pattern.search(path.stem)
        if match is None:
            # The glob is broader than the naming scheme; report and move on
            # rather than failing on `None.groups()`.
            print('>>>>>> skipping file with unexpected name: ', path.name, ' <<<<<<')
            continue
        subject, task = match.groups()
        if task == 'baseline2' or 'induction' in task:
            continue
        print('>>>>>> get psds for subject: ', subject, ' task: ', task, ' <<<<<<')
        recording = mne.read_epochs(path)
        data = np.hstack(recording.get_data())
        if pick_channels:
            data = data[ch_indices]
        # NOTE(review): the sampling rate is hard-coded to 1000 - confirm that
        # it matches the recordings (e.g. `recording.info['sfreq']`).
        psds, freqs = psd_array_welch(data,
                                      sfreq=1000,
                                      fmin=1,
                                      fmax=42,
                                      n_fft=8000,
                                      verbose=0,
                                      average=None
                                      )
        psds_dict[subject+'_'+task] = psds

    if freqs is None:
        raise FileNotFoundError(
            f'no matching epoch files were processed in {str(eeg_dir)!r}')

    psds_dict['freqs'] = freqs
    if save:
        with open(save_path, 'wb') as handle:
            pickle.dump(psds_dict, handle)
    return psds_dict

def aggregate_psds(psds_dict, ba_patches_ind, freq):
    """Aggregate PSDs across the channels of each patch and each frequency band.

    Parameters
    ----------
    psds_dict : dict
        Recording label -> PSD array whose first axis is indexed by the channel
        indices in ``ba_patches_ind`` and whose second axis is the frequency
        axis. A ``'freqs'`` entry (as added by ``calculate_psd``) is ignored.
    ba_patches_ind : dict
        Patch label -> list of indices along the first axis of each PSD array.
    freq : dict
        Band name -> ``(start, stop)``; applied as the slice ``start:stop``
        on the frequency axis, so ``stop`` itself is excluded.

    Returns
    -------
    dict
        ``'<recording>-<patch>_<band>'`` -> mean over the patch's channels and
        the band's frequency bins, multiplied by ``10000 ** 3`` (1e12). Any
        axes after the frequency axis are left unaggregated.
    """
    # The original comment calls the scaled values "picovolts".
    # NOTE(review): for a power in V^2 a factor of 1e12 would give uV^2 -
    # confirm the intended unit.
    scale = 10000 ** 3
    psds_agg = {}
    for recording, psd in psds_dict.items():
        if recording == 'freqs':
            # The frequency vector is not a PSD; indexing it like one raises.
            continue
        for patch, channel_idx in ba_patches_ind.items():
            # The channel average does not depend on the band: compute it once.
            patch_mean = psd[channel_idx].mean(0)
            for band, bounds in freq.items():
                band_power = patch_mean[bounds[0]:bounds[1]].mean(0)
                psds_agg[recording+'-'+patch+'_'+band] = band_power * scale
    return psds_agg

def create_classification_df(psds_agg,
                             bh_path='data/behavioral_data/archived/behavioral_data.csv',
                             save_path='data/classification_datasets/power_sensor_3rd.csv',
                             save=False):
    """Build a wide power table and merge it with the behavioral data.

    Parameters
    ----------
    psds_agg : dict
        Mapping whose keys contain exactly one '-' (the part before it is
        split again on '_' into ``bids_id`` and ``condition``) - presumably the
        ``'<sub>_<task>-<patch>_<band>'`` keys produced by ``aggregate_psds``.
    bh_path : str
        CSV file with the behavioral data; it must provide the ``session`` and
        ``bids_id`` columns used for the merge.
    save_path : str
        Destination CSV when ``save`` is true.
    save : bool
        Write the merged table to ``save_path``.

    Returns
    -------
    pandas.DataFrame
        One row per recording of the power table (right merge), sorted by
        ``bids_id``, ``session`` and ``condition``, with ``condition`` moved to
        column position 1.
    """
    # long table: one row per psds_agg key, value in column 'values'
    df = pd.DataFrame(psds_agg.items(), columns=['index', 'values']).set_index('index')
    # split each key on '-': the left part goes to 'session', the right part to 'power'
    df[['session', 'power']] = df.index.to_series().apply(lambda x:x.split('-')).apply(pd.Series)
    df.reset_index(drop=True, inplace=True)
    # wide table: one row per 'session' label, one column per 'power' label
    df = df.pivot(index='session', columns='power', values='values')

    # merge behavioral data with power data
    # open behavioral data
    bh = pd.read_csv(bh_path)
    # keep only the first 208 rows
    # NOTE(review): 208 is a hard-coded row count - confirm it still matches the file
    bh = bh.iloc[:208]

    # merge with power data
    # the pivot index is split on '_' into the subject id and the condition name
    df[['bids_id', 'condition']] = df.index.to_series().apply(lambda x:x.split('_')).apply(pd.Series)
    # the session number is taken from the last character of the condition name
    df['session'] = df['condition'].apply(lambda x:x[-1])
    df.reset_index(drop=True, inplace=True)
    # cast 'session' to int64 so it can be matched against the behavioral data
    # NOTE(review): only 'session' is cast here; 'bids_id' stays a string -
    # confirm it has the same dtype as 'bids_id' in the behavioral data
    df = df.astype({'session': 'int64'})
    # right merge: keep every row of the power table
    df = pd.merge(bh, df, how='right', on=['session', 'bids_id'], right_index=False)
    # move 'condition' to column position 1
    df.insert(1, 'condition', df.pop('condition'))
    df = df.sort_values(by=['bids_id', 'session', 'condition']).reset_index(drop=True)

    if save:
        df.to_csv(save_path)
    return df

In [12]:
# Calculate the unaggregated PSDs for the OZ and FZ patches, or reuse the
# cached result if it already exists.
eeg_dir = '/Volumes/Extreme_SSD/PhD/OTKA_study1/clean_data/'
save_path = 'docs/psds_unaggregared_OZ-FZ_14112024.pkl'
# Row order of every PSD array: OZ channels first, then FZ channels.
ch_indices = ba_patches_ind['OZ'] + ba_patches_ind['FZ']

if os.path.exists(save_path):
    # NOTE: unpickling can execute arbitrary code - only load cache files that
    # were written by this notebook.
    with open(save_path, 'rb') as handle:
        psds_dict = pickle.load(handle)
else:
    psds_dict = calculate_psd(epochs, eeg_dir, save_path=save_path, save=True,
                              pick_channels=True, ch_indices=ch_indices)

# Split off the frequency vector in BOTH branches, so that `freqs` is always
# defined and `psds_dict` only holds '<subject>_<task>' -> PSD entries.
freqs = psds_dict.pop('freqs')

### Z-transformation of the power values

In [None]:
# Z-score every non-baseline recording against the same subject's baseline:
# (activity - baseline mean) / baseline std, where mean and std are taken over
# axis 2 of the baseline PSD, then average the z-scores over axis 2.
psds_z_trans = {}
bands = ['theta', 'alpha', 'gamma']
# Rows of each PSD array follow `ch_indices` (OZ channels first, then FZ).
n_oz = len(ba_patches_ind['OZ'])

base_sub = None  # subject whose baseline statistics are currently held
for k, psd in psds_dict.items():
    if k == 'freqs':
        # frequency vector, not a recording
        continue
    print('>>>>>>>>>', k)
    if 'baseline' in k:
        base_sub = k.split('_')[0]
        base_mean = psd.mean(2)
        base_std = psd.std(2)
        continue
    sub, task = k.split('_')
    if sub != base_sub:
        # Without this guard the recording would silently be z-scored against
        # another subject's baseline (or fail if no baseline was seen yet).
        print('!!!!!!!!! no baseline of the same subject precedes', k, '- skipped')
        continue
    activity_z = (psd - base_mean[:, :, np.newaxis]) / base_std[:, :, np.newaxis]
    activity_z = activity_z.mean(2)
    for band in bands:
        # keys look like '<sub>_OZ_<band>_<task>_Z-trans'
        band_z = activity_z[:, freq[band][0]:freq[band][1]]
        psds_z_trans[sub+'_'+'OZ_'+band+'_'+task+'_Z-trans'] = band_z[:n_oz].mean(1).mean(0)
        psds_z_trans[sub+'_'+'FZ_'+band+'_'+task+'_Z-trans'] = band_z[n_oz:].mean(1).mean(0)

In [None]:
# Long table: one row per '<sub>_<rest>' key of psds_z_trans.
df_z = pd.DataFrame(data=psds_z_trans.values(), index=psds_z_trans.keys(), columns=['values'])
key_fields = [key.split('_') for key in df_z.index]
# the first field is the subject id (stored as float), the remaining fields name the column
df_z['bids_id'] = [np.float64(fields[0]) for fields in key_fields]
df_z['cols'] = ['_'.join(fields[1:]) for fields in key_fields]
# Wide table: one row per subject, one column per '<rest>' label.
df_z = df_z.pivot(index='bids_id', columns='cols', values='values')
# Prepend nine all-NaN rows to the dataframe to match the behavioral data.
n_pad = 9
padding = pd.DataFrame(np.nan, index=np.arange(n_pad), columns=df_z.columns)
df_z = pd.concat([padding, df_z])
df_z['bids_id'] = df_z.index
df_z.iloc[:n_pad, -1] = np.nan  # the padding rows have no bids_id
df_z = df_z.reset_index(drop=True)
# df_z.to_csv('data/z_trans_power_sensor_OZ-FZ.csv')

#### Another way of z-transforming the data

In [11]:
power_data = pd.read_csv('data/classification_datasets/power_sensor.csv', index_col=0)
bands = ['theta', 'alpha', 'gamma']
# FZ/OZ columns of the selected bands, excluding the decibel variants
cols = [i for i in power_data.columns for j in bands if j in i and ('FZ' in i or 'OZ' in i) and 'decibel' not in i]
cols = ['bids_id', 'condition'] + cols

# baseline: long table with one row per (bids_id, sensor column)
baseline = power_data[cols].query("condition.str.contains('baseline1')").reset_index(drop=True).drop('condition', axis=1)
baseline = baseline.melt(id_vars=['bids_id'], var_name='sensor_base', value_name='power_base')

# experience: long table with one row per (bids_id, condition, sensor column)
experience = power_data.query("condition.str.contains('experience')")
experience = experience[cols].dropna().reset_index(drop=True)
experience = experience.melt(id_vars=['bids_id', 'condition'], var_name='sensor_exp', value_name='power_exp')

# Pair every experience row with the baseline of the SAME subject and sensor.
# A key-based merge replaces the former positional join (baseline rows repeated
# four times and matched by row number), which silently paired the wrong rows
# as soon as one side had a missing or dropped row. `validate` raises if a
# subject has more than one baseline row per sensor.
experience = experience.merge(baseline.assign(sensor_exp=baseline['sensor_base']),
                              how='left', on=['bids_id', 'sensor_exp'],
                              validate='many_to_one')
experience['bids_id'] = experience['bids_id'].apply(lambda x: str(x).zfill(2))

# baseline-correct the power, then z-transform it
experience['baseline_corrected'] = experience['power_exp'] - experience['power_base']
# NOTE(review): mean and std are taken over all rows pooled (every subject,
# sensor and band together) - confirm that pooling is intended.
corrected = experience['baseline_corrected']
experience['baseline_corrected'] = (corrected - corrected.mean()) / corrected.std()

### Other Visualisations

In [None]:
# Compare several normalisations of one PSD trace (row 3 of the axis-0 mean).
# NOTE(review): `psd1`, `baseline_1`, `psd_2` and `plt` are not defined in the
# cells shown here - presumably they come from cells that are not part of this
# export; verify before running.
psd_1 = psd1['01-experience1'].mean(axis=0)
trace = psd_1[3]
base_trace = baseline_1[3]
base_level = base_trace.mean()

psd_z = (trace - np.mean(trace)) / np.std(trace)  # z-scored against itself
psd_zc = (trace - base_level) / np.std(base_trace)  # z-scored against the baseline
psd_subtracted = trace - base_level  # baseline mean removed, no scaling

plt.plot(freqs, psd_z, label='z-transfomed')
plt.plot(freqs, psd_zc, label='z-transfomed with baseline')
plt.plot(freqs, psd_subtracted * 10**12, label='subtracted baseline')
plt.plot(freqs, psd_2, label='decible conversion')
plt.legend()
plt.grid()