### Configuration

In [1]:
import os
import numpy as np
import pandas as pd
import xarray as xr

import mne
from mne.time_frequency import tfr_array_morlet
from scipy.stats import zscore

from utils__helpers_macro import hilbert_powerphase
import utils__config

In [2]:
os.chdir(utils__config.working_directory)
os.getcwd()

'Z:\\Layton\\Sleep_083023'

### Parameters

In [3]:
micro_fif_path = 'Cache/Subject05/Jul13/S05_Jul13_micro_1024hz.fif'
tfr_path = 'Cache/Subject05/Jul13/S05_Jul13_micro_TFR_CLA_broad.csv'
sampling_freq = 1024
resample_frequency = 512 # frequency to resample to prior to Morlet (reduces memory usage)
tfr_decimation = 3 # decimation by Morlet; reduces memory usage but removes the ability to keep true time!
mean_bin_division = (resample_frequency / tfr_decimation) * 10 # division factor to bin samples into mean
rolling_mean_samples = 3 # number of samples over which to calculate rolling mean

### Format Data

In [4]:
# Load Data
raw = mne.io.read_raw_fif(micro_fif_path, preload = True, verbose = None)

# Generate list of desired channel names
desired_channels = [f"Channel{n}" for n in range(193, 200)]

# Keep only the desired channels
raw.pick_channels(desired_channels)

# Decimate to reduce memory usage
raw.resample(resample_frequency)

# Save timestamps for later
timestamps = raw.times

if tfr_decimation > 1:
    timestamps = timestamps[::tfr_decimation]

# Format Data for tfr_array_morlet()
ts_array = raw.get_data(units = dict(seeg = 'uV', eeg = 'uV'))
ts_array = ts_array[np.newaxis, :, :]

Opening raw data file Cache/Subject05/Jul13/S05_Jul13_micro_1024hz.fif...


  raw = mne.io.read_raw_fif(micro_fif_path, preload = True, verbose = None)


    Range : 0 ... 8383487 =      0.000 ...  8186.999 secs
Ready.
Opening raw data file Z:\Layton\Sleep_083023\Cache\Subject05\Jul13\S05_Jul13_micro_1024hz-1.fif...
    Range : 8383488 ... 16766975 =   8187.000 ... 16373.999 secs
Ready.
Opening raw data file Z:\Layton\Sleep_083023\Cache\Subject05\Jul13\S05_Jul13_micro_1024hz-2.fif...
    Range : 16766976 ... 23003136 =  16374.000 ... 22464.000 secs
Ready.
Reading 0 ... 23003136  =      0.000 ... 22464.000 secs...
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


### Morlet Transform

In [5]:
freqs = np.arange(1, 201, 3)
#freqs = np.arange(1, 26, 1)

# Create time-frequency representation
# using the Morlet Wavelet transform:
tfr = tfr_array_morlet(ts_array, 
                       sfreq = raw.info['sfreq'],
                       freqs = freqs, 
                       n_cycles = 6.0,
                       zero_mean = False, 
                       use_fft = True, 
                       decim = tfr_decimation, 
                       output = 'power', 
                       n_jobs = -1, 
                       verbose = None)

# Remove the dummy dimension (that was required
# due to formatting expectations of MNE Morlet):
tfr = np.squeeze(tfr)

# Convert to Xarray as an intermediate step in
# getting data into Pandas long (2d) format:
tfr = xr.DataArray(tfr,
                   dims = ('channel', 'frequency', 'seconds'),
                   coords = {'channel' : raw.ch_names,
                             'frequency' : freqs,
                             'seconds' : timestamps})

tfr = tfr.to_dataframe(name = 'power').reset_index()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   7 | elapsed:  2.4min remaining:  6.0min
[Parallel(n_jobs=-1)]: Done   4 out of   7 | elapsed:  2.4min remaining:  1.8min
[Parallel(n_jobs=-1)]: Done   7 out of   7 | elapsed:  2.5min finished


### Time Bin for Convenience

In [6]:
# Average 1 second values into 30s epoch values
# (bin stop value is arbitrarily large, 1mil seconds is 277 hrs)
# (alternative to the next two optional steps)
bin_list = np.arange(0, 1000000, 30)

tfr['epoch'] = pd.cut(tfr['seconds'], bins = bin_list, labels = False)
tfr = tfr.groupby(['channel', 'frequency', 'epoch']).mean('power')
tfr = tfr.reset_index()[['channel', 'frequency', 'epoch', 'power']]

### Log Normalize by Frequency

In [7]:
# Channel-wise and frequency-wise normalization, 
# since each channel (each one has a different
# baseline power magnitude) and frequency (1/f):
tfr['meanpower'] = tfr.groupby(['channel', 'frequency'])['power'].transform('mean')
tfr['log_meanpower'] = 10 * np.log10(tfr['meanpower'])

tfr['logpower'] = 10 * np.log10(tfr['power'])
tfr['logpower_mean'] = tfr.groupby(['channel', 'frequency'])['logpower'].transform('mean')

tfr['logmpower_freq'] = tfr['logpower'] - tfr['log_meanpower']
tfr['logpower_freq'] = tfr['logpower'] - tfr['logpower_mean']

tfr.drop(columns = ['meanpower', 'log_meanpower', 'logpower_mean'], inplace = True)

# Now calculate frequency-wise zscores from the log(power)
tfr['lmpf_zscore'] = tfr.groupby(['channel', 'frequency'])['logmpower_freq'].transform(zscore)
tfr['lpf_zscore'] = tfr.groupby(['channel', 'frequency'])['logpower_freq'].transform(zscore)

### Mean, Smooth, and/or Decimate

In [8]:
# Rolling mean to smooth TFR (optional step)
saved_epochs = tfr['epoch']

tfr = tfr.groupby(['channel', 'frequency']).rolling(window = rolling_mean_samples, 
                                                    min_periods = 1, 
                                                    center = True, 
                                                    win_type = 'gaussian').mean()
tfr = tfr.reset_index()
tfr.drop(columns = ['level_2'], inplace = True)

tfr['epoch'] = saved_epochs

In [9]:
tfr.to_csv(tfr_path, index = False)