### Configuration

In [1]:
import os
import numpy as np
import pandas as pd
import xarray as xr

import mne
from scipy.stats import zscore, circmean
from joblib import Parallel, delayed

from utils__helpers_macro import hilbert_powerphase
import utils__config

In [2]:
# Switch to the project working directory taken from utils__config so that the
# relative 'Cache/...' paths defined in the Parameters cell resolve from there.
os.chdir(utils__config.working_directory)
# Last expression of the cell: echoes the directory that is now current.
os.getcwd()

'Z:\\Layton\\Sleep_083023'

### Parameters

In [1]:
# Input files for one recording session; all paths are relative to the working directory.
# Macro-electrode recording (FIF).
macro_fif_path = 'Cache/Subject05/Jul11/S05_Jul11_256hz.fif'
# Micro-electrode recording (FIF).
micro_fif_path = 'Cache/Subject05/Jul11/S05_Jul11_micro_1024hz.fif'
# CSV with a 'channel' column listing rejected channels.
bad_channel_path = 'Cache/Subject05/Jul11/S05_bad_channels.csv'
# CSV of detected slow waves (one row per event).
sw_path = 'Cache/Subject05/Jul11/S05_SW.csv'
# CSV of electrode labels / anatomy (elec_label, hemisphere, roi_1, ...).
legui_path = 'Cache/Subject05/S05_electrodes.csv'

In [None]:
# Worker count handed to hilbert_powerphase and to every joblib.Parallel call in
# this notebook. With -2 the logged runs below used 31 concurrent workers.
n_jobs = -2

### Sub-Delta Phase

In [4]:
# Load the macro-electrode recording fully into memory
macro_raw = mne.io.read_raw_fif(macro_fif_path, preload = True, verbose = False)

# Keep only sEEG / ECoG channels.
# `pick` replaces the legacy `pick_types`; exclude='bads' preserves the legacy
# default of leaving out channels already flagged in info['bads'].
macro_raw.pick(['seeg', 'ecog'], exclude = 'bads')

# Remove rejected channels (only those still present after the pick above)
bad_channels = pd.read_csv(bad_channel_path)
bad_channels = bad_channels[bad_channels['channel'].isin(macro_raw.ch_names)]
macro_raw.drop_channels(ch_names = bad_channels['channel'].astype(str).tolist())

# Extract power and phase in the 0.3-1.5 Hz band.
# NOTE(review): hilbert_powerphase is a project helper; it is assumed to return
# a long-format table with at least the four columns selected below.
delta = hilbert_powerphase(data = macro_raw, lower = 0.3, upper = 1.5, njobs = n_jobs)
delta = delta[['time', 'channel', 'power', 'phase']]

# The preloaded recording is not needed past this point; release the reference
# (the original code released it implicitly by rebinding `delta`).
del macro_raw

  delta = mne.io.read_raw_fif(macro_fif_path, preload = True, verbose = False)


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.3 - 1.5 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.30
- Lower transition bandwidth: 0.30 Hz (-6 dB cutoff frequency: 0.15 Hz)
- Upper passband edge: 1.50 Hz
- Upper transition bandwidth: 2.00 Hz (-6 dB cutoff frequency: 2.50 Hz)
- Filter length: 2817 samples (11.004 s)



[Parallel(n_jobs=-2)]: Using backend LokyBackend with 31 concurrent workers.
[Parallel(n_jobs=-2)]: Done  34 out of  53 | elapsed:    8.0s remaining:    4.4s
[Parallel(n_jobs=-2)]: Done  45 out of  53 | elapsed:    8.5s remaining:    1.4s
[Parallel(n_jobs=-2)]: Done  53 out of  53 | elapsed:    8.8s finished


Converting "channel" to "category"...
Converting "ch_type" to "category"...


### High-Frequency Activity

In [5]:
# def process_channel(ch_name, raw_path):
#     # Load the raw file without preloading the data
#     raw = mne.io.read_raw_fif(raw_path, preload=False, verbose=False)
    
#     # Only pick the channels that we are interested in
#     raw.pick_channels([ch_name])
    
#     # Now, load the data for the specific channel
#     raw.load_data()
    
#     # Bandpass filter the channel data with n_jobs=1 since parallelization is handled by joblib
#     raw.filter(l_freq=70, h_freq=200, n_jobs=1)
    
#     # Extract data and times
#     data, times = raw[:][0], raw.times
    
#     # Create time bins
#     time_bins = ((times * 1000).astype(int) // 10 * 10).astype(float) / 1000.0
    
#     # Create a DataFrame and bin the data
#     df = pd.DataFrame({'time': times, 'hfa': data.flatten()})
#     df['time_bin'] = time_bins
#     df_grouped = df.groupby('time_bin')['hfa'].mean().reset_index()
    
#     # Z-score the binned data
#     df_grouped['hfa'] = (df_grouped['hfa'] - df_grouped['hfa'].mean()) / df_grouped['hfa'].std()
    
#     # Add the channel name to the DataFrame
#     df_grouped['micro_channel'] = ch_name
    
#     return df_grouped

In [13]:
def process_channel(ch_name, raw_path, frequencies=(70, 200)):
    """Compute z-scored, 10 ms-binned Morlet power for one channel of a FIF file.

    Parameters
    ----------
    ch_name : str
        Name of the channel to extract from the recording.
    raw_path : str
        Path of the FIF file to read. The file is opened lazily and only
        ``ch_name`` is loaded, so each parallel worker holds a single channel.
    frequencies : sequence of float, optional
        Centre frequencies (Hz) of the Morlet wavelets. Power is averaged over
        these frequencies. The default reproduces the original hard-coded
        ``[70, 200]``.
        NOTE(review): only the listed frequencies are evaluated, i.e. the
        default is two wavelets rather than a sampled 70-200 Hz band; confirm
        this is intended.

    Returns
    -------
    pandas.DataFrame
        Columns ``time_bin`` (start of the 10 ms bin, in seconds),
        ``hfa_power`` (bin-averaged power, z-scored across all bins of this
        channel) and ``micro_channel`` (``ch_name``).
    """
    # Open without preloading, restrict to the requested channel, then load it.
    raw = mne.io.read_raw_fif(raw_path, preload=False, verbose=False)
    raw.pick([ch_name])  # `pick` replaces the legacy `pick_channels`
    raw.load_data()

    # tfr_array_morlet expects (n_epochs, n_channels, n_times)
    data, times = raw[:, :]
    data = data[np.newaxis, :, :]

    # Half as many cycles as the wavelet frequency, per frequency
    freqs = list(frequencies)
    n_cycles = [freq / 2 for freq in freqs]

    power = mne.time_frequency.tfr_array_morlet(data, sfreq=raw.info['sfreq'], freqs=freqs,
                                                n_cycles=n_cycles, output='power',
                                                decim=1)

    # Average over the frequency axis, then collapse the singleton
    # epoch/channel axes to a 1-D series aligned with `times`.
    power_data = power.mean(axis=2).ravel()

    # Start of the 10 ms bin each sample falls into (seconds)
    time_bins = ((times * 1000).astype(int) // 10 * 10).astype(float) / 1000.0

    # Mean power per 10 ms bin
    df = pd.DataFrame({'time': times, 'hfa_power': power_data})
    df['time_bin'] = time_bins
    df_grouped = df.groupby('time_bin')['hfa_power'].mean().reset_index()

    # Z-score across the binned series of this channel
    df_grouped['hfa_power'] = zscore(df_grouped['hfa_power'])

    # Tag rows with the channel they came from
    df_grouped['micro_channel'] = ch_name

    return df_grouped

In [14]:
# Open the micro recording lazily; only its channel list is needed here
raw = mne.io.read_raw_fif(micro_fif_path, preload=False, verbose=False)
channel_names = raw.ch_names

# One deferred process_channel call per channel, executed by the joblib pool
channel_tasks = [delayed(process_channel)(ch_name, micro_fif_path) for ch_name in channel_names]
processed_data = Parallel(n_jobs=n_jobs)(channel_tasks)

# Stack the per-channel tables into one long DataFrame
hfa_long = pd.concat(processed_data, ignore_index=True)

  raw = mne.io.read_raw_fif(micro_fif_path, preload=False, verbose=False)


### Slow Waves

In [16]:
# Electrode metadata: keep label, hemisphere and first ROI, renamed so the label
# column matches the slow-wave table's 'Channel' key
legui = (
    pd.read_csv(legui_path)
    .loc[:, ['elec_label', 'hemisphere', 'roi_1']]
    .rename(columns={'elec_label': 'Channel',
                     'hemisphere': 'laterality',
                     'roi_1': 'region'})
)

# Source column -> notebook column, in the output column order
sw_column_map = {
    'ID': 'sw_id',
    'Channel': 'channel_id',
    'laterality': 'sw_laterality',
    'region': 'sw_region',
    'Start': 'start',
    'End': 'end',
    'NegPeak': 'negative_peak',
    'MidCrossing': 'mid_crossing',
    'PosPeak': 'positive_peak',
    'ValNegPeak': 'npeak_amp',
    'PTP': 'ptp_amp',
}

# Slow-wave events joined (inner) with electrode metadata, then trimmed and renamed
sw_times = (
    pd.read_csv(sw_path)
    .merge(legui, on = 'Channel', how = 'inner')
    .loc[:, list(sw_column_map)]
    .rename(columns=sw_column_map)
)

### Merge Data

In [17]:
# (disabled) Convert the start and end times to an IntervalIndex
#sw_times['interval'] = pd.IntervalIndex.from_arrays(sw_times['start'], sw_times['end'], closed='both')

# Phase table: order by time and make the channel column plain strings so it
# compares cleanly with the slow-wave channel labels
delta = (
    delta
    .sort_values(by='time')
    .assign(channel=lambda frame: frame['channel'].astype('str'))
)

# Slow-wave table: order by onset and expose the channel under the same
# column name as the phase table
sw_times = (
    sw_times
    .sort_values(by='start')
    .rename(columns={'channel_id': 'channel'})
)

In [18]:
from joblib import Parallel, delayed

def process_group(name, group_sw, delta):
    """Collect the rows of ``delta`` that fall inside the slow waves of one channel.

    Parameters
    ----------
    name : str
        Channel label; only rows of ``delta`` whose 'channel' equals it are used.
    group_sw : pandas.DataFrame
        Slow waves of that channel, with numeric 'start' and 'end' columns.
    delta : pandas.DataFrame
        Table with at least 'channel' and a numeric 'time' column.

    Returns
    -------
    pandas.DataFrame
        For each slow wave in order, the channel's rows with
        ``start <= time <= end`` (both ends inclusive), stacked with a fresh
        0..n-1 index. Rows covered by several slow waves appear once per slow
        wave. An empty frame with the columns of ``delta`` is returned when
        nothing matches, including when ``group_sw`` has no rows.
    """
    # Restrict to the current channel
    group_delta = delta[delta['channel'] == name]

    # Sort the sample times once so each window becomes two binary searches
    # instead of a full boolean scan per slow wave.
    times = group_delta['time'].to_numpy(dtype=float)
    order = np.argsort(times, kind='stable')
    sorted_times = times[order]
    # NaN times sort to the end and can never lie inside a window; cut them off.
    n_valid = len(sorted_times) - int(np.isnan(sorted_times).sum())
    sorted_times = sorted_times[:n_valid]

    starts = group_sw['start'].to_numpy(dtype=float)
    ends = group_sw['end'].to_numpy(dtype=float)
    first = np.searchsorted(sorted_times, starts, side='left')   # first time >= start
    last = np.searchsorted(sorted_times, ends, side='right')     # one past last time <= end
    # A slow wave with an undefined bound selects nothing.
    undefined = np.isnan(starts) | np.isnan(ends)
    last[undefined] = first[undefined]

    # Positions within each window are re-sorted so rows keep the order they
    # have in `group_delta`, exactly as a boolean mask would return them.
    windows = [np.sort(order[lo:hi]) for lo, hi in zip(first, last) if hi > lo]
    if windows:
        positions = np.concatenate(windows)
    else:
        positions = np.empty(0, dtype=np.intp)

    return group_delta.iloc[positions].reset_index(drop=True)

# One deferred process_group call per slow-wave channel: the channel label, that
# channel's slow waves, and the full phase table
group_tasks = [
    delayed(process_group)(name, group, delta)
    for name, group in sw_times.groupby('channel')
]
results = Parallel(n_jobs=n_jobs, verbose=10)(group_tasks)

# Stack the per-channel selections into a single DataFrame
result = pd.concat(results, ignore_index=True)

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 31 concurrent workers.
[Parallel(n_jobs=-2)]: Done   1 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-2)]: Done   2 out of  17 | elapsed:  6.2min remaining: 46.8min
[Parallel(n_jobs=-2)]: Done   4 out of  17 | elapsed: 10.3min remaining: 33.6min
[Parallel(n_jobs=-2)]: Done   6 out of  17 | elapsed: 14.4min remaining: 26.4min
[Parallel(n_jobs=-2)]: Done   8 out of  17 | elapsed: 18.5min remaining: 20.8min
[Parallel(n_jobs=-2)]: Done  10 out of  17 | elapsed: 22.6min remaining: 15.8min
[Parallel(n_jobs=-2)]: Done  12 out of  17 | elapsed: 26.7min remaining: 11.1min
[Parallel(n_jobs=-2)]: Done  14 out of  17 | elapsed: 30.7min remaining:  6.6min
[Parallel(n_jobs=-2)]: Done  17 out of  17 | elapsed: 36.3min finished


In [19]:
def _circular_mean_phase(phases):
    """Circular mean of phases on [-pi, pi]; NaN for an empty group."""
    return circmean(phases, high=np.pi, low=-np.pi) if len(phases) > 0 else np.nan

# Start of each sample's 10 ms bin: seconds -> whole milliseconds -> floor to a
# multiple of 10 -> back to seconds
result['time_bin'] = ((result['time'] * 1000).astype(int) // 10 * 10) / 1000.0

# Bin midpoint = bin start + 5 ms
result['time_bin_mid'] = result['time_bin'] + 0.005

# One circular-mean phase per (channel, bin midpoint)
result = (
    result
    .groupby(['channel', 'time_bin_mid'])['phase']
    .agg(_circular_mean_phase)
    .reset_index()
)

# From here on 'time_bin' holds the bin midpoint
result.columns = ['channel', 'time_bin', 'phase']

### Final merge and save 

In [20]:
# Pair every (macro channel, time bin) phase row with the HFA of every micro channel.
unique_micro_channels = hfa_long['micro_channel'].unique()
merged_dfs = []

# merge_asof needs the key sorted on both sides. The phase table is the same for
# every micro channel, so sort it once instead of once per loop iteration.
result_sorted = result.sort_values('time_bin')

# sort=False keeps the channels in order of first appearance (the order of
# `unique_micro_channels`) and splits hfa_long in a single pass.
for channel, temp_hfa in hfa_long.groupby('micro_channel', sort=False):
    # Default (backward) as-of match: each phase row receives the HFA row with
    # the latest 'time_bin' that is <= its own 'time_bin'.
    # NOTE(review): no `tolerance` is set, so a phase row is matched to the
    # nearest earlier HFA bin however far back it is -- confirm that is intended.
    merged_df = pd.merge_asof(result_sorted,
                              temp_hfa.sort_values('time_bin'),
                              on='time_bin')

    merged_dfs.append(merged_df)

# Concatenate all the merged dataframes
final_result = pd.concat(merged_dfs, ignore_index=True)

In [21]:
# Number of equal-width phase bins covering [-pi, pi], and their width
num_bins = 50
bin_width = 2 * np.pi / num_bins

# Index of the bin each phase falls into.
# Clipping keeps a phase of exactly +pi in the last bin instead of creating a
# 51st bin centred beyond pi. The index is kept as float so a missing phase
# propagates as NaN rather than failing an integer cast.
phase_bin_index = np.floor((final_result['phase'] + np.pi) / bin_width).clip(lower=0, upper=num_bins - 1)

# Store the bin midpoint directly as 'phase_bin'. Assigning the column (rather
# than adding a temporary column and renaming it) means re-running this cell
# overwrites 'phase_bin' instead of creating a second column with that name.
final_result['phase_bin'] = (phase_bin_index * bin_width) - np.pi + (bin_width / 2)

In [25]:
# Display the merged table as the cell output (pandas elides the middle rows).
final_result

Unnamed: 0,channel,time_bin,phase,hfa_power,micro_channel,phase_bin
0,RAM1,1021.925,1.540187,0.050916,Channel193,1.570796
1,RAM1,1021.935,1.595519,0.049849,Channel193,1.570796
2,RAM1,1021.945,1.664798,0.048767,Channel193,1.696460
3,RAM1,1021.955,1.734181,0.047324,Channel193,1.696460
4,RAM1,1021.965,1.803705,0.045321,Channel193,1.822124
...,...,...,...,...,...,...
200710779,RHC1,33929.375,1.334802,-0.085211,Channel256,1.319469
200710780,RHC1,33929.385,1.421065,-0.086271,Channel256,1.445133
200710781,RHC1,33929.395,1.507248,-0.087380,Channel256,1.445133
200710782,RHC1,33929.405,1.593272,-0.088547,Channel256,1.570796


### Plotting

In [None]:
# Bin HFA values
scale_factor = 1000  # hfa_power is binned in units of 1/scale_factor
bin_size = 10        # bin width in those units (10 / 1000 = 0.01)

# Lower edge of the bin each value falls into.
# A true floor is used: casting to int first truncates toward zero, which
# shifts the bin edges for negative values, widens the bin at 0, and fails on
# missing values.
final_result['hfa_bin'] = np.floor(final_result['hfa_power'] * scale_factor / bin_size) * bin_size / scale_factor

# Count samples per (macro channel, micro channel, phase bin, HFA bin)
grouped = final_result.groupby(['channel', 'micro_channel', 'phase_bin', 'hfa_bin']).size().reset_index(name='count')

In [33]:
# Write the per-(channel, micro_channel, phase_bin, hfa_bin) counts to a CSV
# (relative path, no index column).
grouped.to_csv('wat.csv', index = False)

In [31]:
import matplotlib.pyplot as plt
import seaborn as sns

# Custom plotting function for heatmaps
def heatmap(data, x, y, color, **kwargs):
    """Draw one facet as a heatmap.

    Pivots ``data`` so that ``y`` values form the rows, ``x`` values form the
    columns and the cells hold ``color`` (``pivot_table``'s default
    aggregation), then forwards the table and any extra keyword arguments to
    ``sns.heatmap``.
    """
    cell_table = data.pivot_table(values=color, index=y, columns=x)
    sns.heatmap(cell_table, **kwargs)

# One facet per (micro channel, macro channel) pair.
# NOTE(review): this creates a facet for every micro x macro combination; the
# recorded run of this cell ended in a KeyboardInterrupt, so consider
# plotting a subset of `grouped`.
g = sns.FacetGrid(grouped, row='micro_channel', col='channel', margin_titles=True)

# Draw each facet with the custom heatmap function: phase bin on x, HFA bin on
# y, sample count as the cell colour
g.map_dataframe(heatmap, x='phase_bin', y='hfa_bin', color='count')

# Additional plot formatting
g.set_axis_labels("Phase Bin", "HFA Bin Count")
g.set_titles(col_template="{col_name}", row_template="{row_name}")

# Rotate the x tick labels on every facet; plt.xticks() would only act on the
# current (last drawn) axes.
for ax in g.axes.flat:
    ax.tick_params(axis='x', labelrotation=90)

plt.show()

  if pd.api.types.is_categorical_dtype(vector):
  if pd.api.types.is_categorical_dtype(vector):


KeyboardInterrupt: 

Error in callback <function flush_figures at 0x000001F87B6CC7C0> (for post_execute):


KeyboardInterrupt: 