### Configuration

In [1]:
import os
import numpy as np
import pandas as pd

import tphate # requires Python 3.7 - 3.9

import utils__config

In [2]:
# Switch to the directory configured in utils__config.working_directory so the
# relative 'Data/...' and 'Cache/...' paths used below resolve against it.
os.chdir(utils__config.working_directory)
os.getcwd()  # last expression: displayed as the cell output to confirm the switch

'G:\\My Drive\\Residency\\Research\\Lab - Damisah\\Project - Sleep\\Revisions'

### Parameters

In [3]:
# Recording to process. Every input/output path below is derived from this
# label, so switching recordings is a one-line change instead of swapping
# commented-out blocks.
# Available recordings: 'S01_Feb02', 'S05_Jul11', 'S05_Jul12', 'S05_Jul13'
recording_label = 'S05_Jul11'

# Electrode tables are stored per subject; the subject ID is the part of the
# recording label before the first underscore (e.g. 'S05' for 'S05_Jul11').
subject_label = recording_label.split('_')[0]

# Per-recording inputs (10 s epochs)
hypno_path = f'Data/{recording_label}_epochs_10s_hypno.csv'
spikes_path = f'Data/{recording_label}_epochs_10s_spikes.csv'
swa_path = f'Data/{recording_label}_epochs_10s_swa.csv'
response_path = f'Data/{recording_label}_unit_response.csv'

# Per-subject input
chan_path = f'Data/{subject_label}_electrodes.csv'

# Output written by the final export cell
output_path = f'Cache/{recording_label}_model_stack_10s.csv'

In [4]:
# Seed NumPy's global RNG up front (intended to make T-PHATE reproducible)
np.random.seed(42)

# T-PHATE settings
tphate_dims = 2   # passed to T-PHATE as n_components (embedding dimensions)
tphate_cores = 6  # passed to T-PHATE as n_jobs

# Lagged-regression setting: epochs per stacked row (current + previous ones)
window_size = 3

### Munging

In [5]:
# Read the five input tables in one pass; the order of the paths matches the
# order of the names on the left-hand side.
hypno_epochs, spike_epochs, swa_epochs, response, channels = (
    pd.read_csv(csv_path)
    for csv_path in (hypno_path, spikes_path, swa_path, response_path, chan_path)
)

In [6]:
# Hypnogram stage codes:
#   -2 = Unassigned   -1 = Artifact
#    0 = Awake         1 = N1   2 = N2   3 = N3   4 = REM

# Collapse the stage code into two binary indicator columns and drop the raw
# code: NREM marks N2/N3, WREM marks Awake/N1/REM. Unassigned and artifact
# epochs end up with 0 in both columns.
hypno_epochs = (
    hypno_epochs
    .assign(
        NREM=lambda frame: np.where(frame['stage'].isin([2, 3]), 1, 0),
        WREM=lambda frame: np.where(frame['stage'].isin([0, 1, 4]), 1, 0),
    )
    .drop(columns=['stage'])
)

# SWA table: integer epoch index, keep the four columns used downstream, and
# shorten the two measure names.
swa_epochs = (
    swa_epochs
    .astype({'epoch': 'int64'})
    .loc[:, ['epoch', 'channel', 'zlog_power', 'sw_ratio']]
    .rename(columns={'zlog_power': 'power', 'sw_ratio': 'sw'})
)

# Channel table: macro contacts only, renamed so 'channel' can serve as the
# join key against the SWA table.
channels = (
    channels
    .loc[channels['type'] == 'macro', ['elec_label', 'hemisphere', 'roi_3', 'lobe_1']]
    .rename(columns={
        'elec_label': 'channel',
        'hemisphere': 'laterality',
        'roi_3': 'region',
        'lobe_1': 'lobe',
    })
)

# Attach channel metadata to every SWA row (inner join: SWA rows whose channel
# is not a listed macro contact are dropped).
swa_epochs = swa_epochs.merge(channels, on='channel', how='inner')

# Two-step average: first per epoch x laterality x lobe, then across those
# regional means, giving one global 'power' / 'sw' value per epoch.
swa_measures = ['power', 'sw']
regional_swa_epochs = (
    swa_epochs
    .groupby(['epoch', 'laterality', 'lobe'])[swa_measures]
    .mean()
    .reset_index()
)
global_swa_epochs = (
    regional_swa_epochs
    .groupby(['epoch'])[swa_measures]
    .mean()
    .reset_index()
)

# Format unit response type: keep the unit ID and its response label, rename
# them to match the spike table's 'unit_id' key, and label units without a
# response type as the string "None".
# Built as one chain that returns a new frame, so no column is assigned on a
# slice of the original table (the pattern behind SettingWithCopyWarning).
response = (
    response[['unit', 'response_type']]
    .rename(columns={'unit': 'unit_id', 'response_type': 'response'})
    .fillna({'response': "None"})
)

# Attach each unit's response label to its per-epoch spike rows (inner join:
# units missing from either table are dropped).
spikeponse = spike_epochs.merge(response, on='unit_id', how='inner')

# Subset definitions: for each region, three boolean row masks over
# 'spikeponse' -- every unit, units with a "Positive" response, and the rest.
# Each entry is {"subset": <name>, "filter": <mask>}, ordered region by region.
criteria = []
for region_code in ("CLA", "AMY", "ACC"):
    in_region = spikeponse["unit_region"] == region_code
    suffix = region_code.lower()
    criteria.extend([
        {"subset": f"all_{suffix}", "filter": in_region},
        {"subset": f"positive_{suffix}", "filter": in_region & (spikeponse["response"] == "Positive")},
        {"subset": f"other_{suffix}", "filter": in_region & (spikeponse["response"] != "Positive")},
    ])

### Firing rate by subset

Option 1: Group mean FR by subset (you will need to adapt the Epoch Stacking code in order to use this)

In [7]:
# unit_epochs = pd.DataFrame()

# # Loop through each subset criteria
# for crit in criteria:

#     # Filter the data based on the current criteria
#     subset_data = spikeponse[crit["filter"]]
    
#     # Group by 'epoch' alone to calculate the global mean
#     global_mean = subset_data.groupby('epoch')['fr'].mean().reset_index()
#     global_mean['subset'] = crit["subset"]  # Add the subset column
    
#     # Append the result to the 'unit_epochs' DataFrame
#     unit_epochs = pd.concat([unit_epochs, global_mean], ignore_index=True)

Option 2: T-PHATE of FR by subset

In [8]:
def apply_tphate(data, n_components, n_jobs):
    """Embed `data` with T-PHATE and return the coordinates as a DataFrame.

    Parameters
    ----------
    data : array-like
        Matrix handed to ``TPHATE.fit_transform``; the caller below passes an
        epochs x units array of firing rates.
    n_components : int
        Number of embedding dimensions (forwarded to ``tphate.TPHATE``).
    n_jobs : int
        Forwarded to ``tphate.TPHATE`` as ``n_jobs``.

    Returns
    -------
    pandas.DataFrame
        Columns ``tphate_1`` ... ``tphate_<n_components>`` holding the output
        of ``fit_transform``, with a default integer index.
    """
    tp_model = tphate.TPHATE(n_components=n_components, n_jobs=n_jobs)
    transformed_data = tp_model.fit_transform(data)
    return pd.DataFrame(transformed_data, columns=[f'tphate_{i+1}' for i in range(n_components)])

# Collect one embedding per subset and concatenate once at the end, rather
# than re-copying a growing DataFrame on every iteration.
subset_embeddings = []

# Loop through each subset criteria
for crit in criteria:
    # Filter the data based on the current criteria
    subset_data = spikeponse[crit["filter"]]

    # Nothing to embed for this subset
    if subset_data.empty:
        print(f"Subset '{crit['subset']}' resulted in an empty dataframe. Skipping...")
        continue

    # Count the unique unit_ids in the current subset
    unique_unit_num = subset_data['unit_id'].nunique()

    # Pivot to wide format (rows = epochs, columns = units) for T-PHATE.
    # Epoch/unit combinations without a row are filled with a firing rate of 0.
    wide_data = subset_data.pivot(index='epoch', columns='unit_id', values='fr').fillna(0)

    # Apply T-PHATE
    tphate_data = apply_tphate(wide_data.to_numpy(), n_components=tphate_dims, n_jobs=tphate_cores)

    # Add back the metadata. The epoch labels are assigned by position, which
    # relies on fit_transform returning rows in the same order as its input.
    tphate_data['epoch'] = wide_data.index
    tphate_data['subset'] = crit["subset"]
    tphate_data['unit_num'] = unique_unit_num  # number of units that fed this embedding

    subset_embeddings.append(tphate_data)

# Single concatenation; fall back to an empty frame when every subset was skipped
unit_epochs = pd.concat(subset_embeddings, ignore_index=True) if subset_embeddings else pd.DataFrame()

Calculating TPHATE...
  Running TPHATE on 3485 observations and 12 variables.
  Landmarking not recommended; setting n_landmark to 3485
  Calculating graph and diffusion operator...
    Calculating KNN search...
    Calculated KNN search in 0.22 seconds.
    Calculating affinities...
    Calculated affinities in 0.01 seconds.
  Calculated graph and diffusion operator in 0.24 seconds.
  Learning the autocorrelation function...
  Calculating Autocorr kernel...
    Dropoff point: 727
  Combining PHATE operator and autocorr operator
  Calculating optimal t...
    Automatically selected t = 18
  Calculated optimal t in 7.85 seconds.
  Calculating diffusion potential...
  Calculated diffusion potential in 0.84 seconds.
  Calculating metric MDS...
  Calculated metric MDS in 89.88 seconds.
Calculated TPHATE in 101.55 seconds.
Calculating TPHATE...
  Running TPHATE on 3485 observations and 6 variables.
  Landmarking not recommended; setting n_landmark to 3485
  Calculating graph and diffusion o



    Calculated affinities in 0.63 seconds.
  Calculated graph and diffusion operator in 0.77 seconds.
  Learning the autocorrelation function...
  Calculating Autocorr kernel...
    Dropoff point: 1128


  K.data = np.exp(-1 * np.power(K.data, self.decay))


  Combining PHATE operator and autocorr operator
  Calculating optimal t...
    Automatically selected t = 5
  Calculated optimal t in 8.47 seconds.
  Calculating diffusion potential...
  Calculated diffusion potential in 0.63 seconds.
  Calculating metric MDS...
  Calculated metric MDS in 85.90 seconds.
Calculated TPHATE in 99.23 seconds.
Calculating TPHATE...
  Running TPHATE on 3485 observations and 3 variables.
  Landmarking not recommended; setting n_landmark to 3485
  Calculating graph and diffusion operator...
    Calculating KNN search...
    Calculated KNN search in 0.12 seconds.
    Calculating affinities...
    Calculated affinities in 0.01 seconds.
  Calculated graph and diffusion operator in 0.13 seconds.
  Learning the autocorrelation function...
  Calculating Autocorr kernel...
    Dropoff point: 301




  Combining PHATE operator and autocorr operator
  Calculating optimal t...
    Automatically selected t = 8
  Calculated optimal t in 10.25 seconds.
  Calculating diffusion potential...
  Calculated diffusion potential in 0.69 seconds.
  Calculating metric MDS...
  Calculated metric MDS in 92.53 seconds.
Calculated TPHATE in 106.02 seconds.


Merge FR by subset with hypnogram and LFP data

In [9]:
# Join the per-epoch tables on 'epoch'. Inner joins keep only epochs present in
# the hypnogram, the unit table and the global SWA table. The last step flags
# 'DREM' (delta-NREM): NREM epochs whose global SWA 'power' is above 1.
data = (
    hypno_epochs
    .merge(unit_epochs, on='epoch', how='inner')
    .merge(global_swa_epochs, on='epoch', how='inner')
    .assign(DREM=lambda merged: np.where((merged['NREM'] == 1) & (merged['power'] > 1), 1, 0))
)

# Resulting columns in 'data':
#   'subset', 'epoch', 'power', 'sw', 'WREM', 'NREM', 'DREM', 'unit_num'
#   plus either 'fr' (group mean) or 'tphate_1' ... 'tphate_n' (T-PHATE)

### Epoch stacking for lagged regression

In [10]:
def _stack_lagged_epochs(subset_frame, window):
    """Build the lagged ("stacked") table for a single subset.

    Rows are ordered by 'epoch'. For every column whose name contains
    'tphate_', `window` columns are produced: the one suffixed ``_<window>``
    holds the current row's value, ``_<window - 1>`` the previous row's value,
    and so on down to ``_1`` for the oldest row in the window. The current
    row's 'power', 'NREM', 'DREM' and 'epoch' are carried along unchanged.

    Parameters
    ----------
    subset_frame : pandas.DataFrame
        Rows of one subset; must contain 'epoch', 'power', 'NREM', 'DREM' and
        the T-PHATE columns.
    window : int
        Number of rows (current + previous) combined into each output row.

    Returns
    -------
    pandas.DataFrame
        One row per input row that has a full window of history, i.e. the
        first ``window - 1`` rows are dropped; the index is reset.
    """
    ordered = subset_frame.sort_values('epoch')
    tphate_columns = [col for col in ordered.columns if 'tphate_' in col]

    # shift(lag) lines up the value from `lag` rows earlier with the current
    # row, replacing a per-row Python loop over .iloc slices.
    # NOTE(review): lags are positional (previous *row*, not epoch - 1). If the
    # inner merges dropped epochs, neighbouring rows may not be consecutive
    # epochs -- confirm the epoch sequence is gap-free.
    stacked = pd.DataFrame(
        {
            f'{tphate_col}_{window - lag}': ordered[tphate_col].shift(lag)
            for tphate_col in tphate_columns
            for lag in range(window)
        },
        index=ordered.index,
    )

    # Current-epoch values that accompany the lagged embedding
    for passthrough_col in ['power', 'NREM', 'DREM', 'epoch']:
        stacked[passthrough_col] = ordered[passthrough_col]

    # The first `window - 1` rows have no complete history; drop them
    return stacked.iloc[window - 1:].reset_index(drop=True)


# One stacked table per subset
all_subsets = []

for selected_subset in data['subset'].unique():
    subset_data = data[data['subset'] == selected_subset]

    subset_stack = _stack_lagged_epochs(subset_data, window_size)

    # Subset-level metadata ('unit_num' is taken from the first row, assuming
    # it is constant within a subset)
    subset_stack['subset'] = selected_subset
    subset_stack['unit_num'] = subset_data['unit_num'].iloc[0]

    all_subsets.append(subset_stack)

# pd.concat raises an opaque ValueError on an empty list; fail with a message
# that points at the actual cause instead.
if not all_subsets:
    raise ValueError("No subsets to stack: 'data' contains no rows after merging.")

# Concatenate all the subset DataFrames into one and tag the recording
data_stack = pd.concat(all_subsets, ignore_index=True)
data_stack['recording'] = recording_label

In [11]:
# Write the stacked table to disk. Create the target directory first so a
# missing output folder does not make the export fail after the long
# T-PHATE run.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
data_stack.to_csv(output_path, index=False)

In [12]:
# Display the exported table as a final sanity check (pandas renders a
# truncated head/tail view for long frames).
data_stack

Unnamed: 0,tphate_1_3,tphate_1_2,tphate_1_1,tphate_2_3,tphate_2_2,tphate_2_1,power,NREM,DREM,epoch,subset,unit_num,recording
0,0.083020,0.083714,0.084862,-0.004313,-0.004305,-0.004292,-0.007667,0,0,2,all_cla,12,S05_Jul11
1,0.082123,0.083020,0.083714,-0.004328,-0.004313,-0.004305,0.093267,0,0,3,all_cla,12,S05_Jul11
2,0.081384,0.082123,0.083020,-0.004336,-0.004328,-0.004313,0.010867,0,0,4,all_cla,12,S05_Jul11
3,0.080590,0.081384,0.082123,-0.004350,-0.004336,-0.004328,0.238600,0,0,5,all_cla,12,S05_Jul11
4,0.079893,0.080590,0.081384,-0.004360,-0.004350,-0.004336,-0.258667,0,0,6,all_cla,12,S05_Jul11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
27859,0.080604,0.079014,0.077181,0.044429,0.043885,0.043072,-0.600933,0,0,3480,other_acc,3,S05_Jul11
27860,0.082536,0.080604,0.079014,0.045249,0.044429,0.043885,-0.578400,0,0,3481,other_acc,3,S05_Jul11
27861,0.084007,0.082536,0.080604,0.045817,0.045249,0.044429,-0.302933,0,0,3482,other_acc,3,S05_Jul11
27862,0.086061,0.084007,0.082536,0.046665,0.045817,0.045249,-0.380667,0,0,3483,other_acc,3,S05_Jul11
