In [1]:
# examining data files in source
import numpy as np
import pandas as pd
import pickle 
%load_ext autoreload
%autoreload
from general import *
from SWRmodule import *

  class TimeSeriesX(TimeSeries):


The purpose of this notebook is to detail the process used for the ripple analysis.

For each session, we record brain activity for a certain number of trials (words). Brain activity is recorded using electrodes across various brain regions. The data for each brain region is saved separately.

Hippocampal (HPC) ripples are stored in an array of the following shape: (num_trials x num_electrodes_hpc) x num_timesteps. Here, num_trials is the number of trials, num_electrodes is the number of electrodes in that brain region, and num_timesteps is the number of timesteps that neural data was recorded for.

Let's load the HPC data from one session and see how this checks out.

In [4]:
session_events_hpc = pd.DataFrame()
ripple_array_hpc = []
sub_sess_names_hpc = []
sub_names_hpc = []
trial_nums_hpc = []
elec_ripple_rate_array_hpc = []

fn_hpc = '/scratch/john/SWR_scratch/ENCODING/SWR_FR1_R1341T_2_HPC_encoding_soz_in_hamming.p'
with open(fn_hpc,'rb') as f:
    dat = pickle.load(f)
    session_events_hpc = session_events_hpc.append(dat['session_events'])
    ripple_array_hpc = superVstack(ripple_array_hpc,dat['ripple_array'])
    sub_sess_names_hpc.extend(dat['sub_sess_names'])
    sub_names_hpc.extend(dat['sub_names'])
    hpc_trialnums = np.append(trial_nums_hpc,dat['trial_nums'])
    elec_ripple_rate_array_hpc.extend(dat['elec_ripple_rate_array'])
    hpc_regions = dat['HPC_names']
    
print(f"Ripple array shape: {ripple_array_hpc.shape}")
print(f"Electrode regions: {hpc_regions}")
print(f"Trials recorded from each electrode: {hpc_trialnums}")

Ripple array shape: (900, 1500)
Electrode regions: ['left sub', 'left dg', 'right dg']
Trials recorded from each electrode: [300. 300. 300.]


In [5]:
print(f"Number of timepoints where a ripple occurred: {np.sum(ripple_array_hpc)}")

Number of timepoints where a ripple occurred: 8565.0


We can see that for this given session, the participant encoded 300 words. While doing so, brain activity was recorded from 3 different subregions in HPC. That equates to a total of 900 trials.



In [8]:
timesteps = ripple_array_hpc.shape[1] # timesteps over which recording is taken 
num_trials = np.sum(hpc_trialnums) # number of trials collected from area2 


# Step 1: Separate hpc right and left ripples
# only select ripples from self.ripple_regions
left_hemi_data = []
right_hemi_data = []
trialStart = 0
for region, trials in zip(hpc_regions, hpc_trialnums):
    trials = int(trials)
    if f'right' in region: 
        right_hemi_data.append(ripple_array_hpc[trialStart:trialStart+trials])
    if f'left' in region: 
        left_hemi_data.append(ripple_array_hpc[trialStart:trialStart+trials])
    trialStart += trials 
    
# describing procedure for left_hemi_data, but it is the exact same for right_hemi_data

# left_hemi_data is a list where each entry is of size trials x timesteps
# each entry contains ripples from that hemisphere for a hpc region specified in 
# self.ripple_regions

# np.stack(left_hemi_data) results in an array of shape N x trials x timesteps
# where N is the number of electrodes in self.ripple_regions in that hemisphere
# summing across the first axis then gives us a trials x timesteps array. 
# where each value in the array is the number of ripples that occurred at that time 
# across the N electrodes. Because we don't care if multiple ripples occurred at the same 
# time, we clip this array to have a max value of 1. 
# If there are no ripples in a hemi, then fill an array of that shape with all zeros. 

if len(right_hemi_data) > 0:
    rhd_np = np.clip(np.sum(np.stack(right_hemi_data),axis=0), a_min=0, a_max=1)
else:
    rhd_np = np.zeros((trials, timesteps))
if len(left_hemi_data) > 0:
    lhd_np = np.clip(np.sum(np.stack(left_hemi_data),axis=0), a_min=0, a_max=1)
else:
    lhd_np = np.zeros((trials, timesteps))
    
timesteps = ripple_array_hpc.shape[1] # timesteps over which recording is taken 
num_trials = np.sum(hpc_trialnums) # number of trials collected from area2 

ripples_hpc_area2shape = np.zeros((int(num_trials), timesteps))
trialStart = 0
for region, trials in zip(hpc_regions, hpc_trialnums):
    
    trials = int(trials)
    if 'right' in region:
        ripples_hpc_area2shape[trialStart:trialStart+trials] = rhd_np
    if 'left' in region:
        ripples_hpc_area2shape[trialStart:trialStart+trials] = lhd_np
    
    trialStart += trials 
                

In [9]:
print(f"Number of timepoints where a ripple occurred: {np.sum(ripples_hpc_area2shape)}")

Number of timepoints where a ripple occurred: 12505.0


Suppose we're interested in analyzing amygdala (AMY) high-frequency activity data when a ripple happens in the CA1 region of HPC. Let's load the AMY data first to get started. 

In [14]:
session_events_amy = pd.DataFrame()
ripple_array_amy = []
sub_sess_names_amy = []
sub_names_amy = []
trial_nums_amy = []
elec_ripple_rate_array_amy = []

fn_amy = '/scratch/john/SWR_scratch/ENCODING/SWR_catFR1_R1015J_0_AMY_encoding_soz_in_hamming.p'
with open(fn_amy,'rb') as f:
    dat = pickle.load(f)
    print(len(dat['ripple_array']))
    print(np.stack(dat['ripple_array']).shape)
    session_events_amy = session_events_amy.append(dat['session_events'])
    ripple_array_amy = superVstack(ripple_array_amy,dat['ripple_array'])
    sub_sess_names_amy.extend(dat['sub_sess_names'])
    sub_names_amy.extend(dat['sub_names'])
    trial_nums_amy = np.append(trial_nums_amy,dat['trial_nums'])
    elec_ripple_rate_array_amy.extend(dat['elec_ripple_rate_array']) 
    regions_amy = dat['HPC_names']
    
print(f"Ripple array shape: {ripple_array_amy.shape}")
print(f"Electrode regions: {regions_amy}")
print(f"Trials recorded from each electrode: {trial_nums_amy}")

240
(240, 1500)
Ripple array shape: (240, 1500)
Electrode regions: ['left amy', 'left amy']
Trials recorded from each electrode: [120. 120.]


We can see that AMY also has a ripple array, but we're not interested in this because we want to condition on CA1 ripples from the same hemisphere. Let's take only CA1 ripples in the next cell, and separate them based on the hemisphere they are in.

In [14]:
hpc_subregions_selected = ['ca1'] # only want ripples from this region of hpc 

# loop through hpc regions
# store ripples from the selected
left_hemi_data = []
right_hemi_data = []
trialStart = 0
for region, trials in zip(regions_hpc, trial_nums_hpc):
    trials = int(trials)
    for sb in hpc_subregions_selected:
        if f'left {sb}' in region: 
            left_hemi_data.append(ripple_array_hpc[trialStart:trialStart+trials])
        if f'right {sb}' in region: 
            right_hemi_data.append(ripple_array_hpc[trialStart:trialStart+trials])
        
        
print(f"CA1 ripple array left hemisphere shape: {np.stack(left_hemi_data).shape}")

CA1 ripple array left hemisphere shape: (4, 120, 1500)


We can see that, for the left hemisphere, we have an array of shape num_electrodes x num_trials x num_timesteps. Here, num_electrodes is the number of electrodes from selected subregions from that hemisphere. 

In [17]:
def ripples_hpc(nonhpc_trialnums, nonhpc_regions, hpc_trialnums, hpc_regions, ripple_array_hpc, regions_selected):

    
    timesteps = ripple_array_hpc.shape[1]
    num_trials = np.sum(nonhpc_trialnums)

    # Step 1: Separate hpc right and left ripples
    left_hemi_data = []
    right_hemi_data = []
    trialStart = 0
    for region, trials in zip(hpc_regions, hpc_trialnums):
        trials = int(trials)
        for rs in regions_selected:
            if f'right {rs}' in region: 
                right_hemi_data.append(ripple_array_hpc[trialStart:trialStart+trials])
            if f'left {rs}' in region: 
                left_hemi_data.append(ripple_array_hpc[trialStart:trialStart+trials])
        trialStart += trials 

    if len(right_hemi_data) > 0:
        rhd_np = np.clip(np.sum(np.stack(right_hemi_data),axis=0), a_min=0, a_max=1)
    else:
        rhd_np = np.zeros_like((trials, timesteps))
    if len(left_hemi_data) > 0:
        lhd_np = np.clip(np.sum(np.stack(left_hemi_data),axis=0), a_min=0, a_max=1)
    else:
        lhd_np = np.zeros_like((trials, timesteps))


    ripples_hpc_nonhpcshape = np.zeros((int(num_trials), timesteps))

    trialStart = 0
    for region, trials in zip(nonhpc_regions, nonhpc_trialnums):
        trials = int(trials)
        
        if 'right' in region:
            ripples_hpc_nonhpcshape[trialStart:trialStart+trials] = rhd_np
        if 'left' in region:
            ripples_hpc_nonhpcshape[trialStart:trialStart+trials] = lhd_np

        trialStart += trials 

    return ripples_hpc_nonhpcshape

