<div class="alert alert-block alert-info">
This script merges all eeg files for a session and splits them back according to session timestamps. The Jupyter Notebook works well for a single
session/troubleshooting, and the Python script is better for multiple sessions.
</div>

In [17]:
import argparse
import ast
import csv
import json
import os

import numpy as np

## Variables
These variables may change:

In [18]:
# Recording to process: animal ID and recording date (the date looks like YYYYMMDD)
animal_name = 'JC283'
date = '20220920'

# basedir is the root of the raw data tree (the eeg/ and processing/ folders are
# read from it); mbasedir is the per-recording folder the merged files are written to
basedir = "/mnt/adata11/"
mbasedir = f"/adata_pool/merged/{animal_name}-{date}/"

These variables do not change:

In [19]:
# Sampling rates (presumably in Hz — TODO confirm units)
sample_rate_res_old = 24000  # rate the session timestamps are converted from
sample_rate_res = 20000      # rate the session timestamps are converted to
sample_rate_eegh = 5000      # rate used to turn timestamps into .eegh sample indices
sample_rate_whl = 39.0625    # not used by the cells visible in this notebook

# multiplying a timestamp by this factor rescales it from the old rate to the new one
downsampled_res = sample_rate_res / sample_rate_res_old

## Import session metadata:

In [20]:
# Read session_metadata.csv (semicolon-delimited) into a dict keyed by session_id.
# Each value is a dict of the remaining columns; csv.DictReader leaves every value
# as a string, so numeric/list fields are parsed later.
session_metadata = {}
# newline='' is what the csv module asks for: it lets the reader handle line
# endings itself, so quoted fields containing line breaks are parsed correctly.
with open('session_metadata.csv', mode='r', encoding='utf-8', newline='') as file:
    reader = csv.DictReader(file, delimiter=";")
    for line in reader:
        # remove the key column from the row so it is not duplicated in the value
        session_id = line.pop('session_id')
        session_metadata[session_id] = line

In [21]:
# recording identifier, used as the key into session_metadata
basename = f'{animal_name}-{date}'
print(f'Metadata for {basename}')
# bare last expression so the notebook displays this recording's metadata row
session_metadata[basename]

Metadata for JC283-20220920


{'num_tetr': '25',
 'last_pfc_left': '8',
 'last_pfc_right': '16',
 'session_names': "['presleep','training1','intersleep','training2','postsleep']",
 'session_idx': '[[1],[2,3,4],[5],[6,7,8],[9]]',
 'reward_arms': '7,2',
 'rewards': 'C,S'}

In [22]:
# Metadata row for this recording; every value is still a string at this point.
recording_meta = session_metadata[basename]

num_tetrodes = int(recording_meta['num_tetr'])

# nested list with one entry per session, e.g. [[1],[2,3,4],...]; the numbers are
# used below as 1-based indices into the session timestamps
session_idx = json.loads(recording_meta['session_idx'])

# The names are stored as a Python-style list literal with single quotes (double
# quotes are the CSV string delimiter), which is not valid JSON. literal_eval parses
# the literal directly, so names containing quote characters are not corrupted by a
# blanket quote replacement.
session_names_str = recording_meta['session_names']
session_names = ast.literal_eval(session_names_str)

# comma-separated integers, e.g. '7,2' -> array([7, 2])
reward_arms_str = recording_meta['reward_arms']
reward_arms = np.array([int(arm) for arm in reward_arms_str.split(',')])

## Merge and split eegh files and export reward arm files

In [23]:
def find_files_with_eegh(directory):
    """Recursively collect the paths of files whose name contains 'eegh'.

    Args:
        directory: Root folder to search; all subfolders are walked as well.

    Returns:
        List of matching file paths (each joined with the folder it was found
        in), in os.walk traversal order. Empty when nothing matches.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(directory)
        for filename in filenames
        if 'eegh' in filename
    ]

In [24]:
# Alternative paths, left commented out:
# basedir='D:\github/test/'
# mbasedir="D:\github/test\merged"+animal_name+'-'+date+'/'

# folder that is searched for this recording's eegh files
directory_path_eegh = f"{basedir}eeg/{animal_name}/{date}/"

# create the output folder for the merged files if it is not there yet
if not os.path.isdir(mbasedir):
    os.makedirs(mbasedir)

In [25]:
# find all eegh files in a session
eegh_files = sorted(find_files_with_eegh(directory_path_eegh)) # sort the filenames numerically

# initialize a merged eegh file and add the first file to it
eegh_merged=np.fromfile(eegh_files[0], dtype=np.int16)
print('Reshaping',eegh_files[0],'from length',eegh_merged.shape)
# reshape the first file (1D to 2D array) so that each row corresponds to one tetrode
eegh_merged= eegh_merged.reshape(int(len(eegh_merged)/num_tetrodes),num_tetrodes)
print('New eegh_merged shape:',eegh_merged.shape)

# iterate over the other files and add them to the large merged file
for eegh_file_i in range(1,len(eegh_files)):
    eegh_t=np.fromfile(eegh_files[eegh_file_i], dtype=np.int16)
    print('Reshaping',eegh_files[eegh_file_i], 'from length',eegh_t.shape)
    eegh_t= eegh_t.reshape(int(len(eegh_t)/num_tetrodes),num_tetrodes)
    print('New eegh_merged shape:',eegh_t.shape)
    eegh_merged=np.append(eegh_merged,eegh_t,axis=0)

print('Final eegh_merged shape:',eegh_merged.shape)
    
# calculate the total length (num timestamps) of the merged eegh file
length_eegh_merged=eegh_merged.shape[0]

Reshaping /mnt/adata11/eeg/JC283/20220920/JC283-20220920_01.eegh from length (450000000,)
New eegh_merged shape: (18000000, 25)
Reshaping /mnt/adata11/eeg/JC283/20220920/JC283-20220920_02.eegh from length (151888000,)
New eegh_merged shape: (6075520, 25)
Reshaping /mnt/adata11/eeg/JC283/20220920/JC283-20220920_03.eegh from length (86704000,)
New eegh_merged shape: (3468160, 25)
Reshaping /mnt/adata11/eeg/JC283/20220920/JC283-20220920_04.eegh from length (123772800,)
New eegh_merged shape: (4950912, 25)
Reshaping /mnt/adata11/eeg/JC283/20220920/JC283-20220920_05.eegh from length (338201600,)
New eegh_merged shape: (13528064, 25)
Reshaping /mnt/adata11/eeg/JC283/20220920/JC283-20220920_06.eegh from length (165008000,)
New eegh_merged shape: (6600320, 25)
Reshaping /mnt/adata11/eeg/JC283/20220920/JC283-20220920_07.eegh from length (85667200,)
New eegh_merged shape: (3426688, 25)
Reshaping /mnt/adata11/eeg/JC283/20220920/JC283-20220920_08.eegh from length (101318400,)
New eegh_merged shape

In [26]:
# load the session timestamps and downsample them
session_timestamps=np.loadtxt(basedir+"processing/"+animal_name+'/'+date+'/'+'session_shifts.txt')

session_timestamps=np.append([0],session_timestamps) # start the first timestamp at 0
session_timestamps_down=session_timestamps*downsampled_res
print('Resampled session timestamps:',session_timestamps)

for session_idx_i in range(len(session_idx)):
    # generate reward arms files for the training session files
    if session_names[session_idx_i]=='training1' or session_names[session_idx_i]=='training2':
        np.savetxt(mbasedir+animal_name+'-'+date+'_'+session_names[session_idx_i]+'.reward_arms', reward_arms, fmt='%i',newline=" ")
    
    # cut the eegh files according to the session timestamps and write them to the merged folder
    start_cut=session_idx[session_idx_i][0]-1
    end_cut=session_idx[session_idx_i][-1]
    start_eegh=int(session_timestamps_down[start_cut]/sample_rate_res*sample_rate_eegh)
    end_eegh=int(session_timestamps_down[end_cut]/sample_rate_res*sample_rate_eegh)

    eegh_temp=eegh_merged[start_eegh:end_eegh,:]

    eegh_temp.tofile(mbasedir+animal_name+'-'+date+'_'+session_names[session_idx_i]+'.eegh')

Resampled session timestamps: [0.00000000e+00 8.64001470e+07 1.15562694e+08 1.32210141e+08
 1.55974938e+08 2.20909785e+08 2.52591732e+08 2.69039979e+08
 2.88493476e+08 3.79945023e+08]
