## Main notebook for the preprocessing-sorting-postprocessing pipeline with SpikeInterface

### Imports

In [34]:
import numpy as np
from pathlib import Path
import spikeinterface.full as si
from probeinterface import read_probeinterface
from adc_shifts import adc_shifts

%matplotlib widget

### Parameters

In [5]:
# All user-tunable settings for the pipeline live in this cell.

# Data loading
num_channels = 384
sampling_frequency = 2500  # Hz
# see this thread https://community.brain-map.org/t/using-the-raw-data/1898 for gain scaling factor and dtype
gain_recording = 4.69  # multiplicative factor applied to the raw integer samples (scales to uV)
offset_recording = 0
dtype = np.int16

# Main working directory
base_folder = Path.home() / 'RANCZLAB-NAS/data/ONIX/mouse_343/mouse_343_2024-06-20T131349Z'
# Input data (file name relative to base_folder)
binary_filename = 'LFP.bin'

# Preprocessing
highpass_frequency = 1 # Hz

# Sorting
sorter_name = 'hdsort'
sorter_path = Path.home() / 'RANCZLAB-NAS' / 'spikesorter_images' / f'{sorter_name}-compiled-base.sif'
# Single definition of the sorter output folder. An earlier assignment
# (base_folder / 'kilosort3_sorting_files') was always overwritten by this one
# before being used, so it has been dropped.
output_folder = Path.home() / 'RANCZLAB-NAS/output_sorter/mouse_343'
remove_existing_folder = True

# Output sorting
# NOTE(review): the folder name mentions kilosort3 although sorter_name is 'hdsort',
# and this path is not used by any cell visible in this notebook - confirm it is still needed.
sorting_save_path = base_folder / 'kilosort3_sorting_output'

# Other
backend = 'ipywidgets'

### Loading the data

In [10]:
# Read the flat binary file. It is declared with unit gain and zero offset;
# the conversion to uV is done explicitly by the scale step that follows.
raw_unscaled = si.read_binary(
    base_folder / binary_filename,
    sampling_frequency=sampling_frequency,
    dtype=dtype,
    num_channels=num_channels,
    gain_to_uV=1.0,
    offset_to_uV=0,
)
raw_scaled = si.scale(raw_unscaled, gain=gain_recording, offset=offset_recording)  # scale to uV

# Attach the probe description: first probe of the group stored in the JSON file
# (the path is relative to the notebook's working directory).
probegroup = read_probeinterface('np_json_files/np1.json')
probe = probegroup.probes[0]

# Every processing stage is stored in this dict under a descriptive key.
recordings = {'raw': raw_scaled.set_probe(probe)}

recordings['raw']

ScaleRecording: 384 channels - 2.5kHz - 1 segments - 546,306 samples - 218.52s (3.64 minutes) 
                float32 dtype - 800.25 MiB

### Preprocessing

In [12]:
# high pass signal
recordings['highpass'] = si.highpass_filter(recordings['raw'], freq_min=highpass_frequency)

# Band-pass on top of the high-pass. A digital filter needs its upper cutoff to be
# strictly below the Nyquist frequency (half the sampling rate); the requested 6000 Hz
# is above Nyquist for low-rate recordings (e.g. 2500 Hz), which makes the filter
# construction fail. Clamp the cutoff in that case and tell the user about it.
bandpass_freq_max = 6000  # Hz, requested upper cutoff
nyquist_frequency = recordings['highpass'].get_sampling_frequency() / 2
if bandpass_freq_max >= nyquist_frequency:
    bandpass_freq_max = 0.9 * nyquist_frequency
    print(f'Band-pass upper cutoff lowered to {bandpass_freq_max:g} Hz (Nyquist frequency is {nyquist_frequency:g} Hz)')
recordings['bandpass'] = si.bandpass_filter(recordings['highpass'], freq_min=highpass_frequency, freq_max=bandpass_freq_max)

# perform the phase shift (similar to IBL destriping or `tshift` option in CatGT):
# NOTE(review): adc_shifts() comes from a local module; its first element is assumed
# to be the per-channel inter-sample shift - confirm against adc_shifts.py.
recordings['phase_shift'] = si.phase_shift(recordings['bandpass'], inter_sample_shift=adc_shifts()[0])

# Bad-channel removal is currently disabled; the block is kept for reference.
# # detect noisy, dead, and out-of-brain channels
# bad_channel_ids, channel_labels = si.detect_bad_channels(recordings['phase_shift'])
# recordings['good_channels'] = recordings['phase_shift'].remove_channels(remove_channel_ids=bad_channel_ids)
# print('Channels removed:', {k: channel_labels[k] for k in bad_channel_ids})
# print(len(bad_channel_ids))
# num_channels= 384-len(bad_channel_ids)
# print(num_channels)

# subtract the median across all channels
recordings['common_reference'] = si.common_reference(recordings['phase_shift'], operator="median", reference="global")

### Run sorting

In [149]:
# Run the configured sorter on the fully preprocessed recording, inside the
# Singularity image given by sorter_path.
sorter_kwargs = dict(
    recording=recordings['common_reference'],
    output_folder=output_folder,
    singularity_image=sorter_path,
    verbose=True,
    remove_existing_folder=remove_existing_folder,
)
sorting = si.run_sorter(sorter_name, **sorter_kwargs)

# Persist the sorting next to the sorter output; the saved object is the cell's displayed value.
sorting.save(folder=output_folder / 'si_save', format='npz_folder')

Starting container
Installing spikeinterface from sources in /home/jupyter-nora/RANCZLAB-NAS/spikesorter_images/hdsort-compiled-base.sif
Installing dev spikeinterface from remote repository
Running hdsort sorter inside /home/jupyter-nora/RANCZLAB-NAS/spikesorter_images/hdsort-compiled-base.sif
Stopping container




NpzFolderSorting: 378 units - 1 segments - 30.0kHz

### Postprocessing / exporting

In [152]:
# the waveforms are sparse so it is faster to export to phy
# `overwrite` follows the notebook-wide remove_existing_folder flag: without it, a second
# run of this cell stops because the 'waveforms' folder from the previous run still exists.
# NOTE: 'waveforms' is relative to the notebook's working directory.
we = si.extract_waveforms(
    recording=recordings['common_reference'],
    sorting=sorting,
    folder='waveforms',
    unit_batch_size=800,
    overwrite=remove_existing_folder,
)

# compute some metrics needed for this module:
spike_amplitudes = si.compute_spike_amplitudes(waveform_extractor=we)
principal_components = si.compute_principal_components(
    waveform_extractor=we,
    n_components=5,
    mode='by_channel_global',
)

# save the data in a specified location
# `remove_if_exists` mirrors the same flag so that a previous export does not block a re-run.
si.export_to_phy(
    waveform_extractor=we,
    output_folder=output_folder / 'phy_folder',
    remove_if_exists=remove_existing_folder,
)

extract waveforms shared_memory multi buffer:   0%|          | 0/154 [00:00<?, ?it/s]

extract waveforms memmap multi buffer:   0%|          | 0/154 [00:00<?, ?it/s]

extract amplitudes:   0%|          | 0/154 [00:00<?, ?it/s]

Fitting PCA:   0%|          | 0/378 [00:00<?, ?it/s]

Projecting waveforms:   0%|          | 0/378 [00:00<?, ?it/s]

write_binary_recording:   0%|          | 0/154 [00:00<?, ?it/s]

extract PCs:   0%|          | 0/154 [00:00<?, ?it/s]

Run:
phy template-gui  /home/jupyter-nora/RANCZLAB-NAS/output_sorter/mouse_343/phy_folder/params.py
