# Data loading

##  Loading Data

### Amplifiers

In [None]:
from pathlib import Path

# Root folder that holds the raw Intan sessions.
# Edit this path so it points to where the data lives on your machine.
experiment_path = Path("/media/heberto/One Touch/DiCarlo-CN-data-share/exp_domain-transfer-2023/exp_domain-transfer-2023.sub_pico/raw_files/intanraw")
assert experiment_path.is_dir(), f"Experiment folder not found: {experiment_path}"

# Session to load; the alternative session is kept for reference because of its known issue.
session_folder = experiment_path / "pico_domain-transfer-2023_230215_161322"
# session_folder = experiment_path / "pico_domain-transfer-2023_230214_140610"  # This file has a timestamp problem
assert session_folder.is_dir(), f"Session folder not found: {session_folder}"

# File handed to the Intan extractor in the following cells.
file_path = session_folder / "info.rhd"
assert file_path.is_file(), f"Intan file not found: {file_path}"

In [None]:
from spikeinterface.extractors import IntanRecordingExtractor


# Read the amplifier stream of the selected session file.
recording = IntanRecordingExtractor(
    file_path=file_path,
    stream_name="RHD2000 amplifier channel",
    all_annotations=True,
)
recording

### Auxiliary input

In [None]:
# Same file as the amplifier data, read through the auxiliary-input stream.
recording_auxiliary_input = IntanRecordingExtractor(
    stream_name="RHD2000 auxiliary input channel",
    file_path=file_path,
    all_annotations=True,
)
recording_auxiliary_input  # last expression of the cell, so the notebook displays it

### ADC input

In [None]:
# Same file as the amplifier data, read through the ADC-input stream.
recording_adc_input = IntanRecordingExtractor(
    stream_name="USB board ADC input channel",
    file_path=file_path,
    all_annotations=True,
)
recording_adc_input  # last expression of the cell, so the notebook displays it

### Digital channel 
Reading this stream currently requires a version of the neo library that includes
this [PR](https://github.com/NeuralEnsemble/python-neo/pull/1476).

In [None]:
# Same file as the amplifier data, read through the digital-input stream.
recording_digital = IntanRecordingExtractor(
    stream_name="USB board digital input channel",
    file_path=file_path,
    all_annotations=True,
)
recording_digital  # last expression of the cell, so the notebook displays it

# Pipeline

## Artificial data

In [None]:
import spikeinterface.widgets as sw

from spikeinterface.core.generate import generate_ground_truth_recording


# Small synthetic recording (4 channels, 1 unit, fixed seed) together with its ground-truth sorting.
# NOTE: this rebinds `recording`, replacing the Intan recording loaded earlier in the notebook.
recording, sorting = generate_ground_truth_recording(
    num_channels=4,
    num_units=1,
    durations=[1],
    seed=0,
)

# Plot the traces and the ground-truth spike raster over the same (0, 1) time range.
w_ts = sw.plot_traces(recording, time_range=(0, 1))
w_rs = sw.plot_rasters(sorting, time_range=(0, 1))



In [None]:
import numpy as np

from spikeinterface.core.job_tools import ChunkRecordingExecutor
from dicarlo_lab_to_nwb.conversion.pipeline import calculate_peak_in_chunks, init_method


# Settings for the chunked peak-detection run on the synthetic recording.
job_name = "DiCarloPeakDetectionPipeline"
job_kwargs = {"n_jobs": 1, "verbose": True, "progress_bar": True, "chunk_duration": 1.0}
noise_threshold = 3  # The number of standard deviations for peak detection

# Arguments handed to `init_method`; `calculate_peak_in_chunks` is the per-chunk function.
init_args = (recording, noise_threshold)
processor = ChunkRecordingExecutor(
    recording,
    calculate_peak_in_chunks,
    init_method,
    init_args,
    handle_returns=True,  # needed so that `run()` gives back the per-chunk results
    job_name=job_name,
    **job_kwargs,
)

# One entry per processed chunk.
values = processor.run()
number_of_chunks = len(values)
number_of_channels = recording.get_num_channels()

# Every chunk entry is indexed by channel: join the per-chunk arrays of each channel into one array.
spike_times_per_channel = {}
for channel_index in range(number_of_channels):
    channel_spike_times = [chunk_result[channel_index] for chunk_result in values]
    spike_times_per_channel[channel_index] = np.concatenate(channel_spike_times)
    


In [None]:
# Ground-truth spike train of unit 0 from the generated sorting, to compare with the detected peaks.
sorting.get_unit_spike_train(0, return_times=True)

In [None]:
# Concatenated detection result for channel index 0 of the synthetic recording.
spike_times_per_channel[0]

## Your data

In [None]:
from pathlib import Path

import spikeinterface.widgets as sw
from dicarlo_lab_to_nwb.conversion.pipeline import DiCarloBandPass, DiCarloNotch, calculate_peak_in_chunks, init_method
from spikeinterface.extractors import IntanRecordingExtractor


# Root folder that holds the raw Intan sessions.
# Edit this path so it points to where the data lives on your machine.
experiment_path = Path(
    "/media/heberto/One Touch/DiCarlo-CN-data-share/exp_domain-transfer-2023/exp_domain-transfer-2023.sub_pico/raw_files/intanraw"
)
assert experiment_path.is_dir(), f"Experiment folder not found: {experiment_path}"

# Session to load; the alternative session is kept for reference because of its known issue.
session_folder = experiment_path / "pico_domain-transfer-2023_230215_161322"
# session_folder = experiment_path / "pico_domain-transfer-2023_230214_140610"  # This file has a timestamp problem
assert session_folder.is_dir(), f"Session folder not found: {session_folder}"

file_path = session_folder / "info.rhd"
assert file_path.is_file(), f"Intan file not found: {file_path}"

# Read the amplifier stream; this rebinds `recording` to the real data.
recording = IntanRecordingExtractor(
    file_path=file_path,
    stream_name="RHD2000 amplifier channel",
    all_annotations=True,
)

# Plot the (0, 1) time range of the raw traces, with scaling applied.
w_ts = sw.plot_traces(recording, time_range=(0, 1), return_scaled=True)


#### Preprocess

In [None]:
# Notch filter settings: centre frequency in Hz; bandwidth presumably in Hz too (confirm against DiCarloNotch).
# NOTE(review): 50 Hz matches European mains; data recorded on a 60 Hz supply would need f_notch = 60 (confirm).
f_notch, bandwidth = 50, 10
# Band-pass limits passed to DiCarloBandPass.
f_low, f_high = 300.0, 6000.0

# The notch is applied first, then the band-pass on its output.
notched_recording = DiCarloNotch(recording, f_notch=f_notch, bandwidth=bandwidth)
preprocessed_recording = DiCarloBandPass(notched_recording, f_low=f_low, f_high=f_high)

# For this instance, each array has 96 channels, 400 micrometers apart
w_ts = sw.plot_traces(preprocessed_recording, time_range=(0, 1), return_scaled=True)

#### Run half a minute of the data

In [None]:
from spikeinterface.core.job_tools import ChunkRecordingExecutor
import numpy as np

# Keep only the first 30 seconds of the preprocessed recording.
# NOTE: the names below say "minute", but the slice is half a minute long; they are kept unchanged
# so that any other cell that refers to them keeps working.
# Frame indices must be integers, so the float product (sampling rate * seconds) is converted explicitly.
# The sampling rate is read from the recording that is actually being sliced.
samples_in_a_minute = int(preprocessed_recording.get_sampling_frequency() * 30.0)
recording_first_minute = preprocessed_recording.frame_slice(start_frame=0, end_frame=samples_in_a_minute)

# Settings for the chunked peak-detection run.
job_name = "DiCarloPeakDetectionPipeline"
job_kwargs = dict(n_jobs=1, verbose=True, progress_bar=True, chunk_duration=5.0)
noise_threshold = 3  # The number of standard deviations for peak detection

# Arguments handed to `init_method`; `calculate_peak_in_chunks` is the per-chunk function.
init_args = (recording_first_minute, noise_threshold)
processor = ChunkRecordingExecutor(
    recording_first_minute,
    calculate_peak_in_chunks,
    init_method,
    init_args,
    handle_returns=True,  # needed so that `run()` gives back the per-chunk results
    job_name=job_name,
    **job_kwargs,
)

# One entry per processed chunk.
values = processor.run()
spike_times_per_channel = {}

number_of_chunks = len(values)
# Count the channels of the recording that was processed, since the results are indexed against it.
number_of_channels = recording_first_minute.get_num_channels()

# Every chunk entry is indexed by channel: join the per-chunk arrays of each channel into one array.
for channel_index in range(number_of_channels):
    channel_spike_times = [times[channel_index] for times in values]
    spike_times_per_channel[channel_index] = np.concatenate(channel_spike_times)

spike_times_per_channel

In [None]:
# Detection result for channel index 0, scaled by 1000 to express it in milliseconds.
spike_times_per_channel[0] * 1000.0 # ms

In [None]:
# Number of detections on channel index 0 in the processed slice.
spike_times_per_channel[0].size