In [489]:
%matplotlib widget
from datetime import datetime
from zoneinfo import ZoneInfo
from pathlib import Path
from neuroconv import ConverterPipe
from neuroconv.datainterfaces import DeepLabCutInterface, VideoInterface
import pandas as pd
import numpy as np
import pprint

from matplotlib import pyplot as plt
fig_folder = Path("/Users/vigji/Desktop/test_synch")

In [2]:
def _find_in_folder(folder, pattern):
    return next(folder.glob(pattern))

## Videos and DLC

In [171]:
def _copy_and_update_config_file(config_file_path, video_file_path, dest_dir=None, overwrite=False):
    # Copy config file to data folder:
    config_file_path = Path(config_file_path)
    video_file_path = Path(video_file_path)
    if dest_dir is None:
        dest_dir = video_file_path.parent
    else:
        dest_dir = Path(dest_dir)

    config_file_path_copy = dest_dir / config_file_path.name

    if not overwrite and config_file_path_copy.exists():
        print(f"Config file already exists in {config_file_path_copy}.")
        return config_file_path_copy

    config_file_text = config_file_path.read_text()
    # find crop by looking for content in between crop: and \n:
    crop = config_file_text.split("crop: ")[1].split("\n")[0]
    config_file_text = config_file_text.replace("\nvideo_sets:\n  ", 
                            f"\nvideo_sets:\n  {video_file_path}:\n    crop: {crop}\n  ")
    config_file_path_copy.write_text(config_file_text)

    return config_file_path_copy

mid = "M21"
day = "20240421"
ses = "165242"
data_folder = Path(f"/Users/vigji/Desktop/test_mpa_dir/P02_MPAOPTO_LP/e02_ephys-contrapag-stim/{mid}/{day}/{ses}")
video_data = {"eye": {"name": "EyeCamera",
                      "description": "Magnified view of the animal's pupil.", 
                      "dlc_model_path": "/Volumes/SystemsNeuroBiology/SNeuroBiology_shared/DLC_models/eye-pupil-Luigi Petrucco-2023-12-16/config.yaml"},
              "top": {"name": "TopCamera",
                      "description": "Top view of the animal's head and body in the setup.", 
                      "dlc_model_path": "/Volumes/SystemsNeuroBiology/SNeuroBiology_shared/DLC_models/top-cam-Luigi Petrucco-2023-12-16/config.yaml"},
                      }

path_to_save_nwbfile = data_folder / f"{mid}_{day}_{ses}.nwb" 

dlc_interfaces = []
video_interfaces = []
video_dicts = []
for i_cam, camera in  enumerate(["eye", "top"]):
    dlc_file_path = _find_in_folder(data_folder, f"*{camera.title()}*.h5")
    video_file_path = _find_in_folder(data_folder, f"*{dlc_file_path.stem.split('DLC')[0]}*.avi")
    timestamps_file_path = _find_in_folder(data_folder, f"*{dlc_file_path.stem.split('DLC')[0][-10:]}*.csv")
    config_file_path = video_data[camera]["dlc_model_path"]
    # update config file with video path:
    config_file_path = _copy_and_update_config_file(config_file_path, video_file_path)

    # load framerate csv:
    framerate_df = pd.read_csv(timestamps_file_path)
    # convert to datetime the Timestamp column strings to seconds from first timestamp:
    timestamps = pd.to_datetime(framerate_df["Timestamp"])
    timestamps = (timestamps - timestamps[0]).dt.total_seconds().values

    dlc_interface = DeepLabCutInterface(file_path=dlc_file_path, config_file_path=config_file_path, subject_name=mid, verbose=False)
    dlc_interface.set_aligned_timestamps(timestamps)

    dlc_interfaces.append(dlc_interface)

    video_interface = VideoInterface(file_paths=[video_file_path], 
                                     # timestamps=[timestamps],
                                     verbose=False, 
                                     metadata_key_name=video_data[camera]["name"])
    print(len(timestamps))
    video_interface.set_aligned_timestamps([timestamps])
    video_interfaces.append(video_interface)
    video_dict = {"file_path": video_file_path, 
                  "description": video_data[camera]["description"], 
                  "name": video_data[camera]["name"],
                  "interface_num": f"00{i_cam+1}",
                  "timestamps": timestamps,}
    video_dicts.append(video_dict)


Config file already exists in /Users/vigji/Desktop/test_mpa_dir/P02_MPAOPTO_LP/e02_ephys-contrapag-stim/M21/20240421/165242/config.yaml.
268646
Config file already exists in /Users/vigji/Desktop/test_mpa_dir/P02_MPAOPTO_LP/e02_ephys-contrapag-stim/M21/20240421/165242/config.yaml.
268646


video_interface = VideoInterface(file_paths=[v["file_path"] for v in video_dicts], verbose=False)
video_interface.set_aligned_timestamps([v["timestamps"] for v in video_dicts])
video_metadata = video_interface.get_metadata()
for video_dict, cam_metadata in zip(video_dicts, video_metadata["Behavior"]["Videos"]):
    cam_metadata.update({k: video_dict[k] for k in ["name", "description"]})
video_metadata.to_dict()

In [22]:
conv_pipe = ConverterPipe([*video_interfaces, *dlc_interfaces, ], 
                          verbose=True)  

# remove file if it exists:
if path_to_save_nwbfile.exists():
    path_to_save_nwbfile.unlink()

metadata = conv_pipe.get_metadata()
# For data provenance we add the time zone information to the conversion
session_start_time = datetime(2020, 1, 1, 12, 30, 0, tzinfo=ZoneInfo("US/Pacific"))
metadata["NWBFile"].update(session_start_time=session_start_time)
# pprint.pprint(metadata.to_dict())
# Choose a path for saving the nwb file and run the conversion
conversion_options = {f"DeepLabCutInterface{v['interface_num']}": dict(container_name=v["name"]) for v in video_dicts}
conv_pipe.run_conversion(nwbfile_path=path_to_save_nwbfile, metadata=metadata, conversion_options=conversion_options)

Metadata is valid!
conversion_options is valid!


  df_animal = df.groupby(level="individuals", axis=1).get_group(individual_name)
  for kpt, xyp in df_animal.groupby(level="bodyparts", axis=1, sort=False):
  df_animal = df.groupby(level="individuals", axis=1).get_group(individual_name)
  for kpt, xyp in df_animal.groupby(level="bodyparts", axis=1, sort=False):
  warn("%s '%s': Length of data does not match length of timestamps. Your data may be transposed. "
  warn("%s '%s': Length of data does not match length of timestamps. Your data may be transposed. "
  warn("%s '%s': Length of data does not match length of timestamps. Your data may be transposed. "
  warn("%s '%s': Length of data does not match length of timestamps. Your data may be transposed. "
  warn("%s '%s': Length of data does not match length of timestamps. Your data may be transposed. "
  warn("%s '%s': Length of data does not match length of timestamps. Your data may be transposed. "
  warn("%s '%s': Length of data does not match length of timestamps. Your data may be 

NWB file saved at /Users/vigji/Desktop/test_mpa_dir/M21/20240421/165242/M21_20240421_165242.nwb!


In [50]:
video_file_path = video_file_path

## Timeseries

In [391]:
from neuroconv.datainterfaces import CsvTimeIntervalsInterface
from datetime import datetime
debug_plots = False

file_path = _find_in_folder(data_folder, "*laser-log*.csv")

# Change the file_path to the location of the file in your system
interface = CsvTimeIntervalsInterface(file_path=file_path, verbose=False)

# Extract what metadata we can from the source files
metadata = interface.get_metadata()
# Add the time zone information to the conversion
session_start_time = datetime(2020, 1, 1, 12, 30, 0, tzinfo=ZoneInfo("US/Pacific"))
metadata["NWBFile"] = dict(session_start_time=session_start_time)

nwbfile_path = f"{path_to_save_nwbfile}" # This should be something like: "./saved_file.nwb"
nwbfile = interface.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata)

ValueError: df must contain a column named 'start_time'. Existing columns: ['Value', 'Timestamp']

## Get timestamps from digital logs

In [400]:
import spikeinterface.full as si
from labnpx.digital_signal import DigitalSignal
from labnpx.barcode_signal import BarcodeSignal
from datetime import datetime, timedelta

def _read_oephys_datetime(oephys_folder, timezone="Europe/Rome"):
    """Read the datetime of the recording from the sync_messages.txt file in the oephys folder."""

    synch_txt_file = _find_in_folder(oephys_folder, "*/*/*/sync_messages.txt")

    with open(synch_txt_file, "r") as f:
        line = f.readline()

    # get content between : and \n:
    start_msec_utc = int(line.split(": ")[1].split("\n")[0])
    return datetime.fromtimestamp(start_msec_utc / 1000, ZoneInfo(timezone))

def _load_signals_from_daq_interface(interface, dwnsamp=4):
    CHANNELS_MAP = {0: 'frames', 1: 'laser', 2: '-', 3: 'motor', 4: 'barcode', 5: '-', 6: '-', 7: '-'}
    THR = 10000  # for analog reading of digital signals

    fs = interface.sampling_frequency

    assert fs % dwnsamp == 0, "The downsampling factor must be a divisor of the original sampling frequency."
    target_fs = int(fs / dwnsamp)

    traces_array = np.array(digital_traces_interface.get_traces()[::dwnsamp, :])

    # Create a DigitalSignal objects dictionary for valid channels:
    digital_signals = {}
    for idx in range(8):
        if CHANNELS_MAP[idx] != '-':
            digital_signals[CHANNELS_MAP[idx]] = DigitalSignal(traces_array[:, idx] > THR, fs=target_fs)
    
    return digital_signals

def _validate_log_timestamps(trials_df, trials_digital_onsets, tolerance=0.1):
    """Function to validate trial timestamps from csv log and digital signals.

    Parameters
    ----------
    trials_df : pd.DataFrame
        Dataframe with trial timestamps, with a "Timestamp" column.
    trials_digital_onsets : np.ndarray
        Array with trial onsets from digital signals.
    tolerance : float, optional
        Max tolerated time mismatch, by default 0.1
    """
    assert len(trials_df) == len(trials_digital_onsets), "Number of trials in laser log does not match number of trials in digital signals"

    # convert timestamp from 2024-04-21T16:58:03.6325504+02:00 format:
    trial_onsets_df = pd.to_datetime(trials_df["serial-timestamp"])
    trial_intervals_df = trial_onsets_df.diff().dt.total_seconds().values[1:]
    # compare log and digital signal derived trial intervals:
    assert np.allclose(trial_intervals_df, np.diff(trials_digital_onsets), atol=tolerance), \
        "Trial intervals from log and digital signals do not match"


oephys_folder = _find_in_folder(data_folder, "NPX/[0-9]*")
digital_traces_interface = si.read_openephys(oephys_folder, stream_name="Record Node 103#NI-DAQmx-102.USB-6212 (BNC)")
oephys_datetime = _read_oephys_datetime(oephys_folder)

digital_signals = _load_signals_from_daq_interface(digital_traces_interface)

if debug_plots:
    t_lims = (1000, 1060)
    t_slice = slice(target_fs*t_lims[0], target_fs*t_lims[1])
    plt.figure(figsize=(10, 5))
    for idx in range(8):
        plt.plot(traces_array[t_slice, idx]+20000*idx, lw=0.5)

#### Camera frames

In [377]:
# get camera frames triggers, and raise relative warnings if needed
camera_timestamps = digital_signals["frames"].onsets_times
assert camera_timestamps[0] > 1, "Camera frames trigger starts too early, could have missed something"
# assert camera_timestamps[-1] < (digital_signals["frames"].duration - 1), "Camera frames trigger ends too late, could have missed something"

#### Laser

In [392]:
ASSUMED_TRIAL_LENGTH_S = 6.2
MIN_TRIAL_DISTANCE_S = 1
TOLERANCE_S = 0.3

def _load_laser_log_csv(data_folder, digital_log_signal, 
                        min_trial_distance_s=MIN_TRIAL_DISTANCE_S, tolerance_s=TOLERANCE_S):
    """Load laser log csv file and validate and add columns with digital signal timestamps."""
    COLUMN_NAME_REMAP = {"Timestamp": "serial-timestamp"}
    
    laser_log_file = _find_in_folder(data_folder, "*laser-log*.csv")
    df = pd.read_csv(laser_log_file)
    df.rename(columns=COLUMN_NAME_REMAP, inplace=True)

    # df.reset_index(drop=True, inplace=True)
    for i, content in enumerate(["frequency", "pulse-width_ms", "stim-duration_ms"]):
        df[content] = df["Value"].apply(lambda x: x.split(";")[i])

    # get laser triggers
    laser_onsets = digital_log_signal.onsets_times
    laser_offsets = digital_log_signal.offsets_times

    # trial onsets/offsets are the onsets which distance to the previous/next onset/offset 
    # is greater than 0.5 s, plus the first onset.
    laser_trial_onsets = np.concatenate([[laser_onsets[0]], laser_onsets[1:][np.diff(laser_onsets) > min_trial_distance_s]])
    laset_trial_offsets = np.concatenate([laser_offsets[:-1][np.diff(laser_onsets) > min_trial_distance_s], [laser_offsets[-1]]])

    # read laser log and cross check number of trials:

    _validate_log_timestamps(df, laser_trial_onsets, tolerance=tolerance_s)

    df["start_time_bnc-time"] = laser_trial_onsets
    df["end_time_bnc-time"] = laset_trial_offsets

    return df.drop("Value", axis=1)

laser_log_df = _load_laser_log_csv(data_folder, digital_signals["laser"])

#### Motor

In [393]:
def _load_motor_log_csv(data_folder, digital_log_signal, assumed_trial_len_s=ASSUMED_TRIAL_LENGTH_S,
                        tolerance_s=TOLERANCE_S):
    """Load motor log csv file and validate and add columns for digital signal timestamps."""

    # fix log column names:
    COLUMN_NAME_REMAP = {"Value.Radius": "motor-order", 
                         "Value.Theta": "radius_cm", 
                         "Value.Direction": "theta_rad",
                         "Timestamp": "serial-timestamp"}

    # get motor triggers
    motor_onsets = digital_log_signal.onsets_times
    motor_offsets = digital_log_signal.offsets_times

    # motor trial start times are one onset every 2 offsets; motor trial end times are one offset every 2 onsets.
    # First movement is the initial reset and is not a trial.
    motor_trial_onsets = motor_onsets[1::2]
    motor_trial_offsets = motor_offsets[2::2]
    approach_end = motor_offsets[1::2]
    depart_start = motor_onsets[2::2]

    # check that all trials are around 6.2 seconds long:
    assert np.allclose(motor_trial_offsets - motor_trial_onsets, assumed_trial_len_s, atol=tolerance_s), "Some motor trials are not around 6.2 seconds long"

    # double check with motor log:
    motor_log_file = _find_in_folder(data_folder, "*motor-log*.csv")
    df = pd.read_csv(motor_log_file)
    df.rename(columns=COLUMN_NAME_REMAP, inplace=True)
    # trials are one every two entries, skipping the first one:
    df = df.loc[1::2, :]
    assert len(motor_trial_onsets) == len(df), "Number of trials in motor log does not match number of trials in digital signals"

    _validate_log_timestamps(df, motor_trial_onsets, tolerance=tolerance_s)

    df["start_time_bnc-time"] = motor_trial_onsets
    df["end_time_bnc-time"] = motor_trial_offsets
    df["approach-end_bnc-time"] = approach_end
    df["depart-start_bnc-time"] = depart_start

    return df

motor_log_df = _load_motor_log_csv(data_folder, digital_signals["motor"])
motor_log_df


Unnamed: 0,motor-order,radius_cm,theta_rad,serial-timestamp,start_time_bnc-time,end_time_bnc-time,approach-end_bnc-time,depart-start_bnc-time
1,2,3.0,3.769911,2024-04-21T16:58:03.6325504+02:00,373.6714,379.9441,373.8714,379.7441
3,1,4.5,2.932153,2024-04-21T16:58:23.6199936+02:00,393.6567,399.8835,393.8566,399.6835
5,2,3.0,4.188790,2024-04-21T16:58:43.6440960+02:00,413.6782,419.9015,413.8781,419.7015
7,1,6.0,2.094395,2024-04-21T16:59:03.6641664+02:00,433.6963,439.9124,433.8963,439.7124
9,1,6.0,2.094395,2024-04-21T16:59:23.6854272+02:00,453.7153,459.9296,453.9153,459.7297
...,...,...,...,...,...,...,...,...
567,2,4.5,4.188790,2024-04-21T18:32:27.8405248+02:00,6037.2332,6043.4512,6037.4332,6043.2512
569,1,6.0,2.932153,2024-04-21T18:32:47.8625536+02:00,6057.2529,6063.4626,6057.4528,6063.2626
571,2,3.0,4.188790,2024-04-21T18:33:07.8735744+02:00,6077.2615,6083.4784,6077.4614,6083.2784
573,1,3.0,2.094395,2024-04-21T18:33:27.8854784+02:00,6097.2707,6103.4806,6097.4707,6103.2806


## Align timestamps to ephys log

In [394]:
def _array_from_oephys_events(npx_traces_events, fs, pad_s=5, offset_s=0):
    """Create a boolean array from a list of events.

    Parameters
    ----------
    events : list
        List of tuples with event onset and duration.
    fs : int
        Sampling frequency.
    length : int
        Length of the array.

    Returns
    -------
    np.ndarray
        Boolean array with events.
    """
    # create
    barcodes_times = np.array([(evt[0]-offset_s, evt[1]) for evt in list(npx_traces_events.get_events('Neuropixels PXI Sync'))])
    n_pts = int((barcodes_times[-1, 0] + pad_s)*fs)
    barcodes_array = np.zeros(n_pts, dtype=bool)

    for onset, duration in barcodes_times:
        onset_idx = int(onset*barcodes_fs)
        offset_idx = int((onset + duration)*barcodes_fs)
        barcodes_array[onset_idx:offset_idx] = True

    return barcodes_array


# Read traces and synch events:
npx1_traces_events = si.read_openephys_event(oephys_folder)
barcodes_fs = digital_traces_interface.get_sampling_frequency()

# Offset to be subtracted (first value of the probeA timestamps):
recording1_interface = si.read_openephys(oephys_folder, stream_name="Record Node 103#Neuropix-PXI-100.ProbeA-AP")
offset_start = recording1_interface.get_time_info()["t_start"]
barcodes_array = _array_from_oephys_events(npx1_traces_events, barcodes_fs, offset_s=offset_start)

barcode_signal_npx = BarcodeSignal(barcodes_array, fs=barcodes_fs)
barcode_signal_nidaqx = BarcodeSignal(digital_signals["barcode"].array, fs=digital_signals["barcode"].fs)
barcode_signal_npx.onsets_times[:5]
barcode_signal_nidaqx.map_times_to(barcode_signal_npx, 0)
offset_start


373.2864

In [402]:
def _register_df_times(df, barcode_signal_npx, barcode_signal_nidaqx):
    """Convert the times in the dataframe to the barcode signal times."""
    df = df.copy()
    column_to_fix = [c for c in df.columns if "bnc-time" in c]
    new_timename = ""

    for c in column_to_fix:
        new_colname = c.replace("_bnc-time", new_timename)
        df[new_colname] = barcode_signal_nidaqx.map_times_to(barcode_signal_npx, df[c].values)
        df.drop(c, axis=1, inplace=True)

    return df

def _check_datetime_consistency_and_drop(df, oephys_datetime):
    """Assert that the datetime columns in the dataframe are consistent with the oephys datetime."""
    df = df.copy()
    # Convert "serial-timestamp" to datetime:
    df["serial-timestamp"] = pd.to_datetime(df["serial-timestamp"])
    
    # check timestamp of rows with the sum between ephys datetime and the npx-time column:
    npx_onsets_datetimes = df["start_time"].apply(lambda x: oephys_datetime + timedelta(seconds=x))
    assert all(abs((npx_onsets_datetimes - df["serial-timestamp"]).dt.total_seconds().values < 1)), "Timestamps do not match"

    return df.drop("serial-timestamp", axis=1)


motor_log_df = _load_motor_log_csv(data_folder, digital_signals["motor"])
motor_log_df = _register_df_times(motor_log_df, barcode_signal_npx, barcode_signal_nidaqx)
motor_log_df = _check_datetime_consistency_and_drop(motor_log_df, oephys_datetime)
motor_log_df

Unnamed: 0,motor-order,radius_cm,theta_rad,start_time,end_time,approach-end,depart-start
1,2,3.0,3.769911,373.644122,379.916842,373.844122,379.716841
3,1,4.5,2.932153,393.629486,399.856306,393.829387,399.656305
5,2,3.0,4.188790,413.651050,419.874370,413.850951,419.674369
7,1,6.0,2.094395,433.669214,439.885334,433.869215,439.685334
9,1,6.0,2.094395,453.688279,459.902599,453.888279,459.702698
...,...,...,...,...,...,...,...
567,2,4.5,4.188790,6037.224098,6043.442118,6037.424099,6043.242118
569,1,6.0,2.932153,6057.243863,6063.453583,6057.443763,6063.253582
571,2,3.0,4.188790,6077.252527,6083.469447,6077.452427,6083.269446
573,1,3.0,2.094395,6097.261791,6103.471711,6097.461792,6103.271710


In [430]:
def _load_frames_csv(camera_name):
    frames_trigger_df = pd.read_csv(_find_in_folder(data_folder, f"{camera_name}*timestamps*.csv"))
    frames_trigger_df.rename(columns={"Timestamp": "serial-timestamp"}, inplace=True)

    digital_triggers = digital_signals["frames"].onsets_times

    # check if frames are one every one or one every two triggers:
    n_frames = len(frames_trigger_df)
    n_triggers = len(digital_triggers)

    if n_frames == n_triggers:
        print(f"Frames and triggers match for {camera_name}")
    elif n_frames == n_triggers  // 2:
        print(f"Frames and triggers match for {camera_name} with half the triggers")
    else:
        print(f"Frames and triggers do not match exactly for {camera_name} ({n_frames} frames, {n_triggers} triggers); estimating...")
        skip_frames = abs(n_frames - n_triggers // 2) < abs(n_frames - n_triggers)
        
        if skip_frames:
            print(f"Skipping every second trigger for {camera_name}")
            digital_triggers = digital_triggers[::2]
        
        n_triggers = len(digital_triggers)
        if n_frames < n_triggers:
            print(f"More triggers than frames, truncating triggers to match frames")
            digital_triggers = digital_triggers[:n_frames]
        else:
            print(f"More frames than triggers, truncating frames to match triggers")
            frames_trigger_df = frames_trigger_df.iloc[:n_triggers]
       
        digital_triggers = digital_triggers[:n_frames]

    # frames_trigger_df["trigger_bnc-time"] = digital_triggers

    return frames_trigger_df

for camera_name in ["TopCamera"]:#, "EyeCamera"]:
    df = _load_frames_csv(camera_name)



Frames and triggers do not match exactly for TopCamera (537292 frames, 536848 triggers); estimating...
Excluding final triggers


In [432]:
# convert serial-timestamp to datetime:
df["serial-timestamp"] = pd.to_datetime(df["serial-timestamp"])

In [441]:
tdiff = df["serial-timestamp"].diff().dt.total_seconds()
tdiff_triggers = np.diff(digital_signals["frames"].onsets_times)

## Validate timings looking at laser:

In [494]:
try:
    laser_mean = np.load(data_folder / "laser_mean_cache.npy")
except FileNotFoundError:
    from bonpy import OpenCVMovieData
    mov_data = OpenCVMovieData("/Users/vigji/Desktop/test_mpa_dir/P02_MPAOPTO_LP/e02_ephys-contrapag-stim/M21/20240421/165242/TopCamera__video_2024-04-21T16_52_14.avi")

    laset_mov = mov_data[:, 273:296, 423:450]
    laser_mean = np.mean(laset_mov, axis=(1, 2))
    np.save(data_folder / "laser_mean_cache.npy", laser_mean)

laser_mean_cut = laser_mean[:len(digital_signals["frames"].onsets_times)]
laser_signal = digital_signals["laser"]
x_array = np.arange(len(laser_signal.array)) / laser_signal.fs

In [499]:
debug_plots=False
if debug_plots:
    f = plt.figure(figsize=(12, 5))
    plt.plot(x_array, laser_signal.array+1, label="laser command")
    plt.plot(digital_signals["frames"].offsets_times, laser_mean_cut[:-1]/100, c="r", label="laser fiber pixel intensity")
    plt.legend()
    plt.xlabel("Time [s]")
    # f.savefig(fig_folder / "laser_command.png")
    # plt.xlim(373.5, 374)
    f.savefig(fig_folder / "laser_command_begin.png")
    plt.xlim(6097.2, 6097.7)
    f.savefig(fig_folder / "laser_command_end.png")