# Extract and align data from Onix, Harp, Sleap, and photometry

### Setup

In [1]:
import numpy as np
from pathlib import Path
import os
import matplotlib.pyplot as plt
import pandas as pd
import harp
from pathlib import Path

from harp_resources import process, utils
from sleap import load_and_process as lp

# Define paths
Define a root directory through which you can loop to get both the photometry data and the ONIX data.
This may need to be done differently if the file structure changes.

In [2]:
# NOTE(review): absolute, machine-specific path; adjust to where the cohort data lives.
rootdir = '/Users/nora/Desktop/Cohort0_GCaMP_example'


def find_session_paths(rootdir):
    """Collect session directories and photometry directories below ``rootdir``.

    A session directory is the parent of a folder named exactly
    'ExperimentEvents'. A photometry directory is any folder holding a file
    whose name contains 'Processed_fluorescence.csv'.

    Parameters
    ----------
    rootdir : str
        Directory that is walked recursively.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(data_paths, photometry_paths)``, each without duplicates and in
        walk order.
    """
    data_paths, photometry_paths = [], []
    seen_data, seen_photometry = set(), set()

    for dirpath, subdirs, files in os.walk(rootdir):
        # Sorting in place makes the top-down walk order (and therefore the
        # pairing of the two lists by position) reproducible across runs.
        subdirs.sort()

        # Match the folder name exactly and take its parent instead of slicing
        # a fixed number of characters off a path that merely contains the name
        # (which produced garbage for folders nested inside ExperimentEvents).
        if os.path.basename(dirpath) == 'ExperimentEvents':
            session_path = os.path.dirname(dirpath)
            if session_path not in seen_data:
                seen_data.add(session_path)
                data_paths.append(session_path)

        if any('Processed_fluorescence.csv' in filename for filename in files):
            if dirpath not in seen_photometry:
                seen_photometry.add(dirpath)
                photometry_paths.append(dirpath)

    return data_paths, photometry_paths


data_paths, photometry_paths = find_session_paths(rootdir)

# Kept so that any cell relying on the original set names still works.
data_paths_set = set(data_paths)
photometry_paths_set = set(photometry_paths)


### Have a look to ensure that the photometry and onix data paths match

In [3]:
# Display the session directories collected by the path-discovery cell.
data_paths

['/Users/nora/Desktop/Cohort0_GCaMP_example/2024-08-08T10-05-26_B3M3']

In [4]:
# Display the directories found to contain a 'Processed_fluorescence.csv' file.
photometry_paths 

['/Users/nora/Desktop/Cohort0_GCaMP_example/2024-08-08T10-05-26_B3M3/photometry_processed']

# Call functions to extract data and align timestamps
All the function calls needed can be arranged in a function or called one by one.

In [7]:
def _add_sleap_eye_streams(streams, data_path):
    """Add ellipse streams derived from SLEAP-tracked VideoData2 to ``streams``.

    Loads the videography data for ``data_path``; when VideoData2 has SLEAP
    data, fits ellipses to the points p1..p8 (after centering and rotating the
    coordinates) and adds the resulting 'SleapVideoData2' streams.

    Returns
    -------
    tuple
        ``(streams, added)`` where ``added`` is True only if the
        'SleapVideoData2' streams were actually added.
    """
    VideoData1, VideoData2, VideoData1_Has_Sleap, VideoData2_Has_Sleap = lp.load_videography_data(data_path)
    if not VideoData2_Has_Sleap:
        return streams, False

    print('  Processing VideoData2 with SLEAP data...')

    # Interpolate missing data
    VideoData2 = VideoData2.interpolate()

    # Extract coordinates: left/center/right plus p1..p8, each with .x and .y
    ellipse_points = [f'p{i}' for i in range(1, 9)]
    columns_of_interest = [
        f'{point}.{axis}'
        for point in ['left', 'center', 'right'] + ellipse_points
        for axis in ('x', 'y')
    ]
    coordinates_dict = lp.get_coordinates_dict(VideoData2, columns_of_interest)

    # Calculate transformations
    theta = lp.find_horizontal_axis_angle(VideoData2, 'left', 'center')
    center_point = lp.get_left_right_center_point(coordinates_dict)

    reformatted_coordinates_dict = lp.get_reformatted_coordinates_dict(
        coordinates_dict, ['left', 'right', 'center'] + ellipse_points
    )
    centered_coordinates_dict = lp.get_centered_coordinates_dict(reformatted_coordinates_dict, center_point)
    rotated_coordinates_dict = lp.get_rotated_coordinates_dict(centered_coordinates_dict, theta)

    # Extract ellipse parameters
    ellipse_parameters_data, ellipse_center_points_data = lp.get_fitted_ellipse_parameters(
        rotated_coordinates_dict, ellipse_points
    )

    # Diameter is the element-wise mean of the first two ellipse parameter columns
    average_diameter = np.mean([ellipse_parameters_data[:, 0], ellipse_parameters_data[:, 1]], axis=0)

    # Prepare SLEAP video data for streams
    SleapVideoData2 = process.convert_arrays_to_dataframe(
        ['Seconds', 'Ellipse.Diameter', 'Ellipse.Angle', 'Ellipse.Center.X', 'Ellipse.Center.Y'],
        [VideoData2['Seconds'].values, average_diameter, ellipse_parameters_data[:, 2],
         ellipse_center_points_data[:, 0], ellipse_center_points_data[:, 1]]
    )

    streams = process.reformat_and_add_many_streams(
        streams, SleapVideoData2, 'SleapVideoData2',
        ['Ellipse.Diameter', 'Ellipse.Angle', 'Ellipse.Center.X', 'Ellipse.Center.Y']
    )
    return streams, True


def run_extraction_makeh5(data_path, photometry_path, make_h5=True, eyes=False):
    """Extract, align and resample the streams of one session; optionally save them as h5.

    Parameters
    ----------
    data_path : str or Path
        Session directory handed to the ``utils`` readers.
    photometry_path : str or Path
        Directory handed to ``utils.read_fluorescence``.
    make_h5 : bool, default True
        When True, the resampled streams are written with
        ``process.save_streams_as_h5``.
    eyes : bool, default False
        When True, try to add SLEAP-derived ellipse streams from VideoData2.
        Failures in this step are printed and do not abort the extraction.

    Returns
    -------
    tuple
        ``(data_path, resampled_streams, streams_to_save_pattern)``. The save
        pattern is returned regardless of ``make_h5`` so it can be used to
        write the h5 file later.

    Raises
    ------
    KeyError
        If the loaded photometry data has no 'TimeStamp' column.
    """
    session_dir = Path(data_path)
    photometry_dir = Path(photometry_path)

    # Path.name/.parent.name also work with a trailing slash or a Path input.
    # The [20:] slice drops the first 20 characters of the folder name.
    print(f'\n Running extraction for {session_dir.name[20:]} for session {session_dir.parent.name}')

    # Load session settings and conversions
    SessionSettings = utils.read_SessionSettings(session_dir, print_contents=True)
    conversions = process.calculate_conversions_second_approach(session_dir, photometry_dir, verbose=False)
    streams = utils.load_registers(session_dir)

    # Load photometry data and align timestamps
    Photometry = utils.read_fluorescence(photometry_dir)
    if 'TimeStamp' not in Photometry:
        # Same exception type as the plain lookup, but says what was found instead.
        raise KeyError(
            f"'TimeStamp' column not found in photometry data read from {photometry_dir}; "
            f"available columns: {list(Photometry)}"
        )
    Photometry['HARP Timestamps'] = conversions['photometry_to_harp_time'](Photometry['TimeStamp'])

    # Load OnixAnalog data
    OnixAnalogClock = utils.read_OnixAnalogClock(session_dir)
    OnixAnalogData = utils.read_OnixAnalogData(session_dir, binarise=True)
    # Read as in the original flow; the result is not used further in this function.
    ExperimentEvents = utils.read_ExperimentEvents(session_dir)
    photodiode_series = pd.Series(OnixAnalogData[:, 0], index=conversions['onix_to_harp_timestamp'](OnixAnalogClock))

    print('Adding Photometry, Eye Movements, and Photodiode to the streams')
    # Add Photometry and Photodiode streams
    streams = process.reformat_and_add_many_streams(
        streams, Photometry, 'Photometry', ['470_dfF', 'z_470'], index_column_name='HARP Timestamps'
    )
    streams = process.add_stream(streams, 'ONIX', photodiode_series, 'Photodiode')

    # Process videography data if eyes=True
    sleap_streams_added = False
    if eyes:
        print('  Checking for and processing videography data...')
        try:
            streams, sleap_streams_added = _add_sleap_eye_streams(streams, data_path)
        except Exception as e:
            # Deliberate best-effort: eye data is optional, so report and continue.
            print(f"Error processing videography data: {e}")

    # Display timepoint info for streams
    _ = process.get_timepoint_info(streams, print_all=True)

    # Resample streams
    resampled_streams = process.pad_and_resample(streams, resampling_period='1 ms', method='linear')
    _ = process.get_timepoint_info(resampled_streams, print_all=True)

    # Apply unit conversions for optical tracking sensor streams
    print('  Applying linear and angular conversion to Optical tracking sensor streams (cm/sec and degrees/sec)')
    resampled_streams['H1']['OpticalTrackingRead0X(46)'] = process.running_unit_conversion(
        resampled_streams['H1']['OpticalTrackingRead0X(46)'] * 100
    )
    resampled_streams['H1']['OpticalTrackingRead0Y(46)'] = process.rotation_unit_conversion(
        resampled_streams['H1']['OpticalTrackingRead0Y(46)']
    )

    print(' - Streams are extracted and can be used or made to h5')

    # Built unconditionally: it is part of the return value, so it must exist
    # even when make_h5 is False.
    streams_to_save_pattern = {
        'Photometry': ['470_dfF', 'z_470'],
        'ONIX': ['Photodiode'],
    }
    # Only list the SLEAP stream when it was really added to the streams.
    if sleap_streams_added:
        streams_to_save_pattern['SleapVideoData2'] = ['Ellipse.Diameter', 'Ellipse.Center.X', 'Ellipse.Center.Y']

    if make_h5:
        process.save_streams_as_h5(session_dir, resampled_streams, streams_to_save_pattern, SessionSettings)
        print('Streams saved as h5 file \n')

    return data_path, resampled_streams, streams_to_save_pattern



## Call the function
Below, the function is called first with a single path combination and then with several in a loop.
If everything seems to work, you can set make_h5 to True, or you can use the resampled streams and save pattern saved to the dict made in the loop to make the h5 files.

In [8]:
# Dry run on the first session/photometry pair: make_h5=False skips writing the h5 file,
# eyes=True additionally tries to process the videography (SLEAP) data.
data_path, resampled_streams, streams_to_save_pattern = run_extraction_makeh5(
    data_paths[0], photometry_paths[0], make_h5=False, eyes=True)


 Running extraction for B3M3 for session Cohort0_GCaMP_example
{
    "seconds": 768959.520256,
    "value": {
        "motorSettings": {
            "nominalPulseInterval": 200,
            "initialPulseInterval": 200,
            "pulseStepInterval": 2,
            "pulsePeriod": 10
        },
        "blocks": [
            {
                "alias": "LinearMismatch",
                "playbackFilePath": "",
                "blockGainModifier": 1.0,
                "flowXToVisualGain": 0.0008,
                "flowYToVisualGain": 0.0,
                "rotaryToVisualGain": 0.0,
                "playbackToVisualGain": 0.0,
                "stimulus": {
                    "type": 0,
                    "spatialFrequency": 28.8,
                    "offset1": -90.0,
                    "extent1": 180.0,
                    "offset2": 90.0,
                    "extent2": 180.0,
                    "path": ""
                },
                "flowXToMotorGain": 0.0,
                "flo

  sorted_filenames = pd.to_datetime(pd.Series([x.split('_')[1].split('.')[0] for x in filenames])).sort_values()


Registers loaded in 1.59 seconds.


KeyError: 'TimeStamp'

In [9]:
# NOTE(review): in the cells visible here, `Photometry` is only assigned inside
# run_extraction_makeh5 and never at notebook scope, so this raises NameError.
Photometry

NameError: name 'Photometry' is not defined

In [None]:
# Run the extraction for every (session, photometry) pair and keep the results
# in a dict keyed 'dataset_<i>' so the h5 files can be written later.
stream_dict = {}
for i, (datapath, photometry_path) in enumerate(zip(data_paths, photometry_paths)):
    print(datapath)
    print(photometry_path)
    # Pass the pair of the current iteration; indexing [0] here would process
    # the first session again on every pass.
    data_path, resampled_streams, streams_to_save_pattern = run_extraction_makeh5(
        datapath, photometry_path, make_h5=False, eyes=True)
    stream_dict[f'dataset_{i}'] = {
        'resampled_streams': resampled_streams,
        'streams_to_save_pattern': streams_to_save_pattern,
        'data_path': data_path,
    }
    

In [None]:
# NOTE(review): h1_reader is first assigned further down (the harp.create_reader cell);
# on a fresh kernel that cell has to run before this one.
h1_reader

In [None]:
#!!! ONLY RUN IF YOU WANT TO MAKE NEW H5 FILES !!!!

for dataset, data_dict in stream_dict.items():
    session_path = Path(data_dict['data_path'])
    # SessionSettings only exists as a local inside run_extraction_makeh5, so
    # reload the settings of each dataset here instead of using an undefined global.
    dataset_session_settings = utils.read_SessionSettings(session_path, print_contents=False)
    process.save_streams_as_h5(
        session_path,
        data_dict['resampled_streams'],
        data_dict['streams_to_save_pattern'],
        dataset_session_settings,
    )
    

# Extracting Nora's data

This is adapted from the instructions given by Andrew for reading the temporary onix digital file versions (the weird ones)

In [11]:
# Session used for the manual, step-by-step extraction below.
# Alternative dataset on the lab volume (swap in if needed):
#   photometry_path = '/Volumes/RanczLab/Nora_Cohort1_training/Training_day4/B6J2717-2024-11-28T09-37-55/photometry'
#   data_path = '/Volumes/RanczLab/Nora_Cohort1_training/Training_day4/B6J2717-2024-11-28T09-37-55/'

# Both paths end with '/', so the folders below are appended without a separator.
data_path = '/Users/nora/Desktop/Cohort0_GCaMP_example/2024-08-08T10-05-26_B3M3/'
photometry_path = f'{data_path}photometry_processed/'

# HarpDataH1 / HarpDataH2 folders inside the session directory
h1_datafolder, h2_datafolder = (f'{data_path}HarpData{suffix}' for suffix in ('H1', 'H2'))

In [12]:
# Preview the processed photometry table. Joining with pathlib avoids the doubled
# separator that string concatenation produced when photometry_path ends with '/'.
# Alternative file in the same folder: Fluorescence.csv
pd.read_csv(Path(photometry_path) / 'Processed_fluorescence.csv')

Unnamed: 0,TimeStamp,filtered_470,filtered_560,filtered_410,470_dfF,560_dfF,410_dfF,z_470,z_560,z_410,Time,mouseID,Area,Sex
0,15.011110,60.750,9.094,13.336148,-3.401339,-0.118508,4.800172,-1.124964,-0.468415,4.059622,2024-08-08 12:08:44.011110,B3M3,v1,m
1,15.044442,64.074,9.135,13.238875,1.884158,0.332083,4.035766,0.947863,1.160582,3.405835,2024-08-08 12:08:44.044442,B3M3,v1,m
2,15.077776,68.062,9.120,13.146754,8.225483,0.167613,3.311848,3.434756,0.565979,2.786679,2024-08-08 12:08:44.077776,B3M3,v1,m
3,15.111110,69.676,9.104,13.067576,10.791906,-0.007842,2.689640,4.441237,-0.068331,2.254514,2024-08-08 12:08:44.111110,B3M3,v1,m
4,15.144442,69.759,9.099,13.008761,10.923885,-0.062481,2.227452,4.492995,-0.265862,1.859211,2024-08-08 12:08:44.144442,B3M3,v1,m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56863,1910.191877,63.243,8.760,12.889807,0.562784,-1.391212,1.292666,0.429656,-4.947367,1.059701,2024-08-08 12:40:19.191877,B3M3,v1,m
56864,1910.225208,63.311,8.817,12.900128,0.670911,-0.749560,1.373770,0.472061,-2.683990,1.129068,2024-08-08 12:40:19.225208,B3M3,v1,m
56865,1910.258534,62.999,8.777,12.915368,0.174799,-1.199809,1.493536,0.277499,-4.272205,1.231503,2024-08-08 12:40:19.258534,B3M3,v1,m
56866,1910.291861,62.518,8.807,12.934425,-0.590040,-0.862088,1.643294,-0.022449,-3.080921,1.359588,2024-08-08 12:40:19.291861,B3M3,v1,m


In [13]:
# From Andrew: reader definitions, including the changed digital output file.
h1_reader = harp.create_reader('harp_resources/h1-device.yml', epoch=harp.REFERENCE_EPOCH)
h2_reader = harp.create_reader('harp_resources/h2-device.yml', epoch=harp.REFERENCE_EPOCH)
session_data_reader = utils.SessionData("SessionSettings")
experiment_events_reader = utils.TimestampedCsvReader("ExperimentEvents", columns=["Event"])
framecount_reader = utils.TimestampedCsvReader("OnixAnalogFrameCount", columns=["Index"])
photometry_reader = utils.PhotometryReader("Processed_fluorescence")
video_reader = utils.Video("VideoData1")

# Columns: Clock, HubClock, DigitalInputs0..DigitalInputs8, Buttons (12 in total).
# Generating the DigitalInputs names avoids the missing comma that used to fuse
# "DigitalInputs5" and "DigitalInputs6" into a single column name.
onix_digital_columns = (
    ["Clock", "HubClock"]
    + [f"DigitalInputs{i}" for i in range(9)]
    + ["Buttons"]
)
onix_digital_reader = utils.TimestampedCsvReader("OnixDigital", columns=onix_digital_columns)
onix_harp_reader = utils.TimestampedCsvReader("OnixHarp", columns=["Clock", "HubClock", "HarpTime"])

In [14]:
# read metadata
session_settings = utils.load_2(session_data_reader, data_path)

print(session_settings.iloc[0]['metadata'].blocks[0].haltProtocol)

DotMap(randomDelay=0.5, minumumDelay=0.1, haltTime=1.0, haltGain=1.0)


In [16]:
# read harp streams, experiment events, video
flow_sensor = utils.load_harp(h1_reader.OpticalTrackingRead, h1_datafolder)
camera_triggers = utils.load_harp(h1_reader.Cam0Event, h1_datafolder)
experiment_events = utils.load_2(experiment_events_reader, data_path)
video_data = utils.load_2(video_reader, data_path)

/Users/nora/Desktop/Cohort0_GCaMP_example/2024-08-08T10-05-26_B3M3/HarpDataH1/HarpDataH1_46_*.bin
/Users/nora/Desktop/Cohort0_GCaMP_example/2024-08-08T10-05-26_B3M3/HarpDataH1/HarpDataH1_32_*.bin
