# Extract and align data from Onix, Harp, Sleap, and photometry

### 2025 Jan 31 - works for importing Cohort 1

In [None]:
import numpy as np
from pathlib import Path
import os
import matplotlib.pyplot as plt
import pandas as pd
import harp

from harp_resources import process, utils
from sleap import load_and_process as lp

# Extracting Nora's data

In [None]:
# Session to load. Switch animals by swapping which data_path line is active;
# every other path in this cell is derived from data_path so that the
# photometry folder can no longer point at a different session.
#data_path = Path('/Users/rancze/Documents/Data/vestVR/Cohort1/VestibularMismatch_day1/B6J2717-2024-12-12T13-00-21')
data_path = Path('/Users/rancze/Documents/Data/vestVR/Cohort1/VestibularMismatch_day1/B6J2718-2024-12-12T13-28-14')

# Layout "<session folder>_processedData/photometry" next to the session
# folder, as in the previously hard-coded B6J2717 path.
# NOTE(review): confirm the processed folder exists for every session.
photometry_path = data_path.parent / f'{data_path.name}_processedData' / 'photometry'
h1_datafolder = data_path / 'HarpDataH1'
h2_datafolder = data_path / 'HarpDataH2'

In [None]:
# Reader definitions. The OnixDigital column layout comes from Andrew, for the
# changed digital output file.

# h1_reader and h2_reader are only needed when registers are read one by one
# rather than all together as harp_streams.
h1_reader = harp.create_reader('harp_resources/h1-device.yml', epoch=harp.REFERENCE_EPOCH)
h2_reader = harp.create_reader('harp_resources/h2-device.yml', epoch=harp.REFERENCE_EPOCH)
session_settings_reader = utils.SessionData("SessionSettings")
experiment_events_reader = utils.TimestampedCsvReader("ExperimentEvents", columns=["Event"])
onix_framecount_reader = utils.TimestampedCsvReader("OnixAnalogFrameCount", columns=["Index"])
photometry_reader = utils.PhotometryReader("Processed_fluorescence")
video_reader1 = utils.Video("VideoData1")
video_reader2 = utils.Video("VideoData2")
onix_digital_reader = utils.TimestampedCsvReader(
    "OnixDigital",
    columns=[
        "Clock",
        "HubClock",
        "DigitalInputs0",
        "DigitalInputs1",
        "DigitalInputs2",
        "DigitalInputs3",
        "DigitalInputs4",
        # A missing comma here used to merge the next two names into the
        # single column "DigitalInputs5DigitalInputs6".
        "DigitalInputs5",
        "DigitalInputs6",
        "DigitalInputs7",
        "DigitalInputs8",
        "Buttons",
    ],
)
onix_harp_reader = utils.TimestampedCsvReader("OnixHarp", columns=["Clock", "HubClock", "HarpTime"])

In [None]:
# Load every data source of the session selected by data_path / photometry_path.

# Metadata
print("Loading session settings")
session_settings = utils.load_2(session_settings_reader, data_path)

# Experiment events, processed photometry, video
print("Loading experiment events")
experiment_events = utils.load_2(experiment_events_reader, data_path)
# Path() wrapper keeps the joins working whether photometry_path is a str or a Path.
print("Loading processed fluorescence")
photometry_data = pd.read_csv(Path(photometry_path) / 'Processed_fluorescence.csv')
print("Loading processed fluorescence info")
photometry_info = pd.read_csv(Path(photometry_path) / 'Info.csv')
print("Loading processed fluorescence events")
photometry_events = pd.read_csv(Path(photometry_path) / 'Events.csv')
# Each camera gets its own variable; previously both loads were assigned to
# `video_data`, so the VideoData1 table was overwritten immediately.
print("Loading video data 1")
video_data1 = utils.load_2(video_reader1, data_path)
print("Loading video data 2")
video_data2 = utils.load_2(video_reader2, data_path)
# Backward-compatible alias: `video_data` used to end up holding VideoData2.
video_data = video_data2

# Harp registers read individually
print("Loading camera triggers")
camera_triggers = utils.load_harp(h1_reader.Cam0Event, h1_datafolder) #assumes Cam0 triggers both cameras
print("Loading flow sensor data")
flow_sensor = utils.load_harp(h1_reader.OpticalTrackingRead, h1_datafolder)

# ONIX streams
print("Loading OnixDigital")
onix_digital = utils.load_2(onix_digital_reader, data_path)
print("Loading OnixHarp")
onix_harp = utils.load_2(onix_harp_reader, data_path)
print("Loading OnixAnalogFrameClock")
framecount = utils.load_2(onix_framecount_reader, data_path)
print("Loading OnixAnalogClock")
onix_analog_clock = utils.read_OnixAnalogClock(data_path)

# All H1/H2 registers in one go
print("Loading H1 and H2 streams")
harp_streams = utils.load_registers(data_path)

In [None]:
%whos

In [None]:
# Display the loaded OnixDigital table for inspection.
onix_digital

# Bulk extraction
## old data

In [None]:
#data_path = Path('/home/ikharitonov/RANCZLAB-NAS/data/ONIX/20240730_Mismatch_Experiment/MMclosed&Regular_120824/2024-08-12T16-51-16_B3M3')
#photometry_path = Path('/home/ikharitonov/RANCZLAB-NAS/data/ONIX/20240730_Mismatch_Experiment/MMclosed&Regular_120824/photometry/B3M3_MMclosed&Regular_day1/2024_08_12-18_57_17')


In [None]:
import numpy as np
from pathlib import Path
import os
import matplotlib.pyplot as plt
import pandas as pd
import harp

from harp_resources import process, utils
from sleap import load_and_process as lp

In [None]:
import os

rootdir = '/Users/nora/Desktop/Cohort0_GCaMP_example/'
#rootdir = '/Volumes/RanczLab/20240730_Mismatch_Experiment/GRAB_MMclosed-and-Regular_220824'


def find_session_paths(root):
    """Walk ``root`` and collect session folders and photometry folders.

    A session folder is the parent of a directory named exactly
    ``ExperimentEvents``. A photometry folder is any directory holding a file
    whose name contains ``Processed_fluorescence.csv``.

    Args:
        root: Directory to search recursively.

    Returns:
        ``(data_paths, photometry_paths)``: two lists of path strings in
        ``os.walk`` order, each without duplicates.
    """
    session_dirs = []
    photometry_dirs = []
    for dirpath, _subdirs, files in os.walk(root):
        # Match on the directory name itself. The earlier substring test plus
        # dirpath[:-17] produced truncated paths for anything nested inside
        # ExperimentEvents or merely containing that word.
        if os.path.basename(dirpath) == 'ExperimentEvents':
            session_dirs.append(os.path.dirname(dirpath))

        # os.walk yields each directory once, so one append per directory
        # keeps the list unique even when several files match.
        if any('Processed_fluorescence.csv' in filename for filename in files):
            photometry_dirs.append(dirpath)
    return session_dirs, photometry_dirs


# NOTE(review): later cells pair data_paths[i] with photometry_paths[i]; that
# pairing depends on directory traversal order and is not checked here.
data_paths, photometry_paths = find_session_paths(rootdir)

# Kept so cells that still refer to the uniqueness sets keep working.
data_paths_set = set(data_paths)
photometry_paths_set = set(photometry_paths)




In [None]:
# Display the discovered photometry folders.
photometry_paths

In [None]:
# Display the discovered session folders.
data_paths

In [None]:
def run_extraction_makeh5(data_path, photometry_path, make_h5 = False, eyes = False):
    """Load one session, merge photometry and photodiode into the streams, and resample.

    This is the first definition of the function in the notebook; a later
    definition with the same name adds SLEAP eye-tracking support.

    Args:
        data_path: Session folder as a ``str`` with ``/`` separators (it is
            split on ``/`` for the progress message).
        photometry_path: Folder containing the processed photometry files.
        make_h5: When true, save the resampled streams through
            ``process.save_streams_as_h5``. This needs a notebook-level
            ``streams_to_save_pattern`` to be defined before the call.
        eyes: Accepted for signature compatibility; not used here.

    Returns:
        ``(data_path, resampled_streams)``, the same shape the later
        definition returns.
    """
    # Double quotes inside the single-quoted f-string: reusing the outer quote
    # is a SyntaxError on Python versions before 3.12.
    print(f'\n Running extraction for {data_path.split("/")[-1][20:]} for session {data_path.split("/")[-2]}')
    session_dir = Path(data_path)
    photometry_dir = Path(photometry_path)

    SessionSettings = utils.read_SessionSettings(session_dir, print_contents=True)
    conversions = process.calculate_conversions_second_approach(session_dir, photometry_dir, verbose=False)
    streams = utils.load_registers(session_dir)

    # Put the photometry samples on the HARP clock.
    Photometry = utils.read_fluorescence(photometry_dir)
    Photometry['HARP Timestamps'] = conversions['photometry_to_harp_time'](Photometry.index)

    OnixAnalogClock = utils.read_OnixAnalogClock(session_dir)
    OnixAnalogData = utils.read_OnixAnalogData(session_dir, binarise=True)
    # Loaded but not used further in this function.
    ExperimentEvents = utils.read_ExperimentEvents(session_dir)

    # Column 0 of the analog data is used as the photodiode trace, indexed by
    # ONIX clock values converted to HARP timestamps.
    photodiode_series = pd.Series(OnixAnalogData[:,0], index=conversions['onix_to_harp_timestamp'](OnixAnalogClock))

    print('Adding Photometry, Eye Movements and Photodiode to the streams')
    streams = process.reformat_and_add_many_streams(streams, Photometry, 'Photometry', ['470_dfF', 'z_470'], index_column_name='HARP Timestamps')
    streams = process.add_stream(streams, 'ONIX', photodiode_series, 'Photodiode')

    _ = process.get_timepoint_info(streams, print_all=True)
    resampled_streams = process.pad_and_resample(streams, resampling_period='1 ms', method='linear')
    _ = process.get_timepoint_info(resampled_streams, print_all=True)

    print('Applying linear and angular conversion to Optical tracking sensor streams (cm / sec and degrees / sec)')
    resampled_streams['H1']['OpticalTrackingRead0X(46)'] = process.running_unit_conversion(resampled_streams['H1']['OpticalTrackingRead0X(46)']*100)
    resampled_streams['H1']['OpticalTrackingRead0Y(46)'] = process.rotation_unit_conversion(resampled_streams['H1']['OpticalTrackingRead0Y(46)'])

    print('Streams are extracted and can be used or made to h5')
    if make_h5:
        # `SessionSetting` (missing the final "s") was an undefined name.
        # NOTE(review): the later definition calls save_streams_as_h5 without
        # the settings argument - confirm which signature is current.
        process.save_streams_as_h5(session_dir, resampled_streams, streams_to_save_pattern, SessionSettings)

    return data_path, resampled_streams

In [None]:
def reformat_and_add_many_streams(streams, dataframe, source_name, stream_names, index_column_name='Seconds'):
    """Add several columns of ``dataframe`` to ``streams`` under one source.

    For each name in ``stream_names`` the dataframe is printed (debug
    output), the column of that name is reformatted with
    ``process.reformat_dataframe`` using ``index_column_name`` as the index
    column, and the result is registered with ``process.add_stream`` under
    ``source_name``.

    Returns:
        The streams object returned by the last ``process.add_stream`` call,
        or ``streams`` unchanged when ``stream_names`` is empty.
    """
    for column in stream_names:
        print(dataframe)
        streams = process.add_stream(
            streams,
            source_name,
            process.reformat_dataframe(dataframe, column, index_column_name, data_column_name=column),
            column,
        )
    return streams

In [None]:
# Run the extraction on the first discovered session/photometry pair with the
# default flags (make_h5=False, eyes=False).
run_extraction_makeh5(data_paths[0], photometry_paths[0]) 


In [None]:
def _add_pupil_ellipse_streams(streams, data_path):
    """Add ellipse fits of the VideoData2 SLEAP points to ``streams``.

    Loads the videography data for ``data_path``. When VideoData2 carries
    SLEAP output, the tracked points are interpolated, centred, rotated and
    fitted with an ellipse through the ``lp`` helpers; diameter, angle and
    centre are then added under the ``'SleapVideoData2'`` source.

    Returns:
        ``(streams, added)``. ``added`` is ``False`` and ``streams`` is
        returned untouched when VideoData2 has no SLEAP data.
    """
    VideoData1, VideoData2, VideoData1_Has_Sleap, VideoData2_Has_Sleap = lp.load_videography_data(data_path)
    if not VideoData2_Has_Sleap:
        return streams, False

    print('  Processing VideoData2 with SLEAP data...')

    # Fill gaps in the tracked coordinates before any geometry is computed.
    VideoData2 = VideoData2.interpolate()

    point_names = [f'p{i}' for i in range(1, 9)]
    columns_of_interest = [
        'left.x', 'left.y', 'center.x', 'center.y', 'right.x', 'right.y',
        'p1.x', 'p1.y', 'p2.x', 'p2.y', 'p3.x', 'p3.y', 'p4.x', 'p4.y',
        'p5.x', 'p5.y', 'p6.x', 'p6.y', 'p7.x', 'p7.y', 'p8.x', 'p8.y'
    ]
    coordinates_dict = lp.get_coordinates_dict(VideoData2, columns_of_interest)

    # Reference frame: angle of the left-center axis and the left/right centre point.
    theta = lp.find_horizontal_axis_angle(VideoData2, 'left', 'center')
    center_point = lp.get_left_right_center_point(coordinates_dict)

    reformatted_coordinates_dict = lp.get_reformatted_coordinates_dict(coordinates_dict, ['left', 'right', 'center'] + point_names)
    centered_coordinates_dict = lp.get_centered_coordinates_dict(reformatted_coordinates_dict, center_point)
    rotated_coordinates_dict = lp.get_rotated_coordinates_dict(centered_coordinates_dict, theta)

    # Ellipse fit through the eight p1..p8 points.
    ellipse_parameters_data, ellipse_center_points_data = lp.get_fitted_ellipse_parameters(
        rotated_coordinates_dict, point_names
    )

    # Diameter is the mean of ellipse parameter columns 0 and 1; column 2 is
    # stored as the angle.
    average_diameter = np.mean([ellipse_parameters_data[:, 0], ellipse_parameters_data[:, 1]], axis=0)

    SleapVideoData2 = process.convert_arrays_to_dataframe(
        ['Seconds', 'Ellipse.Diameter', 'Ellipse.Angle', 'Ellipse.Center.X', 'Ellipse.Center.Y'],
        [VideoData2['Seconds'].values, average_diameter, ellipse_parameters_data[:, 2],
         ellipse_center_points_data[:, 0], ellipse_center_points_data[:, 1]]
    )

    streams = process.reformat_and_add_many_streams(
        streams, SleapVideoData2, 'SleapVideoData2',
        ['Ellipse.Diameter', 'Ellipse.Angle', 'Ellipse.Center.X', 'Ellipse.Center.Y']
    )
    return streams, True


def run_extraction_makeh5(data_path, photometry_path, photometry_traces=None, make_h5=False, eyes=False):
    """Extract, align and resample all streams of one session; optionally save to h5.

    Args:
        data_path: Session folder as a ``str`` with ``/`` separators (it is
            split on ``/`` for the progress message).
        photometry_path: Folder containing the processed photometry files.
        photometry_traces: Photometry columns to add as streams. Defaults to
            ``['470_dfF', 'z_470']``.
        make_h5: When true, save the resampled streams through
            ``process.save_streams_as_h5``.
        eyes: When true, try to add pupil ellipse streams from the VideoData2
            SLEAP data. Failures are printed and the extraction continues
            without eye data.

    Returns:
        ``(data_path, resampled_streams)``.
    """
    # None sentinel instead of a list default, so calls never share one list object.
    if photometry_traces is None:
        photometry_traces = ['470_dfF', 'z_470']

    print(f'\n Running extraction for {data_path.split("/")[-1][20:]} for session {data_path.split("/")[-2]}')

    SessionSettings = utils.read_SessionSettings(Path(data_path), print_contents=True)
    conversions = process.calculate_conversions_second_approach(Path(data_path), Path(photometry_path), verbose=False)
    streams = utils.load_registers(Path(data_path))

    # Put the photometry samples on the HARP clock.
    Photometry = utils.read_fluorescence(Path(photometry_path))
    Photometry['HARP Timestamps'] = conversions['photometry_to_harp_time'](Photometry.index)

    OnixAnalogClock = utils.read_OnixAnalogClock(Path(data_path))
    OnixAnalogData = utils.read_OnixAnalogData(Path(data_path), binarise=True)
    # Loaded but not used further in this function.
    ExperimentEvents = utils.read_ExperimentEvents(Path(data_path))

    # Column 0 of the analog data is used as the photodiode trace.
    photodiode_series = pd.Series(OnixAnalogData[:, 0], index=conversions['onix_to_harp_timestamp'](OnixAnalogClock))

    print('Adding Photometry, Eye Movements and Photodiode to the streams')
    streams = process.reformat_and_add_many_streams(streams, Photometry, 'Photometry', photometry_traces,
                                                    index_column_name='HARP Timestamps')
    streams = process.add_stream(streams, 'ONIX', photodiode_series, 'Photodiode')

    # Eye data is best-effort. Track whether the SLEAP streams really exist so
    # the save step below does not ask for streams that were never added.
    sleap_added = False
    if eyes:
        print('  Checking for and processing videography data...')
        try:
            streams, sleap_added = _add_pupil_ellipse_streams(streams, data_path)
        except Exception as e:
            print(f"Error processing videography data: {e}")

    # Display timepoint info for streams
    _ = process.get_timepoint_info(streams, print_all=True)

    # Resample streams
    resampled_streams = process.pad_and_resample(streams, resampling_period='1 ms', method='linear')
    _ = process.get_timepoint_info(resampled_streams, print_all=True)

    # Apply unit conversions for optical tracking sensor streams
    print('  Applying linear and angular conversion to Optical tracking sensor streams (cm/sec and degrees/sec)')
    resampled_streams['H1']['OpticalTrackingRead0X(46)'] = process.running_unit_conversion(
        resampled_streams['H1']['OpticalTrackingRead0X(46)'] * 100
    )
    resampled_streams['H1']['OpticalTrackingRead0Y(46)'] = process.rotation_unit_conversion(
        resampled_streams['H1']['OpticalTrackingRead0Y(46)']
    )

    print(' - Streams are extracted and can be used or made to h5')

    if make_h5:
        streams_to_save_pattern = {
            'Photometry': photometry_traces,
            'ONIX': ['Photodiode'],
        }
        # Previously keyed on `eyes` alone, which requested SleapVideoData2
        # even when SLEAP data was missing or its processing had failed.
        if sleap_added:
            streams_to_save_pattern['SleapVideoData2'] = ['Ellipse.Diameter', 'Ellipse.Center.X', 'Ellipse.Center.Y']

        process.save_streams_as_h5(Path(data_path), resampled_streams, streams_to_save_pattern)
        print('Streams saved as h5 file \n')

    return data_path, resampled_streams


In [None]:
# Run the extraction on the first discovered pair with eye tracking enabled and
# save to h5. This rebinds the notebook-level data_path to the returned value
# (the string that was passed in).
data_path, resampled_streams= run_extraction_makeh5(data_paths[0], photometry_paths[0], make_h5=True, eyes=True)

## Loading and Synchronisation 

In [None]:
# Select the fourth discovered pair (index 3) as the current session and show
# its photometry folder.
photometry_path = Path(photometry_paths[3])
data_path = Path(data_paths[3])
photometry_paths[3]

In [None]:
# Manual session selection (B3M3, 2024-08-13). Running this cell overrides the
# photometry_path / data_path chosen in earlier cells.
photometry_path = Path('/Volumes/RanczLab/Photometry_recordings/August_Mismatch_Experiment_G8m/B3M3_MMclosed_and_Regular_day2')
data_path = Path('/Volumes/RanczLab/20240730_Mismatch_Experiment/G8_MMclosed-and-regular_130824/2024-08-13T12-53-01_B3M3')

In [None]:
# Manual session selection (B2M4, 2024-08-08). Running this cell overrides the
# photometry_path / data_path chosen in earlier cells.
photometry_path = Path('/Volumes/RanczLab/Photometry_recordings/August_Mismatch_Experiment_G8m/MM_closed-and-open_day2/B2M4/photometry')
data_path = Path('/Volumes/RanczLab/20240730_Mismatch_Experiment/G8_MMclosed-and-open_080824/2024-08-08T09-20-54_B2M4')


### TEST

In [None]:
# Display the first discovered session folder.
data_paths[0]

In [None]:
# Display the first discovered photometry folder.
photometry_paths[0]

In [None]:
#OnixDigital = utils.read_OnixDigital(Path(data_path))
#PhotometryEvents = utils.read_fluorescence_events(Path(photometry_path))

In [None]:
# Read the ONIX digital events and the photometry events for the third
# discovered pair (index 2).
# NOTE(review): other cells in this notebook use index 0 or the manually set
# data_path - confirm index 2 is the intended session.
OnixDigital = utils.read_OnixDigital(Path(data_paths[2]))
PhotometryEvents = utils.read_fluorescence_events(Path(photometry_paths[2]))

In [None]:
# Display the ONIX digital events table.
OnixDigital

In [None]:
# Display the photometry events table.
PhotometryEvents

In [None]:
# Event timestamps of both systems as plain arrays: the 'Seconds' column of
# the ONIX table and the 'TimeStamp' column of the photometry events.
onix_digital_array = OnixDigital["Seconds"].values
photometry_events_array = PhotometryEvents['TimeStamp'].values

In [None]:
# Estimate how many events one recording is shifted against the other by
# cross-correlating the inter-event intervals of the two systems.
from scipy.signal import correlate


time_series_1 = np.diff(onix_digital_array) # ONIX inter-event intervals
time_series_2 = np.diff(photometry_events_array)   # photometry inter-event intervals

# Cross-correlation. The means are removed first: intervals are all positive,
# so correlating the raw values mostly measures how many samples overlap at
# each lag rather than how well the interval patterns match.
correlation = correlate(
    time_series_1 - time_series_1.mean(),
    time_series_2 - time_series_2.mean(),
    mode='full',
)

# Lag axis for mode='full': index 0 corresponds to lag -(len(time_series_2) - 1).
lags = np.arange(-len(time_series_2) + 1, len(time_series_1))
offset = lags[np.argmax(correlation)]

# Results
print("Offset at maximum correlation:", offset)

# Optional: Visualize the correlation
plt.figure(figsize=(10, 6))
plt.plot(lags, correlation)
plt.title("Cross-Correlation")
plt.xlabel("Lag")
plt.ylabel("Correlation")
plt.axvline(x=offset, color='red', linestyle='--', label=f'Max Offset: {offset}')
plt.legend()
plt.show()

In [None]:
# Display the ONIX event timestamps.
onix_digital_array

In [None]:
# Number of distinct inter-event intervals among the photometry events.
len(np.unique(np.diff(photometry_events_array)))

In [None]:
# Photometry inter-event intervals, skipping the first 8 values.
# The stray `ax2.plot()` that followed was removed: it drew nothing and ax2 is
# only created in a later cell, so it raised NameError on a fresh kernel.
plt.plot(np.diff(photometry_events_array)[8:])
#plt.xlim([0,len(np.diff(photometry_events_array))])
#plt.xlim([168,200+168])
#plt.xlim([0,200])

In [None]:
# ONIX inter-event intervals; the commented xlim lines are alternative zoom
# windows for manual inspection.
plt.plot(np.diff(onix_digital_array))
#plt.xlim([0, len(np.diff(onix_digital_array))-168])
#plt.xlim([0,200])
#plt.xlim([26,226])

In [None]:
# Overlay the two interval series after dropping the leading events that the
# estimated offset says are unmatched: ONIX on the right axis, photometry (red)
# on the left axis.
fig, ax1 = plt.subplots(figsize=(15,10))
ax2 = ax1.twinx()

# A positive offset means ONIX has `offset` extra leading events, a negative
# one means photometry has. The positive case used to slice `[:offset]`, which
# kept only those extra events instead of skipping them.
onix_start = max(offset, 0)
photometry_start = max(-offset, 0)
ax2.plot(np.diff(onix_digital_array[onix_start:]))
ax1.plot(np.diff(photometry_events_array[photometry_start:]), color = 'r')
#plt.xlim([0,200])

In [None]:
# Clock conversions for the session selected by data_path / photometry_path.
# The following cells load streams and photometry from these same two paths.
# This call previously used data_paths[0] / photometry_paths[0], which can be
# a different session, and passed plain strings where the extraction functions
# pass Path objects.
conversions = process.calculate_conversions_second_approach(Path(data_path), Path(photometry_path), verbose=False)
# After hardware ONIX clock implementation - this will have to be adapted
# Only photometry will need to be converted

In [None]:
# Load the H1/H2 register streams of the selected session.
streams = utils.load_registers(data_path)

# Photometry on the HARP clock, converted from its 'TimeStamp' column.
# NOTE(review): run_extraction_makeh5 converts Photometry.index instead -
# confirm which one matches the current photometry file format.
Photometry = utils.read_fluorescence(photometry_path)
Photometry['HARP Timestamps'] = conversions['photometry_to_harp_time'](Photometry['TimeStamp'])

OnixAnalogClock = utils.read_OnixAnalogClock(data_path)
OnixAnalogData = utils.read_OnixAnalogData(data_path, binarise=True)
ExperimentEvents = utils.read_ExperimentEvents(data_path) 



# Column 0 of the analog data as the photodiode trace, indexed by ONIX clock
# values converted to HARP timestamps.
photodiode_series = pd.Series(OnixAnalogData[:,0], index=conversions['onix_to_harp_timestamp'](OnixAnalogClock))

In [None]:
# Adding Photometry, Eye Movements and Photodiode to the streams
streams = process.reformat_and_add_many_streams(streams, Photometry, 'Photometry', ['CH1-410', 'CH1-470', 'CH1-560'], index_column_name='HARP Timestamps')
streams = process.add_stream(streams, 'ONIX', photodiode_series, 'Photodiode')

In [None]:
# Print timepoint info before and after padding/resampling all streams with a
# 1 ms period and linear interpolation.
_ = process.get_timepoint_info(streams, print_all=True)
resampled_streams = process.pad_and_resample(streams, resampling_period='1 ms', method='linear')
_ = process.get_timepoint_info(resampled_streams, print_all=True)

In [None]:
# Applying linear and angular conversion to Optical tracking sensor streams
# OpticalTrackingRead0X(46) converted to centimeters per second
# OpticalTrackingRead0Y(46) converted to degrees per second
# NOTE(review): the *100 on the X stream is presumably a metre-to-centimetre
# factor - confirm against process.running_unit_conversion.
resampled_streams['H1']['OpticalTrackingRead0X(46)'] = process.running_unit_conversion(resampled_streams['H1']['OpticalTrackingRead0X(46)']*100)
resampled_streams['H1']['OpticalTrackingRead0Y(46)'] = process.rotation_unit_conversion(resampled_streams['H1']['OpticalTrackingRead0Y(46)'])

In [None]:
# Check the container type of the resampled CH1-470 photometry stream, then
# display it.
print(type(resampled_streams['Photometry']['CH1-470']))
resampled_streams['Photometry']['CH1-470']

In [None]:
# Streams to write to h5: source name -> stream names within that source.
streams_to_save_pattern = {
    'H1': [
        'OpticalTrackingRead0X(46)',
        'OpticalTrackingRead0Y(46)',
    ],
    'H2': ['Encoder(38)'],
    'Photometry': [
        'CH1-410',
        'CH1-470',
        'CH1-560',
    ],
    'ONIX': ['Photodiode'],
}
streams_to_save_pattern
