# Extract and align data from Onix, Harp, Sleap, and photometry

In [None]:
import numpy as np
from pathlib import Path
import os
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import pandas as pd
import harp
import plotly.express as px

from harp_resources import process, utils
from sleap import load_and_process as lp

In [None]:
# --- Session selection -------------------------------------------------------
# Root folder that holds all cohorts. Set the VESTVR_DATA_ROOT environment
# variable to point at the data on another machine; the default below is the
# original location, so behaviour is unchanged when the variable is unset.
DATA_ROOT = Path(os.environ.get("VESTVR_DATA_ROOT", "/Users/rancze/Documents/Data/vestVR")).expanduser()

cohort0 = False  # set True for Cohort 0 sessions: OnixHarp does not exist there and is not read
cohort2 = False  # set True for Cohort 2 sessions: photometry and video loading is skipped

# Exactly one `data_path` assignment should be active; the others are kept as
# ready-to-use alternatives.

# Cohort 1, single OnixDigital file
#data_path = DATA_ROOT / 'Cohort1' / 'VestibularMismatch_day1' / 'B6J2717-2024-12-12T13-00-21'

# Cohort 1, multiple OnixDigital files
data_path = DATA_ROOT / 'Cohort1' / 'VestibularMismatch_day1' / 'B6J2718-2024-12-12T13-28-14'

# Cohort 1, multiple OnixDigital files, with the clock accumulation issue marked on the Google sheet
#data_path = DATA_ROOT / 'Cohort1' / 'VestibularMismatch_day1' / 'B6J2719-2024-12-12T13-59-38'

# Cohort 0 (no OnixHarp in this cohort)
#data_path = DATA_ROOT / 'Cohort0' / 'Cohort0_GCaMP_example' / 'B3M3xx-2024-08-08T10-05-26'
#cohort0 = True

# Cohort 2. N.B. no video data or photometry in this test set
#cohort2 = True
#data_path = DATA_ROOT / 'Cohort2_like_test_data' / '2025-01-13T15-47-26'

# Cohort 2 longer test, NO OnixHarp! Clock increasing exponentially according to NORA,
# but it does not show the issue. N.B. no photometry in this test set
# (no videos either, but video_data is present)
#cohort2 = True
#data_path = DATA_ROOT / 'Cohort2_test_longer' / '2025-02-07T16-05-04'

# Cohort 2 longer test, YES OnixHarp! N.B. no photometry in this test set
# (no videos either, but video_data is present)
#cohort2 = True
#data_path = DATA_ROOT / 'Cohort2_test_longer' / '2025-02-10T08-18-59'


# Processed photometry lives next to the session folder, in "<session>_processedData/photometry"
photometry_path = data_path.parent / f"{data_path.name}_processedData" / "photometry"

# h1_datafolder = data_path / 'HarpDataH1' #only if reading separate registers
# h2_datafolder = data_path / 'HarpDataH2' #only if reading separate registers

In [None]:
# Dedicated H1/H2 register readers are only required when timestamps are read
# register by register instead of through the combined harp_streams loader.
# h1_reader = harp.create_reader('harp_resources/h1-device.yml', epoch=harp.REFERENCE_EPOCH)
# h2_reader = harp.create_reader('harp_resources/h2-device.yml', epoch=harp.REFERENCE_EPOCH)

# Session metadata and experiment event log
session_settings_reader = utils.SessionData("SessionSettings")
experiment_events_reader = utils.TimestampedCsvReader(
    "ExperimentEvents",
    columns=["Event"],
)

# Onix analog frame counter
onix_framecount_reader = utils.TimestampedCsvReader(
    "OnixAnalogFrameCount",
    columns=["Index"],
)
#photometry_reader = utils.PhotometryReader("Processed_fluorescence")

# One reader per video data stream
video_reader1 = utils.Video("VideoData1")
video_reader2 = utils.Video("VideoData2")

# Onix digital inputs
onix_digital_reader = utils.OnixDigitalReader(
    "OnixDigital",
    columns=[
        "Value.Clock",
        "Value.HubClock",
        "Value.DigitalInputs",
        "Seconds",
    ],
)

# OnixHarp table: Onix clock values paired with Harp time
onix_harp_reader = utils.TimestampedCsvReader(
    "OnixHarp",
    columns=["Clock", "HubClock", "HarpTime"],
)

In [None]:
# --- Session metadata ---------------------------------------------------------
# Two loaders exist for the session settings (DataFrame vs dict); which one to
# keep is still to be decided.
print("Loading session settings")
session_settings = utils.load_2(session_settings_reader, data_path)  # Andrew's: awkward df, but downstream analysis code relies on it
#session_settings = utils.read_SessionSettings(data_path) #Hilde's, creates pretty dict, not aware of multiple files

# --- Experiment events, processed photometry, video ---------------------------
print("Loading experiment events")
experiment_events = utils.load_2(experiment_events_reader, data_path)

if not cohort2:
    # Photometry and video are skipped entirely when cohort2 is set
    print("Loading processed fluorescence")
    photometry_data = pd.read_csv(photometry_path / 'Processed_fluorescence.csv')
    print("Loading processed fluorescence info")
    photometry_info = pd.read_csv(photometry_path / 'Info.csv')
    print("Loading processed fluorescence events")
    photometry_events = pd.read_csv(photometry_path / 'Events.csv')

    print("Loading video data 1")
    video_data1 = utils.load_2(video_reader1, data_path)
    print("Loading video data 2")
    video_data2 = utils.load_2(video_reader2, data_path)

# --- Onix streams -------------------------------------------------------------
print("Loading OnixDigital")
onix_digital = utils.load_2(onix_digital_reader, data_path)

print("Loading OnixAnalogFrameClock")
onix_analog_framecount = utils.load_2(onix_framecount_reader, data_path)

print("Loading OnixAnalogClock")
onix_analog_clock = utils.read_OnixAnalogClock(data_path)

print("Loading OnixAnalogData")
# `channels` is a list of analog input lines, 0-11
onix_analog_data = utils.read_OnixAnalogData(data_path, channels=[0], binarise=True)

# --- Harp streams -------------------------------------------------------------
# dataframe=True returns a DataFrame; dataframe=False returns a dict
print("Loading H1 and H2 streams as dict or df")
harp_streams = utils.load_registers(data_path, dataframe=True)

# --- Harp <-> Onix synchronisation signal -------------------------------------
if not cohort0:
    print("Loading OnixHarp")
    onix_harp = utils.load_2(onix_harp_reader, data_path)
    # Drop possible outliers (verbose=True would print every one of them)
    onix_harp = utils.detect_and_remove_outliers(
        df=onix_harp,
        x_column="HarpTime",
        y_column="Clock",
        verbose=False,
    )

# print (" ")
# print ("loading separate registers from H1 and H2 data")
# print ("Loading camera triggers")
# camera_triggers = utils.load_harp(h1_reader.Cam0Event, h1_datafolder) #assumes Cam0 triggers both cameras
# print ("Loading flow sensor data")
# flow_sensor = utils.load_harp(h1_reader.OpticalTrackingRead, h1_datafolder)
print("Done Loading")

plotting onix_analog_clock vs index 

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Keep every 100th clock sample so the interactive plot stays responsive
downsample_factor = 100
clock_downsampled = onix_analog_clock[::downsample_factor]

# Single WebGL line trace; with no x given, the x axis is the sample position
fig = go.Figure(
    data=[
        go.Scattergl(
            y=clock_downsampled,
            mode="lines",
            name="Onix Analog Clock",
        )
    ]
)

# Titles, axis labels and pan-by-drag interaction
fig.update_layout(
    dragmode="pan",
    title="Onix Analog Clock Over Time (Optimized)",
    xaxis_title="Index (Downsampled)",
    yaxis_title="Onix Analog Clock",
)

fig.show()


### DEV align Onix, HARP and Photometry data 

In [None]:
# Re-import the `utils` module so edits made to it on disk take effect without
# restarting the kernel. The trailing semicolon keeps the returned module
# object out of the cell output.
import importlib

importlib.reload(utils);

In [None]:
# Pass the Onix analog clock, the analog frame count, the OnixDigital table and
# the photometry events to the Cohort1+ photometry alignment routine and keep
# its result in `conversions`.
# NOTE(review): `photometry_events` is only loaded when `cohort2` is False, so
# this cell presumably needs a session with photometry data — confirm.
conversions = process.photometry_alingment_Cohort1plus(
    onix_analog_clock, 
    onix_analog_framecount,
    onix_digital,
    photometry_events, 
    verbose=True
)


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def plot_linfit(df, x_column, y_column):
    """
    Fit a straight line to ``y_column`` as a function of ``x_column`` and plot it.

    Prints the fit parameters (slope, intercept) and evaluation metrics
    (RMSE, MAE, R-squared), then shows a scatter plot of the data with the
    fitted line overlaid.

    The input DataFrame is left untouched: fitted values are kept in a local
    array rather than written back as a new column, so the caller's table
    looks the same before and after the call.

    Parameters:
        df (pd.DataFrame): The input DataFrame.
        x_column (str): Column name of the independent variable (e.g. "HarpTime").
        y_column (str): Column name of the dependent variable (e.g. "Clock").

    Returns:
        None
    """
    # Fit linear model (scikit-learn expects a 2D feature matrix)
    X = df[x_column].values.reshape(-1, 1)
    y = df[y_column].values
    model = LinearRegression()
    model.fit(X, y)

    # Predict values at the observed x positions
    y_pred = model.predict(X)

    # Calculate fit parameters and metrics
    slope = model.coef_[0]
    intercept = model.intercept_
    mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    # Print fit parameters and evaluation metrics
    print("Fit Parameters:")
    print(f"  Slope: {slope}")
    print(f"  Intercept: {intercept}")
    print("\nEvaluation Metrics:")
    print(f"  RMSE (in y units): {rmse}")
    print(f"  MAE: {mae}")
    print(f"  R-squared: {r2}")

    # Plot the raw data
    fig = px.scatter(
        df,
        x=x_column,
        y=y_column,
        title=f"{y_column} vs {x_column} with Linear Fit",
        labels={x_column: x_column, y_column: y_column}
    )

    # Overlay the fitted line straight from the prediction array; nothing is
    # written into `df`
    fig.add_scatter(
        x=df[x_column],
        y=y_pred,
        mode="lines",
        name="Fitted Line"
    )
    fig.update_traces(hoverinfo="none")
    fig.show()

In [None]:
# Linear fit of the Onix "Clock" column against "HarpTime" in the OnixHarp table.
# NOTE(review): `onix_harp` is only created when `cohort0` is False — confirm the session type before running.
plot_linfit(onix_harp, "HarpTime", "Clock")

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

def fit_and_plot(df, x_column, y_column, downsample_factor=10):
    """
    Regress ``y_column`` on ``x_column`` using every row, then show a thinned scatter plot with the fit.

    The regression always uses the full dataset; only the plotted points (and the
    positions at which the fitted line is drawn) are reduced by ``downsample_factor``.

    Parameters:
        df (pd.DataFrame): Table containing both columns.
        x_column (str): Name of the independent-variable column (X).
        y_column (str): Name of the dependent-variable column (Y).
        downsample_factor (int): Keep every n-th row for plotting (default: 10).

    Returns:
        dict: ``{"slope": ..., "intercept": ..., "r2": ...}`` for the full-data fit.
    """
    x_values = df[x_column].to_numpy()
    y_values = df[y_column].to_numpy()

    # Regression on all samples (2D feature matrix, 1D target)
    features = x_values.reshape(-1, 1)
    target = y_values.squeeze()
    regressor = LinearRegression()
    regressor.fit(features, target)

    slope = regressor.coef_[0]
    intercept = regressor.intercept_
    r2 = r2_score(target, regressor.predict(features))

    # Report the full-data fit
    for line in (
        "Fit Parameters (Full Dataset):",
        f"  Slope: {slope}",
        f"  Intercept: {intercept}",
        f"  R-squared: {r2}",
    ):
        print(line)

    # Thin the data for plotting; predictions still come from the full-data model
    every_nth = slice(None, None, downsample_factor)
    x_thinned = x_values[every_nth].reshape(-1, 1)
    y_thinned = y_values[every_nth].squeeze()
    y_fit_thinned = regressor.predict(x_thinned)

    # Markers for the thinned data (WebGL) plus the fitted line in red
    fig = go.Figure(
        data=[
            go.Scattergl(
                x=x_thinned.squeeze(),
                y=y_thinned,
                mode="markers",
                name="Downsampled Data",
            ),
            go.Scatter(
                x=x_thinned.squeeze(),
                y=y_fit_thinned,
                mode="lines",
                name="Fitted Line",
                line=dict(color="red"),
            ),
        ]
    )
    fig.update_layout(
        dragmode="pan",
        title=f"{y_column} vs {x_column} with Linear Fit",
        xaxis_title=x_column,
        yaxis_title=y_column,
    )
    fig.show()

    return {"slope": slope, "intercept": intercept, "r2": r2}



In [None]:
# Fit Clock against HarpTime on all OnixHarp rows and plot every 10th point;
# `fit_results` holds the returned slope, intercept and R-squared.
fit_results = fit_and_plot(onix_harp, "HarpTime", "Clock", downsample_factor=10)

In [None]:
# Display the OnixDigital table loaded earlier for visual inspection.
onix_digital

In [None]:
# Display the OnixHarp table (after outlier removal) for visual inspection.
onix_harp

In [None]:
import sys

def get_memory_usage(var):
    """
    Print the memory footprint of ``var`` in KB and bytes.

    ``sys.getsizeof`` alone is misleading for array-like data: for a NumPy
    array that is a view onto another buffer it reports only the small array
    header, not the data being referenced. The size is therefore taken from,
    in order of preference:

    1. ``var.memory_usage(deep=True)`` (pandas objects; summed over columns
       and index when a per-column Series is returned),
    2. ``var.nbytes`` (NumPy arrays and other buffer-like objects),
    3. ``sys.getsizeof(var)`` for everything else.

    Parameters:
        var: Any Python object.

    Returns:
        None. The result is printed.
    """
    if hasattr(var, "memory_usage"):
        # DataFrame -> Series of per-column sizes, Series/Index -> plain int;
        # np.sum handles both
        size_in_bytes = int(np.sum(var.memory_usage(deep=True)))
    elif hasattr(var, "nbytes"):
        # Size of the array elements themselves, also correct for views
        size_in_bytes = int(var.nbytes)
    else:
        size_in_bytes = sys.getsizeof(var)
    print(f"Memory usage: {size_in_bytes / 1024:.2f} KB ({size_in_bytes} bytes)")

get_memory_usage(onix_analog_data)

In [None]:
# TODO: check for linearity at loading in utils and report the r2 value; only synchronise if r2 > 0.999
# TODO: first decide which gets correlated to which: Harp vs Onix, or the other way around?
# TODO: also consider transforming the datetime index in harp_streams to Onix clock upon loading

In [None]:
 # Read one raw OnixHarp CSV of the selected session directly with pandas
 # (first row is the header). The location is derived from `data_path`, so it
 # follows the session chosen in the configuration cell instead of repeating
 # an absolute path.
 data = pd.read_csv(data_path / 'OnixHarp' / 'OnixHarp_1904-01-02T00-00-00.csv',
                    header=0)

In [None]:
# Display the raw OnixHarp CSV read in the previous cell.
data

In [None]:
# Display the H1/H2 Harp streams, loaded earlier with dataframe=True.
harp_streams

In [None]:
# Display the processed fluorescence table.
# NOTE(review): `photometry_data` is only loaded when `cohort2` is False.
photometry_data