### Get the first and last trials covered by the analyzed (trained) video, for plotting


In [None]:
import pandas as pd

# Define file paths (`bodypart` must already be defined earlier in the notebook)
arhmm_latents_path = '/local1/video-analysis/scratch/706893_2024-05-28_15-15-38/results/xinxin/coupled-baiting_' + bodypart + '/706893/2024-05-28/dfs/arhmm_latents_df_.csv'
df_original_path = '/local1/video-analysis/scratch/706893_2024-05-28_15-15-38/results/xinxin/coupled-baiting_' + bodypart + '/706893/2024-05-28/dfs/df_original.csv'

# Load the CSV files
arhmm_latents_df = pd.read_csv(arhmm_latents_path)
df_original = pd.read_csv(df_original_path)

# Get the first non-NaN Harp_Timestamp from arhmm_latents_df.
# NaNs are dropped here for the same reason as for the last timestamp below:
# comparing against NaN is always False and would silently select the wrong trial.
first_harp_timestamp = arhmm_latents_df["Harp_Timestamp"].dropna().iloc[0]

# Find the first trial that starts after first_harp_timestamp.
# This takes the first matching row, so it is the *closest* next trial only if
# df_original is in chronological order -- TODO confirm.
starts_after_first_frame = df_original["start_time"] > first_harp_timestamp
if not starts_after_first_frame.any():
    # Without this guard, idxmax() on an all-False mask returns the first row
    # label and an unrelated trial would be reported.
    raise ValueError(
        f"No trial in df_original starts after the first Harp_Timestamp ({first_harp_timestamp})"
    )
first_analyzed_trial_idx = starts_after_first_frame.idxmax()  # label of the first True
first_analyzed_trial_event_timestamp = df_original.loc[first_analyzed_trial_idx, "start_time"]
first_analyzed_trial_number = df_original.loc[first_analyzed_trial_idx, "Trial"]

# Print results
print(f"First Harp_Timestamp from arhmm_latents_df: {first_harp_timestamp}")
print(f"Closest next trial Harp_Timestamp (start_time) from df_original: {first_analyzed_trial_event_timestamp} (Trial {first_analyzed_trial_number})")
print('')

# Get the last non-NaN Harp_Timestamp from arhmm_latents_df
last_harp_timestamp = arhmm_latents_df["Harp_Timestamp"].dropna().iloc[-1]

# Keep only trials that have a stop_time; rows with NaN stop_time cannot be
# compared against last_harp_timestamp.
df_original_clean = df_original.dropna(subset=["stop_time"])

# Find the last trial (in row order) that stops before last_harp_timestamp
stops_before_last_frame = df_original_clean["stop_time"] < last_harp_timestamp
if not stops_before_last_frame.any():
    # Fail with a clear message instead of an IndexError from `.index[-1]`.
    raise ValueError(
        f"No trial in df_original stops before the last Harp_Timestamp ({last_harp_timestamp})"
    )
last_analyzed_trial_idx = df_original_clean[stops_before_last_frame].index[-1]
last_analyzed_trial_event_timestamp = df_original.loc[last_analyzed_trial_idx, "stop_time"]

# Get the trial number of that trial
last_analyzed_trial_number = df_original_clean.loc[last_analyzed_trial_idx, "Trial"]


# Print results
print(f"Last Harp_Timestamp from arhmm_latents_df: {last_harp_timestamp}")
print(f"Closest prior trial Harp_Timestamp from df_events: {last_analyzed_trial_event_timestamp} (Trial {last_analyzed_trial_number})")


In [None]:
First Harp_Timestamp from arhmm_latents_df: 6574786.711488
Closest next trial Harp_Timestamp (start_time) from df_original: 6574787.602496 (Trial 9)

Last Harp_Timestamp from arhmm_latents_df: 6578185.45952
Closest prior trial Harp_Timestamp from df_events: 6578181.317504 (Trial 273)

### Get total_num_initial_frames_to_exclude and total_num_late_frames_to_exclude

In [None]:
import numpy as np
import pandas as pd

# Session-specific inputs (`bodypart` must already be defined earlier in the notebook).
# NOTE: get_frame_counts is defined in a later cell of this notebook; that cell
# has to be run before this one.
hdf5_path = '/local1/video-analysis/scratch/706893_2024-05-28_15-15-38/data/xinxin/coupled-baiting_' + bodypart + '/706893/2024-05-28/data.hdf5'
set_harp_ts = True  

ae_latent_path = '/local1/video-analysis/scratch/706893_2024-05-28_15-15-38/results/xinxin/coupled-baiting_' + bodypart + '/706893/2024-05-28/dfs/ae_latents_df.csv'
ae_latents_df = pd.read_csv(ae_latent_path)

# Initialize exclusion counters; both stay 0 when set_harp_ts is False
total_num_initial_frames_to_exclude, total_num_late_frames_to_exclude = 0, 0

if set_harp_ts:
    # First and last segment index present in ae_latents_df.
    # Cast to int: if pandas parsed seg_index as float, np.arange would yield
    # floats, which get_frame_counts cannot format with `:04d`.
    first_used_segment = int(ae_latents_df['seg_index'].iloc[0])
    last_used_segment = int(ae_latents_df['seg_index'].iloc[-1])

    # Exclude frames from the beginning of the list: segments 0 .. first_used_segment - 1
    selected_segments = np.arange(first_used_segment)

    # The third return value of get_frame_counts is the number of segments in
    # the HDF5 file, not a frame count.
    n_frames_dict, total_num_initial_frames_to_exclude, total_num_segments = get_frame_counts(hdf5_path, selected_segments)
    # Legacy (misleading) name kept so that cells relying on it keep working.
    total_num_frames_in_segments = total_num_segments

    # Print results
    print("Total number of segments in the HDF5 file:", total_num_segments)
    print("Total number of excluded frames for EARLY segments:", total_num_initial_frames_to_exclude)
    print("Frame counts per segments:", n_frames_dict)


    # Exclude frames from the end of the list: segments last_used_segment + 1 .. end
    selected_segments = np.arange(last_used_segment + 1, total_num_segments)

    n_frames_dict_, total_num_late_frames_to_exclude, _ = get_frame_counts(hdf5_path, selected_segments)

    # Print results
    print('')
    print("Total number of excluded frames for LATE segments:", total_num_late_frames_to_exclude)
    print("Frame counts per segments:", n_frames_dict_)


In [None]:
Total number of segments in the HDF5 file: 178
Total number of excluded frames for EARLY segments: 4071
Frame counts per segments: {'trial_0000': 1000, 'trial_0001': 1071, 'trial_0002': 1000, 'trial_0003': 1000}

Total number of excluded frames for LATE segments: 3667
Frame counts per segments: {'trial_0174': 994, 'trial_0175': 953, 'trial_0176': 1000, 'trial_0177': 720}


### Get the number of frames in the selected_segments

In [None]:
import h5py

def get_frame_counts(hdf5_path, selected_trials=None):
    """
    Count the frames stored for each requested trial of an HDF5 file.

    The file is opened read-only and its 'images' group is inspected. Trial
    datasets are looked up under keys of the form 'trial_NNNN' (zero-padded
    to four digits); the length of a dataset's first axis is taken as its
    number of frames. Requested indices with no matching key are skipped.

    Parameters:
        hdf5_path (str): Path to the HDF5 file.
        selected_trials (iterable of int, optional): Trial indices to look up.
            If None, indices 0 .. (number of entries in 'images') - 1 are used.

    Returns:
        dict: Maps each found trial key to its frame count.
        int: Sum of the frame counts over the found trials.
        int: Number of entries in the 'images' group of the file.
    """
    with h5py.File(hdf5_path, 'r') as h5_file:
        images = h5_file['images']
        total_num_trials = len(images)

        # Default to every index when the caller did not restrict the selection
        if selected_trials is None:
            selected_trials = list(range(total_num_trials))

        # Build the candidate keys lazily and keep only those present in the file
        candidate_keys = (f"trial_{trial_idx:04d}" for trial_idx in selected_trials)
        n_frames_dict = {
            key: images[key].shape[0]
            for key in candidate_keys
            if key in images
        }

    total_num_frames = sum(n_frames_dict.values())
    return n_frames_dict, total_num_frames, total_num_trials

