In [1]:
import os
import pandas as pd
import glob


def load_session_data(rec_path):
    """
    Load a single session's HDF5 file by dynamically searching the MIR_Aligned folder.

    Selection logic:
      1) If exactly one file matches aligned_predictions_with_ca_and_dF_F*.h5, use it.
      2) Otherwise, pick the file with the longest stem (most extra info).
    """
    rec_path = Path(rec_path)
    aligned_dir = rec_path / "MIR_Aligned"

    # find all matching .h5 files
    h5_paths = list(aligned_dir.glob("aligned_predictions_with_ca_and_dF_F*.h5"))
    if not h5_paths:
        raise FileNotFoundError(f"No .h5 files found in {aligned_dir}")

    # selection rule
    if len(h5_paths) == 1:
        hdf5_file_path = h5_paths[0]
    else:
        # choose the file whose stem has the most characters
        hdf5_file_path = max(h5_paths, key=lambda p: len(p.stem))

    print("Using:", hdf5_file_path)

    # load dataframe
    df = pd.read_hdf(hdf5_file_path, key='df')

    # extract metadata
    session_id      = rec_path.name
    recording_date  = rec_path.parent.name
    experiment_name = rec_path.parent.parent.name

    df['session_id']     = session_id
    df['recording_date'] = recording_date
    df['experiment']     = experiment_name
    df['session_path']   = str(rec_path)
    df['file_path']      = str(hdf5_file_path)

    return df

def load_sessions_from_csv(csv_filepath, base_path, verbose=True):
    """
    Load session data from a CSV file containing relative session paths.
    
    This function assumes that the CSV file contains a column (by default, it will look for 
    a column named 'relative_path'; if not found, it will use the first column) that lists the 
    relative paths for each session. The provided base path is then prepended to each relative path 
    to form the full session directory. Finally, it uses your preexisting function load_session_data 
    to load each session's data.
    
    Parameters:
      - csv_filepath (str): The path to the CSV file containing the relative session paths.
      - base_path (str): The base path to be prepended to each relative path.
      - verbose (bool): If True, print messages when a session fails to load.
    
    Returns:
      - sessions (list): A list of DataFrames, one for each successfully loaded session.
    """
    try:
        df_paths = pd.read_csv(csv_filepath)
    except Exception as e:
        print(f"Error reading CSV file at {csv_filepath}: {e}")
        return []
    
    # Determine which column contains the relative paths. Look for a column named 'relative_path'
    # otherwise default to the first column.
    if 'relative_path' in df_paths.columns:
        relative_paths = df_paths['relative_path'].tolist()
    else:
        relative_paths = df_paths.iloc[:, 0].tolist()
    
    sessions = []
    for rel_path in relative_paths:
        # Build the full session path using the base path
        session_path = os.path.join(base_path, rel_path)
        try:
            df_session = load_session_data(session_path)
            sessions.append(df_session)
        except Exception as e:
            if verbose:
                print(f"Could not load session at {session_path}: {e}")
    return sessions

# =============================================================================
# Example Usage:
# =============================================================================
# Set the base path that will be prepended to each relative path.
base_path = "/data/big_rim/rsync_dcc_sum/Oct3V1" #"/hpc/group/tdunn/Bryan_Rigs/BigOpenField/Oct3V1"

# CSV file that contains the relative session paths.
csv_file = "/home/lq53/mir_repos/BBOP/random_tests/25mar_minibbop_integration/250331_sum_aligned_good_path_relative.csv" #"/hpc/group/tdunn/Bryan_Rigs/BigOpenField/2504_mir_loader/250331_sum_aligned_good_path_relative.csv"
# Load all sessions
all_sessions = load_sessions_from_csv(csv_file, base_path)
print(f"Loaded {len(all_sessions)} sessions.")

Could not load session at /data/big_rim/rsync_dcc_sum/Oct3V1/2024_10_14/20240916v1r1_16_37: name 'Path' is not defined
Could not load session at /data/big_rim/rsync_dcc_sum/Oct3V1/2024_10_14/20240916v1r1_16_53: name 'Path' is not defined
Could not load session at /data/big_rim/rsync_dcc_sum/Oct3V1/2024_10_14/20240916v1r2_14_30: name 'Path' is not defined
Could not load session at /data/big_rim/rsync_dcc_sum/Oct3V1/2024_10_14/20240916v1r2_15_58: name 'Path' is not defined
Could not load session at /data/big_rim/rsync_dcc_sum/Oct3V1/2024_10_17/20240819V1r1_13_41: name 'Path' is not defined
Could not load session at /data/big_rim/rsync_dcc_sum/Oct3V1/2024_10_17/20240819V1r1_14_25: name 'Path' is not defined
Could not load session at /data/big_rim/rsync_dcc_sum/Oct3V1/2024_10_24/20241001PMCr2_16_19: name 'Path' is not defined
Could not load session at /data/big_rim/rsync_dcc_sum/Oct3V1/2024_10_25/20241002PMCr2_16_25: name 'Path' is not defined
Could not load session at /data/big_rim/rsync_