In [1]:
from braidz_analysis import braidz
import time
import os

In [2]:
# Directory holding the recordings, and the .braidz files to load from it.
root_folder = "/gpfs/soma_fs/nfc/nfc3008/Experiments/"
exp_list = [
    "20220204_141658.braidz",
    "20220204_181211.braidz",
]

# --- Alternative 1 (kept commented out for reference): basic usage ---
# Uses pyarrow if available, otherwise pandas.
# This requires the FULL PATH of the file, hence
# `os.path.join(root_folder, exp_list[0])`.
# start_time = time.perf_counter()
# data = braidz.read_braidz_file(os.path.join(root_folder, exp_list[0]))
# print(f"Time to read using pyarrow: {time.perf_counter() - start_time}")

# --- Alternative 2 (kept commented out for reference): explicit parser ---
# Same as above, but the parser to use is specified explicitly.
# start_time = time.perf_counter()
# data = braidz.read_braidz_file(os.path.join(root_folder, exp_list[0]), parser="pandas")
# print(f"Time to read using pandas: {time.perf_counter() - start_time}")

# Multiple files with a specific parser.
# Only the bare file names are passed here: root_folder is prepended to each
# entry of exp_list inside the function.
# The call is timed so the cell actually emits the elapsed-time line; a
# monotonic clock is used so the measurement is immune to wall-clock changes.
start_time = time.perf_counter()
combined_data = braidz.read_multiple_braidz(
    exp_list,
    root_folder=root_folder,
    parser="pyarrow",
)
print(f"Time to read multiple files using pyarrow: {time.perf_counter() - start_time}")


Processing file 1/2: /gpfs/soma_fs/nfc/nfc3008/Experiments/20220204_141658.braidz


PyArrow parsing failed: Empty CSV file:<zipfile.ZipExtFile name='experiment_info.csv' mode='r'>, falling back to pandas
Empty CSV file <zipfile.ZipExtFile name='experiment_info.csv' mode='r'> encountered


Processing file 2/2: /gpfs/soma_fs/nfc/nfc3008/Experiments/20220204_181211.braidz


PyArrow parsing failed: Empty CSV file:<zipfile.ZipExtFile name='experiment_info.csv' mode='r'>, falling back to pandas
Empty CSV file <zipfile.ZipExtFile name='experiment_info.csv' mode='r'> encountered


Time to read multiple files using pyarrow: 28.870835065841675


# Understanding the Data Structure

The `combined_data` dictionary contains structured data from braidz files with several key components:

## Key Components
- `df`: Contains the main `kalman_estimates` data (always present)
- `stim`: Contains stimulus-related data (if present)
- `opto`: Contains optogenetics data (if present)

Example of checking which keys are available:
```python
print(f"Available data keys: {list(combined_data.keys())}")
```

## Data Structure
The main DataFrame (`combined_data['df']`) contains various measurements and tracking data. You can examine its structure as follows:

```python
# View all columns in the main DataFrame
columns = combined_data['df'].columns
print("Available columns:", columns.tolist())
```

## Working with Groups

The data is organized hierarchically using two main identifiers:
- `obj_id`: Identifier for each tracked object. It is unique only within a single experiment; the same `obj_id` can appear in different experiments.
- `exp_num`: Experiment number that distinguishes the different recording sessions

### Grouping Patterns

1. Group by object:
```python
object_groups = combined_data['df'].groupby('obj_id')
```

2. Group by experiment:
```python
experiment_groups = combined_data['df'].groupby('exp_num')
```

3. Group by both (most common use case, because an `obj_id` can repeat across experiments, so only the pair identifies a single tracked object):
```python
# This creates groups for each unique object within each experiment
obj_exp_groups = combined_data['df'].groupby(['obj_id', 'exp_num'])
```