
# JSON to HDF5 Workflow

This notebook walks through converting Unity behavioral JSON logs to HDF5 using `behavioral_analysis.processing.json_to_hdf5_processor`.


In [None]:

from pathlib import Path
import sys

# Make the package importable without installing it: find the `src` directory
# relative to where the kernel was started and put it at the front of sys.path.
# NOTE: REPO_ROOT is simply the kernel's working directory, not a detected
# repository root, so only the two layouts below are supported.
REPO_ROOT = Path.cwd().resolve()
SRC_CANDIDATES = [
    REPO_ROOT / 'src',         # kernel started at the repository root
    REPO_ROOT.parent / 'src',  # kernel started one level below it (e.g. notebooks/)
]

for candidate in SRC_CANDIDATES:
    # Require a directory: a stray regular file named `src` is useless on sys.path.
    if candidate.is_dir():
        src_path = candidate.resolve()
        break
else:
    # for/else: this branch runs only when no candidate matched.
    raise RuntimeError('Could not locate the package src directory. Update the path setup cell.')

# Avoid stacking duplicate entries when the cell is re-run in the same kernel.
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

print(f'Added {src_path} to sys.path')


## Imports
Load the processing helpers we'll use in this workflow.

In [None]:

import json
from pathlib import Path

import pandas as pd

from behavioral_analysis.processing.json_to_hdf5_processor import process_json_to_hdf5
from behavioral_analysis.io.hdf5_writer import list_hdf5_contents



## Choose a JSON log
Set `JSON_PATH` to the Unity log you want to convert. The file must exist: the cell below only checks the path and does not generate a demo dataset, so the conversion step cannot run without a real log.


In [None]:
# Unity JSON log to convert. The default is a site-specific absolute path;
# point it at your own log before running the notebook elsewhere.
JSON_PATH = Path('/groups/spruston/home/moharb/DELTA_Behavior/Log BM35 2025-09-22 session 1.json')
# Passed to the converter as `cue_window_half_width_cm` in the conversion cell.
CUE_WINDOW_HALF_WIDTH_CM = 10.0

if JSON_PATH.exists():
    print(f'Using JSON log: {JSON_PATH}')
else:
    # Nothing in this notebook builds a demo dataset, so report what is actually
    # wrong instead of announcing a fallback that never happens.
    print(f'JSON_PATH does not exist: {JSON_PATH}. Update JSON_PATH before running the conversion.')



## Run the conversion
The helper wraps the full pipeline: parsing the JSON, building pandas DataFrames, detecting corridors, optionally generating trial summaries, and writing everything to HDF5.


In [None]:
# Fail fast with a clear message instead of letting the converter trip over a
# missing input after the output directory has already been created.
if not JSON_PATH.is_file():
    raise FileNotFoundError(f'Input JSON log not found: {JSON_PATH}')

# Relative path, so results land under the kernel's working directory.
OUTPUT_DIR = Path('outputs')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# The output name is derived from the input log's file name without its extension.
OUTPUT_PATH = OUTPUT_DIR / f"{JSON_PATH.stem}_with_global_position.h5"

# Corridor length handed to the converter; named here so it is easy to tune.
CORRIDOR_LENGTH_CM = 500.0

# The return value is treated below as the path of the written HDF5 file.
result_path = process_json_to_hdf5(
    input_file=str(JSON_PATH),
    output_file=str(OUTPUT_PATH),
    corridor_length_cm=CORRIDOR_LENGTH_CM,
    include_trials=True,
    include_combined=False,
    verbose=True,
    cue_window_half_width_cm=CUE_WINDOW_HALF_WIDTH_CM,
)
print(f'Processed HDF5 saved to: {result_path}')



## Inspect the HDF5 contents
Use the I/O helpers or pandas directly to see what was stored.


In [None]:

# Ask the project's I/O helper what is stored in the HDF5 file at `result_path`
# (the return format is whatever `list_hdf5_contents` defines).
contents = list_hdf5_contents(result_path)
# A bare name as the last line makes the notebook render it as the cell output.
contents



## Preview key tables
Here we look at the position trace with the derived global position, plus the trial and lick tables when they were generated.


In [None]:
def _read_optional_table(store, key):
    """Return the object stored under ``key``, or ``None`` when the key is absent.

    ``HDFStore.get`` raises ``KeyError`` for a missing key rather than returning
    ``None``, so membership is checked before reading.
    """
    return store[key] if key in store else None


# Open read-only; the context manager closes the file even if a read fails.
with pd.HDFStore(result_path, mode='r') as store:
    # These two are read unconditionally, i.e. they are treated as required.
    position_preview = store['events/Position'].head()
    corridor_info = store['events/Corridor_Info']
    # These three may be missing from the file, so they fall back to None.
    trials = _read_optional_table(store, 'events/Trials')
    lick_positions = _read_optional_table(store, 'events/Lick_Position')
    lick_windows = _read_optional_table(store, 'events/Lick_Cue_Window')

# Substitute a short message for every optional table that was not stored.
trial_preview = trials.head() if trials is not None else 'No trials table found'
lick_pos_preview = lick_positions.head() if lick_positions is not None else 'No lick position table found'
lick_win_preview = lick_windows.head() if lick_windows is not None else 'No lick window table found'

# The trailing tuple is the cell's displayed output.
position_preview, trial_preview, lick_pos_preview, lick_win_preview


In [None]:
# Display the complete object read from `events/Corridor_Info` (not just a head()).
corridor_info

In [None]:
# Columns of the lick/cue-window table shown in the summary.
lick_window_columns = [
    'trial_id',
    'cue_global_position_cm',
    'global_position_cm',
    'lick_offset_from_cue_cm',
]

# Jupyter only renders the last *top-level* expression of a cell; an expression
# nested inside if/else is evaluated and discarded. Bind the result to a name
# and end the cell with that name so something is actually displayed.
# The globals() check keeps the cell usable when the preview cell was not run.
if 'lick_windows' in globals() and lick_windows is not None:
    lick_window_summary = lick_windows[lick_window_columns].head()
else:
    lick_window_summary = 'No lick window table found'

lick_window_summary
