In [5]:
import json
import os
from pathlib import Path
import sys

In [7]:
# Convert the recorded sessions in <dataset>/<kind>_<timestamp>/data.jsonl into
# flat files named data/data_label_<label>_<index>_<timestamp>.jsonl, dropping
# the per-record 'timestamp' key. Animal sessions get label 1, human sessions
# get label 0; <index> is a running counter shared by both groups.

# Create output directory
os.makedirs('data', exist_ok=True)

# Running file counter; animals are numbered first, humans continue the sequence.
data_index = 0

# For Google Colab or other environments, try multiple possible paths
possible_paths = [
    os.path.abspath(os.path.join(os.getcwd(), '..', 'data', 'dataset')),  # Standard relative
    '/content/data/dataset',  # Google Colab default
    os.path.expanduser('~/Workspace/Projects/Agronauts/firmware-sensing-l1-l2/data/dataset'),  # Full home path
    '../data/dataset',  # Relative from notebook
]


def find_session_dirs(dataset_dir, keyword):
    """Return the sorted names of sub-directories of ``dataset_dir`` matching ``keyword``.

    A directory matches when ``keyword`` occurs anywhere in its lower-cased
    name. Plain files are ignored.
    """
    return sorted(
        name for name in os.listdir(dataset_dir)
        if keyword in name.lower() and os.path.isdir(os.path.join(dataset_dir, name))
    )


def convert_sessions(dataset_dir, session_dirs, kind, label, start_index, output_dir='data'):
    """Copy each session's ``data.jsonl`` to ``output_dir`` without the 'timestamp' key.

    Args:
        dataset_dir: Directory that contains the session directories.
        session_dirs: Names of the session directories to convert, in order.
        kind: Human-readable session type used only in the progress message.
        label: Class label embedded in the output file name.
        start_index: Value of the running file counter for the first file written.
        output_dir: Existing directory that receives the converted files.

    Returns:
        The running file counter after the last file written, i.e.
        ``start_index`` plus the number of sessions that had a ``data.jsonl``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    index = start_index
    for session_dir in session_dirs:
        source_path = os.path.join(dataset_dir, session_dir, 'data.jsonl')
        if not os.path.exists(source_path):
            # Sessions without a recording are skipped and do not consume an index.
            continue

        # Everything after the first underscore of the directory name is the timestamp.
        timestamp = session_dir.split('_', 1)[-1]
        print(f"Processing {kind} session from {timestamp}")

        target_path = os.path.join(output_dir, f'data_label_{label}_{index}_{timestamp}.jsonl')
        with open(source_path, 'r', encoding='utf-8') as f_in, \
                open(target_path, 'w', encoding='utf-8') as f_out:
            for line in f_in:
                if not line.strip():
                    # Blank lines (e.g. a trailing newline) are not JSON records.
                    continue
                record = json.loads(line)
                record_no_timestamp = {k: v for k, v in record.items() if k != 'timestamp'}
                f_out.write(json.dumps(record_no_timestamp) + '\n')
        index += 1
    return index


# The first candidate that exists wins.
old_data_dir = None
for path in possible_paths:
    if os.path.exists(path):
        old_data_dir = path
        print(f"Found data directory: {old_data_dir}")
        break

if old_data_dir is None:
    print("ERROR: Could not find data directory!")
    print(f"Current working directory: {os.getcwd()}")
    print("Tried paths:")
    for p in possible_paths:
        print(f"  - {p} (exists: {os.path.exists(p)})")
else:
    # Process animal directories (label=1)
    animal_dirs = find_session_dirs(old_data_dir, 'animal')
    print(f"Found {len(animal_dirs)} animal sessions")
    data_index = convert_sessions(old_data_dir, animal_dirs, 'animal', 1, data_index)

    # Process human directories (label=0)
    human_dirs = find_session_dirs(old_data_dir, 'human')
    print(f"Found {len(human_dirs)} human sessions")
    data_index = convert_sessions(old_data_dir, human_dirs, 'human', 0, data_index)

    print(f"\nConversion complete! Created {data_index} files in data/")
    print("Animal files: data_label_1_*.jsonl")
    print("Human files: data_label_0_*.jsonl")

Found data directory: /Users/wanghley/Workspace/Projects/Agronauts/firmware-sensing-l1-l2/data/dataset
Found 24 animal sessions
Processing animal session from 20251112_225331
Processing animal session from 20251112_225408
Processing animal session from 20251112_225450
Processing animal session from 20251112_225601
Processing animal session from 20251112_225642
Processing animal session from 20251112_225726
Processing animal session from 20251112_225800
Processing animal session from 20251112_225831
Processing animal session from 20251112_230832
Processing animal session from 20251112_230904
Processing animal session from 20251112_230939
Processing animal session from 20251112_231013
Processing animal session from 20251112_231054
Processing animal session from 20251112_231122
Processing animal session from 20251112_231147
Processing animal session from 20251112_231209
Processing animal session from 20251112_231232
Processing animal session from 20251112_232212
Processing animal session 