# In [None]:
import os
import json
import numpy as np

def load_custom_emg_data(dataset_path):
    """Load per-gesture EMG recordings from the dataset's ``trainingJSON`` folder.

    Scans ``<dataset_path>/trainingJSON/<user_folder>/*.json``. Entries of
    ``trainingJSON`` that are not directories are ignored, and user folders
    are not searched recursively. Each JSON file is read as an object whose
    ``"trainingSamples"`` entry maps sample ids (``idx_1``, ``idx_2``, ...)
    to dicts holding a ``"gestureName"`` string and an ``"emg"`` dict with
    the channel lists ``"ch1"`` .. ``"ch8"``.

    A sample is skipped when its EMG data is missing or empty, or when its
    eight channels do not all have the same length, so every returned matrix
    is a non-empty rectangular array.

    Args:
        dataset_path: Root directory of the dataset (the folder that
            contains ``trainingJSON``).

    Returns:
        A ``(all_emg_data, labels)`` tuple of two equally long lists.
        ``all_emg_data[i]`` is a NumPy array of shape ``(8, n_points)`` built
        from channels 1-8 (the original author notes 992 points per channel;
        this is not enforced here). ``labels[i]`` is the matching gesture
        name, or ``"unknown"`` when the sample has no ``"gestureName"``.
        Samples are ordered by sorted user folder, then sorted file name,
        then by their order inside the JSON file.

    Raises:
        FileNotFoundError: If ``<dataset_path>/trainingJSON`` does not exist.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
    """
    all_emg_data = []
    labels = []

    for main_folder in ('trainingJSON',):
        folder_path = os.path.join(dataset_path, main_folder)

        # os.listdir returns entries in arbitrary order; sort so the order of
        # the returned samples is reproducible across runs and machines.
        for user_folder in sorted(os.listdir(folder_path)):
            user_path = os.path.join(folder_path, user_folder)
            if not os.path.isdir(user_path):
                continue

            for file_name in sorted(os.listdir(user_path)):
                if not file_name.endswith('.json'):
                    continue

                json_path = os.path.join(user_path, file_name)
                # JSON is UTF-8 by specification; do not depend on the
                # platform's default encoding.
                with open(json_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                # trainingSamples is a dict of idx_1, idx_2, ...
                samples = data.get("trainingSamples", {})
                for sample in samples.values():
                    # ``or`` also covers explicit JSON nulls.
                    emg = sample.get("emg") or {}
                    ch_values = [emg.get(f"ch{i + 1}") or [] for i in range(8)]

                    # Reject empty recordings as well as ragged ones: eight
                    # empty channels have "equal lengths" but carry no signal
                    # and would yield a useless (8, 0) matrix.
                    n_points = len(ch_values[0])
                    if n_points == 0 or any(len(ch) != n_points for ch in ch_values):
                        continue

                    all_emg_data.append(np.array(ch_values))  # shape: (8, n_points)
                    labels.append(sample.get("gestureName", "unknown"))

    return all_emg_data, labels

# Root folder of the dataset on the local machine; the loader reads its
# 'trainingJSON' subfolder.
dataset_path = '/home/codebaker/Desktop/EMG-EPN612 Dataset'
emg_data, gesture_labels = load_custom_emg_data(dataset_path)

# Summary: how many samples were loaded and which gesture names appear.
print(f"Loaded {len(emg_data)} gestures")
print(f"Unique gestures: {set(gesture_labels)}")

# Only index into the list when at least one sample was loaded.
if len(emg_data) > 0:
    print(f"Sample shape: {emg_data[0].shape}")
