In [1]:
import pandas as pd
import os
import json

In [2]:
# Constants
EEG_SAMPLING_RATE = 125  # Hz

# EEG channel names (16 entries).
EEG_COLUMNS = [
    'Fp1', 'Fp2', 'C3', 'C4',
    'P7', 'P8', 'O1', 'O2',
    'F7', 'F8', 'F3', 'F4',
    'T7', 'T8', 'P3', 'P4',
]

In [14]:
def process_experiment(eeg_file, timestamps_file, sampling_rate=None,
                       pre_window_s=30, post_window_s=2):
    """Build one subject's record from an EEG recording and its timestamp log.

    Subject metadata is parsed from the EEG file name: the part after the last
    underscore and before the first dot must look like ``<name>,<sex>-<age>``
    (for example ``output_file_Ana,f-23.csv``).

    Every row of the timestamp file produces one EEG segment. Rows alternate
    by position:

    * even positions (0, 2, ...) take the ``pre_window_s`` seconds that END at
      the row's timestamp;
    * odd positions (1, 3, ...) take the ``post_window_s`` seconds that START
      at the row's timestamp.

    NOTE(review): presumably even rows are logged when a 30 s song finishes
    and odd rows when a 2 s pause begins -- confirm against the recording
    protocol.

    Args:
        eeg_file: Path to the EEG CSV file (read with a header row). All of
            its columns are included in the extracted segments.
        timestamps_file: Path to a header-less CSV with two columns: the song
            path and a timestamp in seconds.
        sampling_rate: Samples per second used to convert seconds into row
            positions. Defaults to the module-level ``EEG_SAMPLING_RATE``.
        pre_window_s: Length in seconds of the window before even-row
            timestamps.
        post_window_s: Length in seconds of the window after odd-row
            timestamps.

    Returns:
        dict with keys ``name`` (str), ``sex`` (1 if the sex code is 'm',
        case-insensitive, otherwise 0), ``age`` (int) and ``view_songs``, a
        list of dicts with keys ``file_path``, ``class`` and ``EEG_signal``
        (list of rows, each a list of column values).

    Raises:
        ValueError: If the file name does not follow the expected pattern.
    """
    if sampling_rate is None:
        # Resolved at call time so the notebook-level constant stays the default.
        sampling_rate = EEG_SAMPLING_RATE

    # Parse the subject's metadata from the base name only, so that
    # underscores or dots in the directory part cannot interfere.
    stem = os.path.basename(eeg_file).split('_')[-1].split('.')[0]
    try:
        name, info = stem.split(',')
        sex, age = info.split('-')[:2]
        age = int(age)
    except ValueError:
        raise ValueError(
            f"Cannot parse '<name>,<sex>-<age>' from EEG file name: {eeg_file!r}"
        ) from None
    sex = 1 if sex.lower() == 'm' else 0

    # Load the EEG samples
    eeg_df = pd.read_csv(eeg_file)

    # Load the timestamps
    timestamps_df = pd.read_csv(timestamps_file, header=None, names=['Song', 'Timestamp'])

    # Extract one EEG segment per timestamp row
    songs_data = []
    rows = zip(timestamps_df['Song'], timestamps_df['Timestamp'])
    for position, (song, marker) in enumerate(rows):
        if position % 2 == 0:
            start_time, end_time = marker - pre_window_s, marker
        else:
            start_time, end_time = marker, marker + post_window_s

        # Clamp to the start of the recording: a negative bound would make
        # iloc count from the END of the frame and return the wrong samples.
        start_index = max(int(start_time * sampling_rate), 0)
        end_index = max(int(end_time * sampling_rate), start_index)
        eeg_chunk = eeg_df.iloc[start_index:end_index].values.tolist()

        if 'no_stimuli' in song:
            chunk_label = 'no_stimuli'
        else:
            chunk_label = song.split('/')[1]  # Category is the second path component

        songs_data.append({
            "file_path": song,
            "class": chunk_label,
            "EEG_signal": eeg_chunk
        })

    return {
        "name": name,
        "sex": sex,
        "age": age,
        "view_songs": songs_data
    }

def consolidate_data(directory):
    """Process every EEG recording in ``directory`` into one dictionary.

    A file is treated as an EEG recording when its name starts with
    ``output_file``. Its timestamp log is expected in the same directory under
    the same name with that prefix replaced by ``timestamps``.

    Args:
        directory: Path of the folder holding the recordings and their logs.

    Returns:
        dict of the form ``{"subjects": [...]}`` with one entry per recording,
        as returned by ``process_experiment``, ordered by file name.
    """
    eeg_prefix = "output_file"
    all_data = {"subjects": []}
    # os.listdir yields entries in arbitrary order; sorting keeps the subject
    # order (and therefore the saved output) reproducible across runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.startswith(eeg_prefix):
            continue
        eeg_file = os.path.join(directory, filename)
        # Swap only the leading prefix, leaving the rest of the name untouched.
        timestamps_name = "timestamps" + filename[len(eeg_prefix):]
        timestamps_file = os.path.join(directory, timestamps_name)
        person_data = process_experiment(eeg_file, timestamps_file)
        all_data["subjects"].append(person_data)
    return all_data

# Use the function to consolidate the data
directory = "datos_raw"  # Directory where the files are located
consolidated_data = consolidate_data(directory)


In [16]:
# Save the consolidated data to a JSON file
with open('SynapSound_data.json', 'w') as json_file:
    json.dump(consolidated_data, json_file)