In [6]:
import os
import json
import pandas as pd

# Define the root directory
root_dir = 'samples'
waveform_file = 'waveform_responses.csv'
dataset_file = 'dataset.csv'

# Initialize lists to store the compiled data
data = []
waveform_data = []

sample_idx = range(10)
sim_idx = range(2)
nsim_idx = range(35)

n_fibers_by_sim = {
    0: 13,
    1: 34
}

time_data = None

# Traverse the directory structure
for sample_id in sample_idx:
    sims_path = os.path.join(root_dir, str(sample_id), 'models', '0', 'sims')
    
    for sim_id in sim_idx:
        n_sims_path = os.path.join(sims_path, str(sim_id), 'n_sims')
        
        for nsim in nsim_idx:
            nsim_path = os.path.join(n_sims_path, str(nsim))
            
            # Read the JSON file for pulse width and frequency
            sim_file = os.path.join(nsim_path, f'{nsim}.json')
            if not os.path.isfile(sim_file):
                continue
            
            with open(sim_file, 'r') as f:
                sim_data = json.load(f)
            
            pulse_width = sim_data['waveform']['BIPHASIC_PULSE_TRAIN']['pulse_width']
            inter_phase = sim_data['waveform']['BIPHASIC_PULSE_TRAIN']['inter_phase']
            pulse_freq = sim_data['waveform']['BIPHASIC_PULSE_TRAIN']['pulse_repetition_freq']
            unit = sim_data['waveform']['global']['unit']
            dt = sim_data['waveform']['global']['dt']

            amplitudes = sim_data['protocol']['amplitudes']
            n_amplitudes = len(amplitudes)
            n_fibers = n_fibers_by_sim[sim_id]
            
            # Initialize accumulators for the activations and waveforms
            total_activations = 0
            waveform_data_list = []
            
            # Read the activation and SFAP files
            data_outputs_path = os.path.join(nsim_path, 'data', 'outputs')
            
            for f in range(n_fibers):
                for a in range(n_amplitudes):
                    activation_file = os.path.join(data_outputs_path, f'activation_inner0_fiber{f}_amp{a}.dat')
                    sfap_file = os.path.join(data_outputs_path, f'SFAP_time_inner0_fiber{f}_amp{a}.dat')
                    
                    if os.path.isfile(activation_file):
                        with open(activation_file, 'r') as af:
                            total_activations += int(af.read().strip())
                    
                    if os.path.isfile(sfap_file):
                        sfap_df = pd.read_csv(sfap_file, delimiter='\s+', skiprows=1, header=None)
                        waveform_data_list.append(sfap_df)
            
            # Aggregate waveforms across all fibers and amplitudes
            if waveform_data_list:
                aggregated_waveform = pd.concat(waveform_data_list).groupby(0).sum().reset_index()
                if time_data is None:
                    time_data = aggregated_waveform.iloc[:, 0].values
                waveform_data.append(aggregated_waveform.iloc[:, 1].values)
                waveform_reference = len(waveform_data)
            else:
                waveform_reference = None
            
            # Append the compiled data
            data.append({
                'pulse_width': pulse_width,
                'frequency': pulse_freq,
                'amplitude': amplitudes[a],
                'total_activations': total_activations,
                'waveform_index': waveform_reference
            })

# Convert the data into a DataFrame
df = pd.DataFrame(data)

# Combine all waveforms into a single DataFrame
if waveform_data:
    waveform_df = pd.DataFrame(waveform_data).T
    time_column = time_data
    waveform_df.insert(0, 'time', time_column)
    waveform_df.to_csv(waveform_file, index=False)

# Save the DataFrame to a CSV file
df.to_csv(dataset_file, index=False)

print("Dataset compilation complete. Data saved to 'dataset.csv'. Waveform data saved to 'waveform_responses.csv'.")


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.