In [None]:
import os
import pickle
import pandas as pd
import re
import numpy as np


# Merge the data

In [None]:
# Root folder of the processed "Full_dataset" tree. The input and output
# directories are derived from it so the long prefix is spelled out only once.
FULL_DATASET_DIR = '/itet-stor/kvulic/neuronies/single_neurons/1_Subprojects/Neurons_As_DNNs/3_Processed_Data/March2025_heart/biTE_stimulation/Full_dataset'

# Directory containing the pickle files
DATA_DIR = f'{FULL_DATASET_DIR}/Full_files'
# Directory that is created (if needed) and used for the merged output
OUTPUT_DIR = f'{FULL_DATASET_DIR}/CSVs'

# Top-level keys that are extracted from every loaded pickle file
DATA_KEYS = ["validated_results"]

In [None]:
# List to store all dataframes (not referenced again within this cell)
all_data = []

# Ensure output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Columns kept from every extracted table
keep_columns = [
    "source_electrode", "target_electrode",
    "source_unit_id", "target_unit_id",
    "lag", "validation", "mTE",
    "syn probability", "latency_extremum",
]

# Dictionary to store all extracted data: key -> list of per-file DataFrames
data_dict = {key: [] for key in DATA_KEYS}

# Loop through all pickle files in the directory. os.listdir returns entries in
# arbitrary order, so sort them to make the row order of the merged table
# reproducible between runs.
for file in sorted(os.listdir(DATA_DIR)):
    if not file.endswith(".pkl"):
        continue
    file_path = os.path.join(DATA_DIR, file)

    try:
        # NOTE: unpickling can execute arbitrary code; only point DATA_DIR at
        # files from a trusted source.
        with open(file_path, "rb") as f:
            data = pickle.load(f)

        for key in DATA_KEYS:
            if key not in data:
                print(f"Warning: key {key} was not in file {file}.")
                continue

            df_data = pd.DataFrame(data[key])  # Convert structured array to DataFrame

            # Take an explicit copy so the columns added below are written to
            # an independent frame rather than to a slice of df_data (this
            # avoids pandas' SettingWithCopyWarning).
            df = df_data[keep_columns].copy()

            # Look up the UNIT_TO_EL entry of every source/target unit.
            # Iterating over the id column directly (instead of iterrows)
            # keeps the ids in their own dtype; iterrows would upcast each
            # row to a common dtype before the lookup.
            df["electrodes_source_unit"] = [
                data["UNIT_TO_EL"][unit_id] for unit_id in df["source_unit_id"]
            ]
            df["electrodes_target_unit"] = [
                data["UNIT_TO_EL"][unit_id] for unit_id in df["target_unit_id"]
            ]
            # Remember which file every row came from
            df["filename"] = file
            data_dict[key].append(df)

    except Exception as e:
        # Best effort: report the problem and continue with the next file
        print(f"Error processing {file}: {e}")


# Save each merged dataset as a separate pickle file
for key in DATA_KEYS:
    if data_dict[key]:
        merged_df = pd.concat(data_dict[key], ignore_index=True)
        output_file = os.path.join(OUTPUT_DIR, f"{key}_full_data_w_speed_and_firing.pkl")
        with open(output_file, "wb") as f:
            pickle.dump(merged_df, f)
        print(f"Saved {key} data to {output_file}")
    else:
        print(f"No valid {key} data found.")

# Add information to the data

In [None]:
# Load the pickled table of validated results as a DataFrame.
# pd.read_pickle returns the stored DataFrame directly, so no detour through
# np.load(..., allow_pickle=True) and no re-wrapping in pd.DataFrame is needed.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
#data = pd.read_csv(r"Z:\neuronies\single_neurons\1_Subprojects\Neurons_As_DNNs\3_Processed_Data\March2025_heart\biTE_stimulation\Full_dataset\CSVs\validated_results_full_data.csv")
data = pd.read_pickle('/itet-stor/kvulic/neuronies/single_neurons/1_Subprojects/Neurons_As_DNNs/3_Processed_Data/March2025_heart/biTE_stimulation/Full_dataset/CSVs/validated_results_full_data_w_speed_and_firing.pkl')

In [None]:
# Function to parse the paste.txt content and create a lookup dictionary
def parse_experiment_info(text_content):
    """Build a filename -> experiment-metadata lookup from the info text.

    The text is cut into sections at every run of 20 or more dashes. In each
    non-blank section the header fields (experiment, stimulation frequency,
    delay, chip ID, network, DIV, repetition) are read where present; a field
    that is absent from a section keeps the value most recently found in an
    earlier section. Every "Status: before/after/after_2" line (matched
    case-insensitively) that is directly followed by a line holding a
    matching "*.raw_processed_info_metrics.pkl" filename yields one entry.

    Args:
        text_content: Full text of the experiment info document.

    Returns:
        dict mapping each filename to a dict with the keys 'Experiment',
        'Stimulation_frequency', 'Delay', 'Chip_ID', 'Network', 'DIV',
        'Repetition' (stripped strings, or None if never seen) and 'Status'
        (lower-cased).
    """
    # (output key, pattern) pairs; group 1 captures the rest of the line
    header_patterns = (
        ('Experiment', r'Experiment:\s*([^\n]+)'),
        ('Stimulation_frequency', r'Stimulation frequency:\s*([^\n]+)'),
        ('Delay', r'Delay:\s*([^\n]+)'),
        ('Chip_ID', r'Chip ID:\s*([^\n]+)'),
        ('Network', r'Network:\s*([^\n]+)'),
        ('DIV', r'DIV:\s*([^\n]+)'),
        ('Repetition', r'Repetition:\s*([^\n]+)'),
    )
    status_file_pattern = r'Status:\s*(before|after|after_2)\s*\n(ID\d+_\d+_DIV\d+_DATE\d+_\d+_[^\.]+\.raw_processed_info_metrics\.pkl)'

    # Header values carry over from one section to the next until overwritten
    carried = {field: None for field, _ in header_patterns}
    lookup = {}

    for block in re.split(r'-{20,}', text_content):
        if block.strip() == '':
            continue

        # Refresh whichever header fields this section mentions
        for field, pattern in header_patterns:
            found = re.search(pattern, block)
            if found is not None:
                carried[field] = found.group(1).strip()

        # One entry per (status, filename) pair; snapshot the current headers
        for status, filename in re.findall(status_file_pattern, block, re.IGNORECASE):
            lookup[filename] = {**carried, 'Status': status.lower()}

    return lookup


In [None]:
# Location of the info text document
#with open(r"Z:\neuronies\single_neurons\1_Subprojects\Neurons_As_DNNs\3_Processed_Data\March2025_heart\biTE_stimulation\Full_dataset\info_stimulation_experiments.txt", 'r') as file:
info_path = '/itet-stor/kvulic/neuronies/single_neurons/1_Subprojects/Neurons_As_DNNs/3_Processed_Data/March2025_heart/biTE_stimulation/Full_dataset/info_stimulation_experiments.txt'

# Read the whole document into one string
with open(info_path, 'r') as info_file:
    text_content = info_file.read()

# Turn the document text into the filename -> metadata lookup and show it
experiment_lookup = parse_experiment_info(text_content)
print(experiment_lookup)

In [None]:
# Function to add experiment information to the dataframe
def add_experiment_info(row, lookup_dict):
    """Return the experiment metadata for one row as a fixed-length Series.

    Args:
        row: Mapping-like row (for example a DataFrame row) that provides a
            'filename' entry.
        lookup_dict: dict mapping filenames to metadata dicts with the keys
            'Experiment', 'Stimulation_frequency', 'Delay', 'Chip_ID',
            'Network', 'DIV', 'Repetition' and 'Status'.

    Returns:
        pd.Series with eight values in the order Experiment,
        Stimulation_frequency, Delay, Chip_ID, Network, DIV, Repetition,
        Status. A key missing from the metadata dict gives None for that
        entry; if the filename is not in lookup_dict (or the lookup itself
        fails) all eight entries are None.
    """
    fields = (
        'Experiment',
        'Stimulation_frequency',
        'Delay',
        'Chip_ID',
        'Network',
        'DIV',
        'Repetition',
        'Status',
    )
    filename = row['filename']

    # Get info from lookup dictionary
    try:
        info = lookup_dict[filename] if filename in lookup_dict else None
    except Exception as e:
        # Best effort: report the failing lookup and fall back to the
        # all-None result below instead of aborting the whole apply
        print(f"Error adding filename {filename}: {e}")
        info = None

    if info is None:
        # Always hand back eight entries so that DataFrame.apply(..., axis=1)
        # expands every row to the same columns, even for unknown filenames
        return pd.Series([None] * len(fields))

    return pd.Series([info.get(field, None) for field in fields])

# Names of the metadata columns, in the order add_experiment_info returns them
new_cols = [
    'Experiment',
    'Stimulation_frequency',
    'Delay',
    'Chip_ID',
    'Network',
    'DIV',
    'Repetition',
    "Status",
]

# Look up the metadata row by row, then store it under the new column names
experiment_info = data.apply(add_experiment_info, axis=1, args=(experiment_lookup,))
data[new_cols] = experiment_info

# Preview the filename next to the newly added columns
print(data[['filename'] + new_cols].head())



In [None]:
# Bare expression: shows the dataframe as this cell's output
data

In [None]:
# Write the updated dataframe back to disk as a pickle file
#data.to_csv(r"Z:\neuronies\single_neurons\1_Subprojects\Neurons_As_DNNs\3_Processed_Data\March2025_heart\biTE_stimulation\Full_dataset\CSVs\validated_results_full_data.csv", index=False)
output_path = '/itet-stor/kvulic/neuronies/single_neurons/1_Subprojects/Neurons_As_DNNs/3_Processed_Data/March2025_heart/biTE_stimulation/Full_dataset/CSVs/validated_results_full_data_w_speed_and_firing.pkl'
data.to_pickle(output_path)