# Compute Phase Locking Value Vectors

In [9]:
import scipy.io
import numpy as np
import os, pickle, re
import pandas as pd

# Directory scanned below for per-patient iEEG pickle files
# (names of the form HUP<id>_phaseII_hr_<hour>_fs_<rate>.pkl).
IEEG_DIRECTORY = "../../../Data/ieeg/temporal_neocortical"
# NOTE(review): presumably where the computed PLV vectors are written; it is
# not used in the visible cells — confirm against the later cells.
OUTPUT_DIRECTORY = "../../../Data/plv/vector"

In [10]:
# Starts empty. NOTE(review): presumably filled with the HUP ids to process in
# a later cell; nothing in the visible cells reads or appends to it — confirm.
patient_hup_ids = []

In [11]:
def parse_files_in_directory(directory_path):
    """Summarise the recordings found in *directory_path*, one entry per patient.

    Only files named exactly ``HUP<id>_phaseII_hr_<hour>_fs_<rate>.pkl`` are
    considered; any filename containing ``"D0"`` is skipped, and everything
    else in the directory is ignored.

    Args:
        directory_path: Path of the directory to list.

    Returns:
        A tuple ``(hup_ids, max_hours, sample_rates)`` of three parallel lists:
        the integer HUP ids (in the order they were first encountered), the
        largest hour number seen for each id, and the sample rate taken from
        the first file encountered for each id.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``FileNotFoundError``
            when the directory does not exist).
    """
    # The dot is escaped and the whole name must match, so look-alikes such as
    # "..._fs_512xpkl" or leftovers such as "..._fs_512.pkl.bak" are rejected.
    filename_pattern = re.compile(r"HUP(\d+)_phaseII_hr_(\d+)_fs_(\d+)\.pkl")

    max_hours = {}
    sample_rates = {}

    for filename in os.listdir(directory_path):
        # Files with "D0" in their name are excluded from the summary.
        # NOTE(review): presumably a separate recording/dataset tag — confirm.
        if "D0" in filename:
            continue

        match = filename_pattern.fullmatch(filename)
        if match is None:
            continue

        hup_id, hour, sample_rate = map(int, match.groups())

        # Track the highest hour index seen so far for this patient.
        max_hours[hup_id] = max(hour, max_hours.get(hup_id, hour))

        # Keep the first sample rate encountered; later files for the same
        # patient do not override it.
        sample_rates.setdefault(hup_id, sample_rate)

    # Flatten to parallel lists that share the insertion order of max_hours.
    hup_ids = list(max_hours)
    return (
        hup_ids,
        [max_hours[hup_id] for hup_id in hup_ids],
        [sample_rates[hup_id] for hup_id in hup_ids],
    )


# Scan the iEEG directory for per-patient recording metadata
hup_ids, max_hours, sample_rates = parse_files_in_directory(IEEG_DIRECTORY)

# Tabulate one row per patient (HUP id, maximum hour, sample rate), ordered by
# HUP id and re-indexed from 0
datasets_df = (
    pd.DataFrame(
        {"hup_id": hup_ids, "max_hour": max_hours, "sample_rate": sample_rates}
    )
    .sort_values(by="hup_id")
    .reset_index(drop=True)
)
datasets_df

Unnamed: 0,hup_id,max_hour,sample_rate
