In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
og_tables_dir = "../../cnt-data/giant_new_tables"

patient_ids = []

# Iterate through all csv files in og_tables_dir, print out the file name
for file in os.listdir(og_tables_dir):
    if file.endswith(".csv"):
        # Extract HUP_{patient_id}.csv
        patient_id = int(file.split("_")[1].split(".")[0])
        patient_ids.append(patient_id)

In [3]:
# Example frequency band names
band_names = ["delta", "theta", "alpha", "beta", "gamma"]

In [4]:
bandpower_dir = "../../cnt-data/bandpower"
synchrony_dir = "../../cnt-data/synchrony/all/broadband"

In [5]:
for patient_id in patient_ids:
    # Construct the file path for the bandpower and synchrony files
    bandpower_file = f"{bandpower_dir}/HUP_{patient_id}.npy"
    synchrony_file = f"{synchrony_dir}/HUP_{patient_id}.npy"

    # Check if the bandpower file exists
    if not os.path.exists(bandpower_file):
        print(f"Bandpower file for patient {patient_id} not found. Skipping.")
        continue

    # Load the bandpower npy file
    bandpower = np.load(bandpower_file)
    # Load the synchrony npy file
    synchrony = np.load(synchrony_file)

    # Take the mean across the 3rd axis
    bandpower_avg = np.mean(bandpower, axis=2)

    bandpower_len = bandpower.shape[1]
    synchrony_len = synchrony.shape[0]

    # Print bandpower_len and synchrony_len and synchrony_len/bandpower_len
    print(
        f"Patient {patient_id}: bandpower_len={bandpower_len}, synchrony_len={synchrony_len}, synchrony_len/bandpower_len={synchrony_len/bandpower_len}"
    )

    # Load the original table
    og_table = pd.read_csv(f"{og_tables_dir}/HUP_{patient_id}.csv")

    # From the first column where synchrony_broadband is not null, add the bandpower values
    # to the table in the columns "bandpower_{band}" where band is the band name

Patient 219: bandpower_len=170, synchrony_len=2550, synchrony_len/bandpower_len=15.0
Patient 225: bandpower_len=384, synchrony_len=5760, synchrony_len/bandpower_len=15.0
Patient 184: bandpower_len=340, synchrony_len=5100, synchrony_len/bandpower_len=15.0
Patient 190: bandpower_len=660, synchrony_len=9900, synchrony_len/bandpower_len=15.0
Bandpower file for patient 147 not found. Skipping.
Bandpower file for patient 153 not found. Skipping.
Bandpower file for patient 152 not found. Skipping.
Patient 146: bandpower_len=776, synchrony_len=11640, synchrony_len/bandpower_len=15.0
Patient 191: bandpower_len=480, synchrony_len=7200, synchrony_len/bandpower_len=15.0
Patient 185: bandpower_len=512, synchrony_len=7680, synchrony_len/bandpower_len=15.0
Bandpower file for patient 193 not found. Skipping.
Patient 187: bandpower_len=402, synchrony_len=6030, synchrony_len/bandpower_len=15.0
Patient 178: bandpower_len=418, synchrony_len=6270, synchrony_len/bandpower_len=15.0
Bandpower file for patient

In [6]:
target_dir = "../../cnt-data/giant_new_tables_with_bandpower"
os.makedirs(target_dir, exist_ok=True)

band_names = ["delta", "theta", "alpha", "beta", "gamma"]

for patient_id in patient_ids:
    print(f"Processing patient {patient_id}")
    bandpower_file = f"{bandpower_dir}/HUP_{patient_id}.npy"
    synchrony_file = f"{synchrony_dir}/HUP_{patient_id}.npy"

    if not os.path.exists(bandpower_file):
        # print(f"Bandpower file for patient {patient_id} not found. Skipping.")
        continue

    # Load arrays
    bandpower = np.load(
        bandpower_file
    )  # shape might be (5, n*15, #channels) or similar
    synchrony = np.load(
        synchrony_file
    )  # shape (n,) (where each entry is a 2-min block)

    # Average over third axis if needed
    bandpower_avg = np.mean(bandpower, axis=2)  # now shape (5, n*15), for example

    print(np.count_nonzero(~np.isnan(bandpower_avg[1])))

    # Print the number of non-null values in synchrony
    print(np.count_nonzero(~np.isnan(synchrony)))

    bandpower_len = bandpower_avg.shape[
        1
    ]  # total number of 2-min segments in bandpower
    synchrony_len = synchrony.shape[0]  # total number of 2-min segments in synchrony

    # print(
    #     f"Patient {patient_id}: bandpower_len={bandpower_len}, "
    #     f"synchrony_len={synchrony_len}, "
    #     f"ratio={synchrony_len/bandpower_len:.2f}"
    # )

    # Read original CSV
    og_table_path = f"{og_tables_dir}/HUP_{patient_id}.csv"
    og_table = pd.read_csv(og_table_path)

    # Find the first valid row for synchrony_broadband
    first_non_null_idx = og_table["synchrony_broadband"].first_valid_index()
    if first_non_null_idx is None:
        print(f"No valid synchrony_broadband data for patient {patient_id}. Skipping.")
        continue

    # Add NaN columns for bandpower
    for band_name in band_names:
        og_table[f"bandpower_{band_name}"] = np.nan

    # We assume each 30-minute chunk = 15 two-minute segments
    chunk_len = 15

    # Figure out how many 30-min chunks exist in both bandpower and synchrony
    # If bandpower_len = n * 15, then n = bandpower_len // 15 is the number of 30-min chunks.
    n_chunks_bandpower = bandpower_len // chunk_len
    # If synchrony_len = n, that might also imply n 30-minute chunks,
    # or it might be the total # of 2-min segments.
    # Typically, you'd store 1 synchrony value per 2-min block, so n chunks is actually n/15
    # 30-min blocks. But let's assume your code wants to do it chunk by chunk (≥ 1 chunk).
    # If you truly have 1 synchrony value per 2-min block, then the number of 30-min chunks
    # from synchrony’s perspective is synchrony_len // 15.
    # However, your original code used synchrony_len directly as n_chunks.
    # Let's be safe and consider the 30-min-chunk count from synchrony’s perspective:
    n_chunks_synchrony = synchrony_len // 15

    # The actual number of chunks we can fill is the smaller of the two
    n_chunks = min(n_chunks_bandpower, n_chunks_synchrony)

    # Fill the columns
    for chunk_i in range(n_chunks):
        for band_i, band_name in enumerate(band_names):
            # pick the leftmost column for this chunk
            val = bandpower_avg[band_i, chunk_i * chunk_len]

            start_idx = first_non_null_idx + (chunk_i * chunk_len)
            end_idx = start_idx + chunk_len

            if start_idx >= len(og_table):
                break
            end_idx = min(end_idx, len(og_table))

            og_table.loc[start_idx : end_idx - 1, f"bandpower_{band_name}"] = val

    # Save
    out_path = os.path.join(target_dir, f"HUP_{patient_id}.csv")
    og_table.to_csv(out_path, index=False)
    # print(f"Saved updated table for patient {patient_id} to: {out_path}")

Processing patient 219
3
1786
Processing patient 225
7
2498
Processing patient 184
2
4009
Processing patient 190
73
9165
Processing patient 147
Processing patient 153
Processing patient 152
Processing patient 146
13
10243
Processing patient 191
0
6115
Processing patient 185
153
6340
Processing patient 193
Processing patient 187
10
5089
Processing patient 178
277
5509
Processing patient 150
Processing patient 144
0
4890
Processing patient 145
0
7769
Processing patient 151
Processing patient 179
Processing patient 186
26
5990
Processing patient 192
Processing patient 223
0
4051
Processing patient 196
0
4992
Processing patient 182
Processing patient 155
1
5127
Processing patient 141
0
4342
Processing patient 169
0
5871
Processing patient 168
Processing patient 140
Processing patient 154
1
2064
Processing patient 181
Processing patient 142
34
9277
Processing patient 156
Processing patient 157
4
5631
Processing patient 143
Processing patient 180
12
4627
Processing patient 221
4
3993
Process

In [7]:
# count the number of non-null values in bandpower_avg[0]
print(np.count_nonzero(~np.isnan(bandpower_avg[0])))

7


In [9]:
# Example frequency band names
band_names = ["delta", "theta", "alpha", "beta", "gamma"]

for patient_id in patient_ids:
    bandpower_file = f"{bandpower_dir}/HUP_{patient_id}.npy"
    synchrony_file = f"{synchrony_dir}/HUP_{patient_id}.npy"

    # Check if the bandpower file exists
    if not os.path.exists(bandpower_file):
        # print(f"[Patient {patient_id}] bandpower file not found. Skipping.")
        continue

    # Load bandpower (shape might be (5, n*15, X) depending on your data)
    bandpower = np.load(bandpower_file)
    # If your bandpower has shape (5, n*15, n_channels), for instance,
    # we average across the third axis to get shape (5, n*15):
    bandpower_avg = np.mean(bandpower, axis=2)

    # Print overall shape info
    print(f"[Patient {patient_id}] bandpower_avg shape: {bandpower_avg.shape}")

    # For each band (row in bandpower_avg), count non-null values
    for band_i, band_name in enumerate(band_names):
        # Count how many entries are not NaN
        nonnull_count = np.count_nonzero(~np.isnan(bandpower_avg[band_i]))
        print(f"  - {band_name}: {nonnull_count} non-null values")

    print("-" * 40)  # separator for readability

[Patient 219] bandpower_avg shape: (5, 170)
  - delta: 3 non-null values
  - theta: 3 non-null values
  - alpha: 3 non-null values
  - beta: 3 non-null values
  - gamma: 3 non-null values
----------------------------------------
[Patient 225] bandpower_avg shape: (5, 384)
  - delta: 7 non-null values
  - theta: 7 non-null values
  - alpha: 7 non-null values
  - beta: 7 non-null values
  - gamma: 7 non-null values
----------------------------------------
[Patient 184] bandpower_avg shape: (5, 340)
  - delta: 2 non-null values
  - theta: 2 non-null values
  - alpha: 2 non-null values
  - beta: 2 non-null values
  - gamma: 2 non-null values
----------------------------------------
[Patient 190] bandpower_avg shape: (5, 660)
  - delta: 73 non-null values
  - theta: 73 non-null values
  - alpha: 73 non-null values
  - beta: 73 non-null values
  - gamma: 73 non-null values
----------------------------------------
[Patient 146] bandpower_avg shape: (5, 776)
  - delta: 13 non-null values
  - t