In [1]:
import os, random
import numpy as np
import pandas as pd
import scipy.io as sio

In [2]:
ERIN_DIRECTORY = "../../../../erinconr/projects/fc_toolbox/results/all_out"

In [3]:
good_hup_ids_for_spike_detector = np.load("../good_hup_ids_for_spike_detector.npy")
good_hup_ids_for_spike_detector

array([137, 138, 139, 140, 141, 142, 143, 145, 146, 148, 150, 151, 152,
       153, 154, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166,
       167, 168, 169, 170, 171, 172, 173, 174, 175, 177, 178, 179, 180,
       181, 182, 184, 185, 186, 187, 188, 189, 190, 191, 192, 196, 197,
       199, 201, 202, 204, 205, 206, 207, 209, 210, 211, 213, 214, 215,
       219, 221, 223, 224, 225])

In [4]:
# Load HUP_implant_dates.xlsx
nina_patients_df = pd.read_excel("../../../Data/HUP_implant_dates.xlsx")
# Make the hup_id column integers
nina_patients_df["hup_id"] = nina_patients_df["hup_id"].astype(int)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg
0,225,HUP225_phaseII,2021-10-18,07:15:00,2021-10-26 17:30:00,58.5
1,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5
2,223,HUP223_phaseII,2021-09-29,07:15:00,2021-10-08 08:21:00,101.4
3,221,HUP221_phaseII,2021-08-16,07:15:00,2021-08-23 00:00:00,124.3
4,219,HUP219_phaseII,2021-07-12,07:15:00,2021-07-16 08:18:00,101.6
...,...,...,...,...,...,...
75,141,HUP141_phaseII,2017-05-24,07:15:00,2017-06-01 00:00:00,85.7
76,140,HUP140_phaseII_D01-D02,2017-05-10,07:15:00,2017-05-19 00:00:00,56.7
77,139,HUP139_phaseII,2017-04-26,07:15:00,2017-05-09 00:00:00,69.8
78,138,HUP138_phaseII,2017-04-12,07:15:00,2017-04-20 00:00:00,84.4


In [5]:
# Add a boolean column in nina_patients_df called is_single_dataset and make it True if IEEG_Portal_Number ends with "phaseII"
nina_patients_df["is_single_dataset"] = nina_patients_df[
    "IEEG_Portal_Number"
].str.endswith("phaseII")
# Add a boolean column in nina_patients_df called is_good_for_spike_detector and make it True if the row's hup_id is in good_hup_ids_for_spike_detector
nina_patients_df["is_good_for_spike_detector"] = nina_patients_df["hup_id"].isin(
    good_hup_ids_for_spike_detector
)

In [6]:
# Drop the rows in nina_patients_df where is_single_dataset is False
nina_patients_df = nina_patients_df[nina_patients_df.is_single_dataset == True]
# Drop the rows in nina_patients_df where is_good_for_spike_detector is False
nina_patients_df = nina_patients_df[nina_patients_df.is_good_for_spike_detector == True]
# Sort by hup_id in ascending order
nina_patients_df = nina_patients_df.sort_values(by=["hup_id"], ascending=True)
# Drop columns Implant_Date, implant_time, Explant_Date, weight_kg
nina_patients_df = nina_patients_df.drop(
    columns=["Implant_Date", "implant_time", "Explant_Date", "weight_kg"]
)
# Reset index
nina_patients_df = nina_patients_df.reset_index(drop=True)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset,is_good_for_spike_detector
0,138,HUP138_phaseII,True,True
1,139,HUP139_phaseII,True,True
2,141,HUP141_phaseII,True,True
3,142,HUP142_phaseII,True,True
4,143,HUP143_phaseII,True,True
5,145,HUP145_phaseII,True,True
6,146,HUP146_phaseII,True,True
7,150,HUP150_phaseII,True,True
8,151,HUP151_phaseII,True,True
9,154,HUP154_phaseII,True,True


In [10]:
for index, row in nina_patients_df.iterrows():
    hup_id = row["hup_id"]
    dataset_name = row["IEEG_Portal_Number"]

    erin_mat_file = sio.loadmat(os.path.join(ERIN_DIRECTORY, f"HUP{hup_id}_pc.mat"))

    mat_content = erin_mat_file["pc"]
    mat_content.dtype

    name = mat_content[0, 0]["name"]
    file = mat_content[0, 0]["file"]
    file.dtype

    name = file[0, 0]["name"]
    run = file[0, 0]["run"]
    run.dtype

    data = run[0]["data"]
    run_times = run[0]["run_times"]
    block_times = run[0]["block_times"]
    cohere_out = run[0]["cohere_out"]

    assert len(data) == len(run_times) == len(block_times) == len(cohere_out) > 100
    # Taking first 2 and last 2 elements
    first_two = run_times[:2].tolist()
    last_two = run_times[-2:].tolist()

    # Selecting 16 random elements from the middle and sorting them
    middle_indices = sorted(random.sample(range(2, len(run_times) - 2), 18))
    middle_elements = [run_times[i] for i in middle_indices]

    # Combining the lists
    all_indices = [0, 1] + middle_indices
    all_elements = first_two + middle_elements
    assert len(all_indices) == len(all_elements) == 20

    # Iterating through the selected elements and their indices
    for i, clip in zip(all_indices, all_elements):
        clip = clip[0]
        start_time, end_time = clip
        start_time = int(start_time)
        end_time = int(end_time)
        montage = data[i][0]["montage"][0]["name"][0][1][0]
        assert montage == "car"
        spikes = data[i][0]["montage"][0]["spikes"][0][1]
        num_spikes = len(spikes)
        print(f"Clip {i}, {start_time} to {end_time}, {num_spikes} spikes")

Clip 0, 87 to 147, 0 spikes
Clip 1, 1096 to 1156, 0 spikes
Clip 11, 7049 to 7109, 0 spikes
Clip 74, 44407 to 44467, 0 spikes
Clip 80, 48468 to 48528, 9 spikes
Clip 125, 75161 to 75221, 9 spikes
Clip 188, 113098 to 113158, 0 spikes
Clip 329, 197794 to 197854, 0 spikes
Clip 358, 215334 to 215394, 0 spikes
Clip 414, 248747 to 248807, 6 spikes
Clip 497, 298552 to 298612, 12 spikes
Clip 552, 331310 to 331370, 0 spikes
Clip 658, 395319 to 395379, 16 spikes
Clip 744, 446650 to 446710, 0 spikes
Clip 748, 448827 to 448887, 20 spikes
Clip 767, 460543 to 460603, 169 spikes
Clip 812, 487697 to 487757, 88 spikes
Clip 877, 526544 to 526604, 4 spikes
Clip 989, 593603 to 593663, 2 spikes
Clip 995, 597366 to 597426, 0 spikes
Clip 0, 103 to 163, 28 spikes
Clip 1, 1023 to 1083, 36 spikes
Clip 114, 68916 to 68976, 49 spikes
Clip 118, 70907 to 70967, 83 spikes
Clip 205, 123324 to 123384, 75 spikes
Clip 307, 184249 to 184309, 28 spikes
Clip 733, 439825 to 439885, 5 spikes
Clip 784, 470894 to 470954, 24 spik

KeyboardInterrupt: 

Verify the following 20 time clips:
1. The first 2 clips
2. Random 18 clips in the middle (until the last two)

The reason is that the last two might be incomplete.

1. First, make sure the number of spikes are the same or smaller
2. Second, make sure the channels match up
3. Fuzzy time