# Process Erin's .mat files for alpha-delta ratio

In [3]:
import scipy.io
from scipy.stats import iqr
import numpy as np
import os

# Root folder under which this notebook saves the normalized alpha-delta
# ratio arrays (one sub-folder per time resolution).
AD_RATIO_DIRECTORY = "../../Data/ad_ratios"
# Folder whose entries are listed and loaded as per-patient .mat summaries.
ERIN_DIRECTORY = "../../../../erinconr/projects/fc_toolbox/results/analysis/intermediate/"

0 soz (1, 1)
1 name (1,)
2 times (2392, 1)
3 spikes (161, 2392)
4 coi_global (2392, 1)
5 rl (161, 2392)
6 labels (161, 1)
7 bipolar_labels (161, 1)
8 bipolar_pair (148, 2)
9 locs (161, 3)
10 bipolar_locs (161, 3)
11 anatomy (161, 1)
12 bad_anatomy_flag (1, 1)
13 ana_loc (161, 1)
14 ana_lat (161, 1)
15 file_times (2392, 1)
16 file_index (2392, 1)
17 ad (161, 2392)
18 block_dur (1, 1)
19 ns (161, 2392)
20 n_rm_ictal (1, 1)
21 sz_times (3, 2)
22 sz_semiology (3, 1)
23 rid (1, 1)
24 avg_fc (161, 161)
25 seq_info (2, 2392)
26 clinical (1, 1)
27 leader (161, 2392)
28 mod_midnight (2392, 1)
29 ns_car (161, 2392)
30 avg_fc_car (161, 161)
31 ns_bi (161, 2392)
32 avg_fc_bi (161, 161)
33 bp (161, 5, 2392)
34 bp_bi (161, 5, 2392)
35 avg_coh (12880, 6)
36 avg_coh_bi (12880, 6)
37 good_spikes (1, 1)
38 native_locs (161, 3)
39 native_bipolar_locs (161, 3)
40 fc_car_ws (2, 1)
41 fc_bi_ws (2, 1)
42 spikes_ws (2, 1)
43 rl_ws (2, 1)
44 coh_car_ws (2, 1)
45 coh_bi_ws (2, 1)
46 bp_bi_ws (2, 1)
47 bp_car_ws (2, 1)

In [4]:
# Find the positional index of the "ad" (alpha-delta ratio) field inside the
# "summ" struct of every file in ERIN_DIRECTORY, and verify that the index is
# identical across files so it can be used as a single constant.
ad_field_indices = []
for filename in os.listdir(ERIN_DIRECTORY):
    # Load from the same directory that was just listed, rather than from a
    # second hard-coded copy of the path that can drift out of sync.
    mat_file = scipy.io.loadmat(os.path.join(ERIN_DIRECTORY, filename))
    mat_file = mat_file["summ"][0]
    field_names = mat_file.dtype.names
    # Uncomment to dump every field with its index and shape:
    # for index, key in enumerate(field_names):
    #     print(index, key, mat_file[0][index].shape)
    if "ad" in field_names:
        ad_field_indices.append(field_names.index("ad"))

ad_ratio_index = np.unique(np.array(ad_field_indices, dtype=int))
assert (
    ad_ratio_index.shape[0] == 1
), f"expected exactly one distinct index for the 'ad' field, found {ad_ratio_index}"
ad_ratio_index = ad_ratio_index[0]

In [5]:
# Display the "ad" field index resolved by the preceding cell.
ad_ratio_index

17

## Hourly alpha-delta ratio

In [3]:
def average_samples(patient_ad_ratio: np.ndarray, group_size: int = 6) -> np.ndarray:
    """Average consecutive samples along axis 1 in fixed-size groups.

    Args:
        patient_ad_ratio: 2-D array; axis 0 is kept as-is and axis 1 is the
            sample axis that gets reduced.
        group_size: Number of consecutive samples averaged into one output
            column. Defaults to 6, the value this function originally
            hard-coded.

    Returns:
        Array with the same number of rows as the input. It has one column
        per complete group, plus one final column holding the mean of the
        leftover samples when the sample count is not a multiple of
        ``group_size``.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.

    Note:
        Plain ``mean`` is used, so a NaN anywhere inside a group makes that
        group's average NaN.
    """
    if group_size < 1:
        raise ValueError("group_size must be a positive integer")

    num_channels = patient_ad_ratio.shape[0]
    num_samples = patient_ad_ratio.shape[1]
    num_whole_groups, num_remaining_samples = divmod(num_samples, group_size)
    split_at = num_whole_groups * group_size

    # Fold the complete groups into a third axis and average over it.
    averaged_array = (
        patient_ad_ratio[:, :split_at]
        .reshape(num_channels, num_whole_groups, group_size)
        .mean(axis=2)
    )

    if num_remaining_samples > 0:
        # The trailing partial group is averaged over however many samples remain.
        remaining_array = patient_ad_ratio[:, split_at:].mean(axis=1, keepdims=True)
        averaged_array = np.concatenate((averaged_array, remaining_array), axis=1)

    return averaged_array


# # Test function with some random data
# # Create a 4x32 numpy array, i.e. 4 channels with 32 samples each
# patient_ad_ratio = np.random.rand(4, 32)
# averaged_array = average_samples(patient_ad_ratio)
# print(patient_ad_ratio, averaged_array)

In [4]:
# For every file in ERIN_DIRECTORY: average the alpha-delta ratio in groups of
# 6 consecutive samples (the "hourly" resolution of this section), average
# across channels, normalize by median and IQR, and save the resulting trace.
for filename in os.listdir(ERIN_DIRECTORY):
    # Characters 3..5 of the file name are parsed as the numeric patient id.
    # NOTE(review): presumably names look like "HUPddd..." with zero-padded
    # ids — confirm against the directory contents.
    patient_hup_id = int(filename[3:6])

    # Load from the directory that was listed above; the previous hard-coded
    # path used a different number of "../" than ERIN_DIRECTORY.
    mat_file = scipy.io.loadmat(os.path.join(ERIN_DIRECTORY, filename))
    mat_file = mat_file["summ"][0][0]

    # Access struct fields by name instead of by position so the cell does not
    # depend on field ordering ("ad" was index 17 and "labels" index 6 in the
    # field listing printed earlier in this notebook).
    patient_ad_ratio = mat_file["ad"]
    num_channels = mat_file["labels"].shape[0]
    assert patient_ad_ratio.shape[0] == num_channels

    hourly_ad_ratio = average_samples(patient_ad_ratio)
    num_hours = hourly_ad_ratio.shape[1]
    assert hourly_ad_ratio.shape[0] == num_channels

    # Channel-average while ignoring NaN channels; a time bin in which every
    # channel is NaN stays NaN (NumPy emits "Mean of empty slice" for it).
    hourly_ad_ratio_avg = np.nanmean(hourly_ad_ratio, axis=0)
    # Robust z-score: centre on the median and scale by the interquartile range.
    hourly_ad_ratio_normalized = (
        hourly_ad_ratio_avg - np.nanmedian(hourly_ad_ratio_avg)
    ) / iqr(hourly_ad_ratio_avg, nan_policy="omit")
    assert (
        hourly_ad_ratio_normalized.shape[0] == hourly_ad_ratio_avg.shape[0] == num_hours
    )

    # Assert that hourly_ad_ratio_normalized is not all nan
    assert not np.isnan(hourly_ad_ratio_normalized).all()

    print(
        f"HUP{patient_hup_id}",
        patient_ad_ratio.shape,
        hourly_ad_ratio.shape,
        hourly_ad_ratio_avg.shape,
    )

    np.save(
        f"{AD_RATIO_DIRECTORY}/hourly/HUP_{patient_hup_id}.npy",
        hourly_ad_ratio_normalized,
    )

  hourly_ad_ratio_avg = np.nanmean(hourly_ad_ratio, axis=0)


HUP217 (161, 2392) (161, 399) (399,)
HUP100 (169, 1530) (169, 255) (255,)
HUP89 (98, 878) (98, 147) (147,)
HUP210 (205, 1562) (205, 261) (261,)
HUP107 (120, 1823) (120, 304) (304,)
HUP175 (140, 289) (140, 49) (49,)
HUP80 (104, 2705) (104, 451) (451,)
HUP219 (133, 367) (133, 62) (62,)
HUP87 (90, 1100) (90, 184) (184,)
HUP172 (136, 1439) (136, 240) (240,)
HUP147 (128, 1063) (128, 178) (178,)
HUP196 (102, 1114) (102, 186) (186,)
HUP140 (97, 892) (97, 149) (149,)
HUP191 (150, 1298) (150, 217) (217,)
HUP64 (94, 1840) (94, 307) (307,)
HUP132 (176, 1828) (176, 305) (305,)
HUP225 (181, 1014) (181, 169) (169,)
HUP135 (105, 1192) (105, 199) (199,)
HUP149 (289, 3145) (289, 525) (525,)
HUP198 (152, 1815) (152, 303) (303,)
HUP124 (128, 3746) (128, 625) (625,)
HUP189 (122, 460) (122, 77) (77,)
HUP158 (232, 1023) (232, 171) (171,)
HUP123 (169, 2266) (169, 378) (378,)
HUP180 (121, 1004) (121, 168) (168,)
HUP151 (180, 1124) (180, 188) (188,)
HUP75 (118, 1525) (118, 255) (255,)
HUP72 (60, 1887) (60, 315

In [5]:
# Inspect the normalized trace left over from the final iteration of the loop
# above (i.e. only the last file that was processed).
hourly_ad_ratio_normalized

array([        nan,         nan,  0.39797883,  0.24516319,  0.28950635,
        0.44779877, -0.34917766,  0.49261442,  0.67100414,  1.10555401,
       -0.78366042, -0.98644492, -1.52035286, -1.18013664,  0.53225261,
       -0.3855093 , -1.43514476,         nan,         nan, -0.92929984,
       -0.39918605,  0.2347145 , -0.01641958, -0.46002324, -1.01686044,
       -0.64443935, -0.23425431, -0.39063314, -0.34278521, -0.18455338,
        0.01211461, -0.08928272,  0.        ,  0.30194891, -0.16918098,
       -0.82808028, -0.65203126, -0.82622381, -0.45577367, -0.61761152,
       -0.35026852, -1.16560261, -0.74242021, -0.04342088,  0.05479575,
       -0.42767997, -0.61598861, -0.11011624, -0.14834312, -0.36211076,
        0.38781984, -0.37648937,  0.28088516,  0.18784072, -0.00447682,
        0.86648315,  1.31917976,  0.91976237, -0.0783496 , -0.58565973,
       -1.00919859, -0.7233449 , -0.54518131, -0.91659698, -0.2608613 ,
       -0.97261122, -0.70323872,  0.51945319,  1.12131928,  0.00

In [6]:
# Boolean mask of entries strictly above -0.4054; NaN entries compare as False.
# NOTE(review): the origin of the -0.4054 cutoff is not shown in this notebook — confirm.
hourly_ad_ratio_normalized > -0.4054

array([False, False,  True,  True,  True,  True,  True,  True,  True,
        True, False, False, False, False,  True,  True, False, False,
       False, False,  True,  True,  True, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
       False, False, False, False,  True, False, False,  True,  True,
       False, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False, False, False, False,
       False,  True, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False, False, False, False, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
       False, False, False,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True])

## Half-hourly alpha-delta ratio

In [8]:
def average_samples_3(patient_ad_ratio):
    """Reduce axis 1 by averaging each run of 3 consecutive samples.

    The input is treated as (rows, samples). Every complete triple of samples
    becomes one output column. If the sample count is not divisible by 3, the
    one or two trailing samples are averaged into a final extra column.
    """
    n_rows = patient_ad_ratio.shape[0]
    n_cols = patient_ad_ratio.shape[1]
    n_triples, n_leftover = divmod(n_cols, 3)
    cutoff = n_triples * 3

    # View the complete triples along a third axis and take their mean.
    stacked = patient_ad_ratio[:, :cutoff].reshape(n_rows, n_triples, 3)
    triple_means = stacked.mean(axis=2)

    # Nothing left over: the per-triple means are the full answer.
    if n_leftover == 0:
        return triple_means

    tail_mean = patient_ad_ratio[:, cutoff:].mean(axis=1, keepdims=True)
    return np.concatenate((triple_means, tail_mean), axis=1)


# # Test function with some random data
# # Create a 4x31 numpy array, i.e. 4 channels with 31 samples each
# patient_ad_ratio = np.random.rand(4, 31)
# averaged_array = average_samples_3(patient_ad_ratio)
# print(averaged_array)

In [9]:
# For every file in ERIN_DIRECTORY: average the alpha-delta ratio in groups of
# 3 consecutive samples (the "half-hourly" resolution of this section), average
# across channels, normalize by median and IQR, and save the resulting trace.
#
# NOTE: the `hourly_*` / `num_hours` variable names are kept unchanged so that
# any later cell reading them keeps working, but in this cell they hold
# half-hourly values.
for filename in os.listdir(ERIN_DIRECTORY):
    # Characters 3..5 of the file name are parsed as the numeric patient id.
    # NOTE(review): presumably names look like "HUPddd..." with zero-padded
    # ids — confirm against the directory contents.
    patient_hup_id = int(filename[3:6])

    # Load from the directory that was listed above; the previous hard-coded
    # path used a different number of "../" than ERIN_DIRECTORY.
    mat_file = scipy.io.loadmat(os.path.join(ERIN_DIRECTORY, filename))
    mat_file = mat_file["summ"][0][0]

    # Access struct fields by name instead of by position so the cell does not
    # depend on field ordering ("ad" was index 17 and "labels" index 6 in the
    # field listing printed earlier in this notebook).
    patient_ad_ratio = mat_file["ad"]
    num_channels = mat_file["labels"].shape[0]
    assert patient_ad_ratio.shape[0] == num_channels

    hourly_ad_ratio = average_samples_3(patient_ad_ratio)
    num_hours = hourly_ad_ratio.shape[1]
    assert hourly_ad_ratio.shape[0] == num_channels

    # Channel-average while ignoring NaN channels; a time bin in which every
    # channel is NaN stays NaN (NumPy emits "Mean of empty slice" for it).
    hourly_ad_ratio_avg = np.nanmean(hourly_ad_ratio, axis=0)
    # Robust z-score: centre on the median and scale by the interquartile range.
    hourly_ad_ratio_normalized = (
        hourly_ad_ratio_avg - np.nanmedian(hourly_ad_ratio_avg)
    ) / iqr(hourly_ad_ratio_avg, nan_policy="omit")
    assert (
        hourly_ad_ratio_normalized.shape[0] == hourly_ad_ratio_avg.shape[0] == num_hours
    )

    # Assert that hourly_ad_ratio_normalized is not all nan
    assert not np.isnan(hourly_ad_ratio_normalized).all()

    print(
        f"HUP{patient_hup_id}",
        patient_ad_ratio.shape,
        hourly_ad_ratio.shape,
        hourly_ad_ratio_avg.shape,
    )

    np.save(
        f"{AD_RATIO_DIRECTORY}/half_hourly/HUP_{patient_hup_id}.npy",
        hourly_ad_ratio_normalized,
    )

  hourly_ad_ratio_avg = np.nanmean(hourly_ad_ratio, axis=0)


HUP217 (161, 2392) (161, 798) (798,)
HUP100 (169, 1530) (169, 510) (510,)
HUP89 (98, 878) (98, 293) (293,)
HUP210 (205, 1562) (205, 521) (521,)
HUP107 (120, 1823) (120, 608) (608,)
HUP175 (140, 289) (140, 97) (97,)
HUP80 (104, 2705) (104, 902) (902,)
HUP219 (133, 367) (133, 123) (123,)
HUP87 (90, 1100) (90, 367) (367,)
HUP172 (136, 1439) (136, 480) (480,)
HUP147 (128, 1063) (128, 355) (355,)
HUP196 (102, 1114) (102, 372) (372,)
HUP140 (97, 892) (97, 298) (298,)
HUP191 (150, 1298) (150, 433) (433,)
HUP64 (94, 1840) (94, 614) (614,)
HUP132 (176, 1828) (176, 610) (610,)
HUP225 (181, 1014) (181, 338) (338,)
HUP135 (105, 1192) (105, 398) (398,)
HUP149 (289, 3145) (289, 1049) (1049,)
HUP198 (152, 1815) (152, 605) (605,)
HUP124 (128, 3746) (128, 1249) (1249,)
HUP189 (122, 460) (122, 154) (154,)
HUP158 (232, 1023) (232, 341) (341,)
HUP123 (169, 2266) (169, 756) (756,)
HUP180 (121, 1004) (121, 335) (335,)
HUP151 (180, 1124) (180, 375) (375,)
HUP75 (118, 1525) (118, 509) (509,)
HUP72 (60, 1887) 