# Data pre-processing and exploration

In [None]:
import mne
import scipy

import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

from pyedflib import highlevel
from libpysal.weights import KNN
from pathlib import Path

import utils as my_ut

## Wind Spatio-Temporal Dataset

The dataset we use in the following part is the [Wind Spatio-Temporal Dataset2](https://zenodo.org/records/5516550). We extract the wind speed and power values for the 200 turbines; these values form the multivariate time series we consider. We also use the coordinates of the turbines to turn our time signals into a graph time series.

In [None]:
# Load the raw wind dataset CSV into a DataFrame.
data_df = pd.read_csv("data/Wind_Turbines/Wind Spatio-Temporal Dataset2.csv")

In [None]:
# Preview the first five rows of the raw table.
data_df.head(5)

### Extract the turbine coordinates

In [None]:
# Columns to keep: the label column followed by Turbine1 ... Turbine200.
col_names = ["Unnamed: 0", *(f'Turbine{i}' for i in range(1, 201))]
# The first two rows of the raw table are used as the turbine coordinates;
# the label column becomes an (unnamed) index.
coord = data_df.iloc[0:2][col_names].copy(deep=True).set_index('Unnamed: 0')
coord.index.name = None

In [None]:
# Persist the turbine coordinates table.
coord.to_csv('data/turbines_coord.csv')

### Extract the speed and power time series per turbine

In [None]:
# Keep the rows from position 3 onward (independent deep copy of the raw table).
speed_power_df = data_df.iloc[3:len(data_df.index)].copy(deep=True)
# Promote the row labelled 3 to column header, then drop the first kept row.
# NOTE(review): assumes the default integer index, so that label 3 is also
# the first row of the slice -- confirm against the CSV.
speed_power_df.columns = speed_power_df.loc[3]
speed_power_df.drop(speed_power_df.index[0], inplace=True)
# Discard the mast speed/direction columns.
speed_power_df = speed_power_df.drop(['Mast1_Speed', 'Mast2_Speed', "Mast3_Speed", "Mast1_Direction", 'Mast2_Direction', "Mast3_Direction"], axis=1)

In [None]:
# Persist the per-turbine speed/power table.
speed_power_df.to_csv('data/turbines_speed_power.csv')

## CHB-MIT_Scalp_EEG

The following data comes from [CHB-MIT Scalp EEG Database](https://physionet.org/content/chbmit/1.0.0/chb01/#files-panel). It consists of a collection of EEG recordings of 22 pediatric subjects with intractable seizures. For each subject, a number of *.edf* files were recorded containing generally one hour of digitized EEG signals, recorded with International 10-20 system of EEG electrode positions and nomenclature.

Please refer to this [learning eeg presentation](https://www.learningeeg.com/montages-and-technical-components) for details about the nomenclature of the channels.

### Opening and visualization of the data

In [None]:
# Read recording chb01_03: signal array, per-channel headers and file header.
signals_03, signal_headers_03, header_03 = highlevel.read_edf('data/CHB-MIT_Scalp_EEG/chb01_03.edf')
# Display the shape of the signal array.
signals_03.shape

In [None]:
# Sampling rate taken from the first channel header, cast to int so it can be
# used in integer sample-index arithmetic below.
sample_freq = int(signal_headers_03[0]['sample_rate'])

In [None]:
# Seizure boundaries, multiplied by sample_freq below to obtain sample indices.
seizure_start_03 = 2996
seizure_end_03 = 3036

# Plot the whole first channel of chb01_03 with the seizure boundaries marked.
fig, ax = plt.subplots(1, 1, figsize=(20, 4))
ax.plot(signals_03[0, :])
ax.axvline(x=sample_freq*seizure_start_03, color='k')
ax.axvline(x=sample_freq*seizure_end_03, color='k')
# Relabel the x axis in seconds. Labels are derived from the numeric tick
# positions instead of the private `Text._text` attribute, whose content may
# be empty or contain a Unicode minus sign that `int()` cannot parse.
tick_positions = ax.get_xticks()[1:-1]
new_labels = [str(int(tick)//sample_freq) for tick in tick_positions]
ax.set_xticks(tick_positions, new_labels)
ax.set_xlabel('Time (s)')

In [None]:
# Sample window: from 10 s before the seizure start to 10 s after its end.
plot_start_03 = (seizure_start_03 - 10) * sample_freq
plot_end_03 = (seizure_end_03 + 10) * sample_freq

fig, ax = plt.subplots(1, 1, figsize=(20, 4))
ax.plot(signals_03[0, plot_start_03:plot_end_03])
# Seizure boundaries expressed relative to the start of the plotted window.
ax.axvline(x=sample_freq*seizure_start_03 - plot_start_03, color='k')
ax.axvline(x=sample_freq*seizure_end_03 - plot_start_03, color='k')
# Relabel the x axis in absolute seconds. Labels are derived from the numeric
# tick positions instead of the private `Text._text` attribute, whose content
# may be empty or contain a Unicode minus sign that `int()` cannot parse.
tick_positions = ax.get_xticks()[1:-1]
new_labels = [str(int(tick)//sample_freq + plot_start_03//sample_freq) for tick in tick_positions]
ax.set_xticks(tick_positions, new_labels)
ax.set_xlabel('Time (s)')
print(sample_freq*seizure_start_03 - plot_start_03)

In [None]:
# Sample window: from 20 s before the seizure start to 20 s after its end.
plot_start_03 = (seizure_start_03 - 20) * sample_freq
plot_end_03 = (seizure_end_03 + 20) * sample_freq

nb_channel_to_show = 20
fig, axes = plt.subplots(nb_channel_to_show, 1, figsize=(20, nb_channel_to_show//2+5))
for i in range(nb_channel_to_show):
    axes[i].plot(signals_03[i, plot_start_03:plot_end_03])
    # Seizure boundaries expressed relative to the start of the plotted window.
    axes[i].axvline(x=sample_freq*seizure_start_03 - plot_start_03, color='k')
    axes[i].axvline(x=sample_freq*seizure_end_03 - plot_start_03, color='k')
    axes[i].axis('off')
# Only the bottom panel keeps an x axis, stripped of everything but its ticks.
axes[-1].axis('on')
axes[-1].spines['right'].set_visible(False)
axes[-1].spines['top'].set_visible(False)
axes[-1].spines['left'].set_visible(False)
axes[-1].get_yaxis().set_visible(False)
# Tick positions must come from the bottom panel of THIS figure; reading them
# from a leftover `ax` of another figure yields labels for the wrong window.
tick_positions = axes[-1].get_xticks()[1:-1]
new_labels = [str(int(tick)//sample_freq + plot_start_03//sample_freq) for tick in tick_positions]
axes[-1].set_xticks(tick_positions, new_labels)
axes[-1].set_xlabel('Time (s)')
fig.suptitle('Channels from chb01_03')

In [None]:
# Read recording chb01_04: signal array, per-channel headers and file header.
signals_04, signal_headers_04, header_04 = highlevel.read_edf('data/CHB-MIT_Scalp_EEG/chb01_04.edf')

In [None]:
# Seizure boundaries, multiplied by sample_freq below to obtain sample indices.
seizure_start_04 = 1467
seizure_end_04 = 1494

# Plot the whole first channel of chb01_04 with the seizure boundaries marked.
fig, ax = plt.subplots(1, 1, figsize=(20, 4))
ax.plot(signals_04[0, :])
ax.axvline(x=sample_freq*seizure_start_04, color='k')
ax.axvline(x=sample_freq*seizure_end_04, color='k')
# Relabel the x axis in seconds. Labels are derived from the numeric tick
# positions instead of the private `Text._text` attribute, whose content may
# be empty or contain a Unicode minus sign that `int()` cannot parse.
tick_positions = ax.get_xticks()[1:-1]
new_labels = [str(int(tick)//sample_freq) for tick in tick_positions]
ax.set_xticks(tick_positions, new_labels)
ax.set_xlabel('Time (s)')

In [None]:
# Sample window: from 10 s before the seizure start to 10 s after its end.
plot_start_04 = (seizure_start_04 - 10) * sample_freq
plot_end_04 = (seizure_end_04 + 10) * sample_freq

fig, ax = plt.subplots(1, 1, figsize=(20, 4))
ax.plot(signals_04[0, plot_start_04:plot_end_04])
# Seizure boundaries expressed relative to the start of the plotted window.
ax.axvline(x=sample_freq*seizure_start_04 - plot_start_04, color='k')
ax.axvline(x=sample_freq*seizure_end_04 - plot_start_04, color='k')
# Relabel the x axis in absolute seconds. Labels are derived from the numeric
# tick positions instead of the private `Text._text` attribute, whose content
# may be empty or contain a Unicode minus sign that `int()` cannot parse.
tick_positions = ax.get_xticks()[1:-1]
new_labels = [str(int(tick)//sample_freq + plot_start_04//sample_freq) for tick in tick_positions]
ax.set_xticks(tick_positions, new_labels)
ax.set_xlabel('Time (s)')
print(sample_freq*seizure_start_04 - plot_start_04)

In [None]:
# Sample window: from 20 s before the seizure start to 20 s after its end.
plot_start_04 = (seizure_start_04 - 20) * sample_freq
plot_end_04 = (seizure_end_04 + 20) * sample_freq

nb_channel_to_show = 20
fig, axes = plt.subplots(nb_channel_to_show, 1, figsize=(20, nb_channel_to_show//2+5))
for i in range(nb_channel_to_show):
    axes[i].plot(signals_04[i, plot_start_04:plot_end_04])
    # Seizure boundaries expressed relative to the start of the plotted window.
    axes[i].axvline(x=sample_freq*seizure_start_04 - plot_start_04, color='k')
    axes[i].axvline(x=sample_freq*seizure_end_04 - plot_start_04, color='k')
    axes[i].axis('off')
# Only the bottom panel keeps an x axis, stripped of everything but its ticks.
axes[-1].axis('on')
axes[-1].spines['right'].set_visible(False)
axes[-1].spines['top'].set_visible(False)
axes[-1].spines['left'].set_visible(False)
axes[-1].get_yaxis().set_visible(False)
# Tick positions must come from the bottom panel of THIS figure; reading them
# from a leftover `ax` of another figure yields labels for the wrong window.
tick_positions = axes[-1].get_xticks()[1:-1]
new_labels = [str(int(tick)//sample_freq + plot_start_04//sample_freq) for tick in tick_positions]
axes[-1].set_xticks(tick_positions, new_labels)
axes[-1].set_xlabel('Time (s)')
fig.suptitle('Channels from chb01_04')

## EEG Motor Movement/Imagery Dataset

This data set consists of over 1500 one- and two-minute EEG recordings from [PhysioNet EEG Motor Movement](https://physionet.org/content/eegmmidb/1.0.0/), obtained from 109 volunteers. Subjects performed different motor/imagery tasks while 64-channel EEG were recorded using the BCI2000 system (http://www.bci2000.org). Each subject performed 14 experimental runs: two one-minute baseline runs (one with eyes open, one with eyes closed), and three two-minute runs of each of four tasks. 

All files contain 64 EEG signals, each sampled at 160 samples per second, and an annotation channel. For each experimental run, temporal segments corresponding to different activities are annotated with **T0**, **T1** or **T2**. The exact activity depends on the experimental run.

### Pre-processing 

**Objectives:**

- check the quality of the channels: create a function to visualize all channels
- produce some utils: turn the change-points in sec into array indices
- apply the filtering pre-processing and visualize again: use scikit learn or mne
- create the graph: see on mne if something exists, otherwise name the nodes and create edges based on node names

#### Utils

In [None]:
def turn_time_cp_into_indices_cp(annotations, sampling_freq, last_ind):
    """Convert annotated segment ends into sample indices.

    For every annotation except the last one, the change-point is the sum of
    its first two fields multiplied by ``sampling_freq`` and truncated to an
    int. ``last_ind`` is appended as the final change-point.

    Args:
        annotations: sequence of records whose first two items are numeric
            (presumably onset and duration in seconds -- confirm against the
            EDF reader).
        sampling_freq: number of samples per time unit.
        last_ind: index appended as the last change-point.

    Returns:
        List of change-point indices, ending with ``last_ind``.
    """
    change_points = [
        int((record[0] + record[1]) * sampling_freq)
        for record in annotations[:-1]
    ]
    change_points.append(last_ind)
    return change_points

In [None]:
def plot_selected_channels(selected_channels, signal, signal_headers, plot_start, plot_end, sampling_freq):
    """Plot the selected channels of ``signal`` as vertically stacked traces.

    Every panel shows one channel, labelled on its y axis, without spines or
    y ticks. Only the bottom panel keeps x ticks, relabelled in seconds.

    Args:
        selected_channels: sequence of row indices into ``signal`` and
            ``signal_headers``.
        signal: array indexed as ``signal[channel, sample]``.
        signal_headers: per-channel mappings providing a ``'label'`` entry.
        plot_start: first sample index of the plotted window.
        plot_end: end sample index (exclusive) of the plotted window.
        sampling_freq: samples per second, used to convert tick positions
            into seconds.

    Returns:
        ``(fig, axes)`` where ``axes`` is a 1D array holding one Axes per
        selected channel (also when a single channel is selected).

    Raises:
        ValueError: if ``selected_channels`` is empty.
    """
    n_channel_to_show = len(selected_channels)
    if n_channel_to_show == 0:
        raise ValueError("selected_channels must contain at least one channel index")
    # squeeze=False always returns a 2D Axes array, so a single selected
    # channel no longer yields a bare Axes object that cannot be indexed.
    fig, axes_grid = plt.subplots(n_channel_to_show, 1, figsize=(20, n_channel_to_show//2+5), squeeze=False)
    axes = axes_grid[:, 0]
    for ax, ch_idx in zip(axes, selected_channels):
        ax.plot(signal[ch_idx, plot_start:plot_end])
        # Every panel, including the bottom one, is labelled with its channel.
        ax.set_ylabel(signal_headers[ch_idx]['label'])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)
    # Only the bottom panel keeps its x ticks.
    for ax in axes[:-1]:
        ax.set_xticks([])
    last_ax = axes[-1]
    # Labels come from the numeric tick positions (public API) rather than the
    # private `Text._text`; the first and last ticks are dropped as before.
    tick_positions = last_ax.get_xticks()[1:-1]
    offset = plot_start // sampling_freq
    # int(...) keeps the labels integral even when sampling_freq is a float.
    new_labels = [str(int(int(tick) // sampling_freq + offset)) for tick in tick_positions]
    last_ax.set_xticks(tick_positions, new_labels)
    last_ax.set_xlabel('Time (s)')
    return fig, axes

In [None]:
def add_bkps_to_plot(axes, bkps_ids, color):
    """Draw a vertical line at every breakpoint on every axis.

    Args:
        axes: iterable of objects exposing ``axvline(x=..., color=...)``.
        bkps_ids: x positions of the breakpoints.
        color: line colour forwarded to ``axvline``.

    Returns:
        The ``axes`` object that was passed in.
    """
    for panel in axes:
        for position in bkps_ids:
            panel.axvline(x=position, color=color)
    return axes

In [None]:
def filter_signal(signal, filter_order, low_cutoff, high_cutoff, fs=None):
    """Band-pass filter every channel of ``signal`` with a Butterworth filter.

    Args:
        signal: array indexed as ``signal[channel, sample]``; filtering is
            applied along the last axis.
        filter_order: order ``N`` passed to ``scipy.signal.butter``.
        low_cutoff: lower edge of the pass band, in the same unit as ``fs``.
        high_cutoff: upper edge of the pass band, in the same unit as ``fs``.
        fs: sampling frequency of ``signal``. When omitted, the notebook-level
            ``sample_freq`` variable is used, as in the original behaviour.

    Returns:
        Float array with the same shape as ``signal`` holding the filtered
        channels.
    """
    if fs is None:
        # Backward compatibility: callers that do not pass `fs` still rely on
        # the module-level sampling frequency.
        fs = sample_freq
    sos_filter = scipy.signal.butter(N=filter_order, Wn=[low_cutoff, high_cutoff], btype='bandpass', fs=fs, output='sos')
    # sosfilt processes all channels at once along the last axis.
    return scipy.signal.sosfilt(sos_filter, np.asarray(signal, dtype=float), axis=-1)

In [None]:
def subsample_signal(signal, sub_freq, sample_freq):
    """Keep one sample out of every ``sub_freq`` along the second axis.

    Args:
        signal: array indexed as ``signal[channel, sample]``.
        sub_freq: step of the slice (a decimation factor, despite its name).
        sample_freq: sampling frequency of ``signal``.

    Returns:
        ``(new_sample_freq, subsampled_signal)`` where the new frequency is
        ``sample_freq / sub_freq``.
    """
    reduced_rate = sample_freq / sub_freq
    return reduced_rate, signal[:, ::sub_freq]

#### Data loading

In [None]:
# Dictionary collecting the processing metadata of the loaded recording.
signal_metadata = {}

# Recording to load: <data_path>/<volunteer>/<volunteer>R<experiment>.edf
data_path = 'data/real_datasets/eeg-motor-movementimagery-dataset-1.0.0/files'
VOLOUNTEER_ID = 'S007'
EXP_ID = '04'
signal_path = f'{data_path}/{VOLOUNTEER_ID}/{VOLOUNTEER_ID}R{EXP_ID}.edf'

# Read the signal array, the per-channel headers and the file header.
signal, signal_headers, header = highlevel.read_edf(signal_path)
signal.shape

In [None]:
# Record which volunteer / experimental run the metadata refers to.
signal_metadata['volunteer_id'] = VOLOUNTEER_ID
signal_metadata['exp_id'] = EXP_ID

In [None]:
# Summary of the loaded recording.
print("Overall header: ", header)
print("Number of channels: ", len(signal_headers))
print("signal.shape: ", signal.shape)
print("Names of the channels: ", [metadata['label'] for metadata in signal_headers])
# Sampling rate taken from the first channel header; this rebinds the
# notebook-level `sample_freq` (no int cast here).
sample_freq = signal_headers[0]['sample_rate']
print("The length of the signals in seconds is: ", signal.shape[1]//sample_freq)

In [None]:
# Inspect the header of the first channel.
print(signal_headers[0])

In [None]:
# Open the same file with MNE (used below for its channel names).
raw_data = mne.io.read_raw_edf(signal_path)

In [None]:
# Show the channel names reported by MNE and store them in the metadata.
print(raw_data.ch_names)
signal_metadata["channels"] = raw_data.ch_names

#### Visual inspection

In [None]:
# Annotations stored in the file header.
annotations = header["annotations"]
print(annotations)
# Convert the annotations into sample indices; the last breakpoint is the
# length of the signal (number of columns).
gt_bkps = turn_time_cp_into_indices_cp(annotations, sampling_freq=sample_freq, last_ind=signal.shape[1])
print(gt_bkps)

In [None]:
# Sample window to display.
plot_start, plot_end = 0, 20000

# Labels of the channels considered of good quality, padded with dots to four
# characters (e.g. "Fc5" -> "Fc5.", "C3" -> "C3..") before being matched
# against raw_data.ch_names.
good_quality_channels = [
    f"{ch_name}.."[:4]
    for ch_name in (
        "Fc5", "Fc1", "Fc2", "Fc6", "C3", "Cz", "C4", "Cp5", "Cp1", "Cp2", "Cp6",
        "Fpz", "Af7", "Afz", "Af8", "F5", "F1", "F2", "F6", "Ft7", "T7", "T9",
        "Tp7", "P7", "P3", "Pz", "P4", "P8", "Po3", "Po4", "O1", "O2",
    )
]

# channels_to_show = mne.pick_channels_regexp(raw_data.ch_names, regexp="*")
channels_to_show = mne.pick_channels(raw_data.ch_names, include=good_quality_channels)
print("Selected channel indices: ", channels_to_show)

# Raw signal of the selected channels with the ground-truth breakpoints in black.
fig, axes = plot_selected_channels(channels_to_show, signal, signal_headers, plot_start, plot_end, sample_freq)
axes = add_bkps_to_plot(axes, gt_bkps, color='k')
plt.show()

In [None]:
# Sample window to display.
plot_start = 0
plot_end = 20000

# Complementary selection: every channel except the good-quality ones.
channels_to_show = mne.pick_channels(raw_data.ch_names, include=[], exclude=good_quality_channels)
print("Selected channel indices: ", channels_to_show)

fig, axes = plot_selected_channels(channels_to_show, signal, signal_headers, plot_start, plot_end, sample_freq)
plt.show()

#### Signal filtering

In [None]:
# Welch power spectral density of every raw channel, on a log-scaled y axis.
fig, ax = plt.subplots(1, 1, figsize=(20, 5))
for i in range(signal.shape[0]):
    f, Pxx_den = scipy.signal.welch(signal[i, :], sample_freq, nperseg=1024)
    ax.semilogy(f, Pxx_den)
ax.set_xlabel('frequency [Hz]')
ax.set_ylabel('PSD [V**2/Hz]')
ax.grid(visible=True, axis='y')

In [None]:
# Band-pass parameters. NOTE: despite their names, LOW_PASS_FREQ is passed as
# the low cutoff and HIGH_PASS_FREQ as the high cutoff of the band-pass filter.
LOW_PASS_FREQ = 0.5
HIGH_PASS_FREQ = 30
FILTER_ORDER = 5

# Log the filtering configuration alongside the signal metadata.
filtering_metada = {}
filtering_metada["commit id"] = my_ut.get_git_head_short_hash()
filtering_metada["filtering function"] = filter_signal.__name__
filtering_metada["filter type"] = "bandpass butterworth"
filtering_metada["filter order"] = FILTER_ORDER
filtering_metada["low cutoff"] = LOW_PASS_FREQ
filtering_metada["high cutoff"] = HIGH_PASS_FREQ
signal_metadata["filtering"] = filtering_metada

# Apply the band-pass filter to every channel.
filtered_signal = filter_signal(signal, filter_order=FILTER_ORDER, low_cutoff=LOW_PASS_FREQ, high_cutoff=HIGH_PASS_FREQ)

In [None]:
# Welch power spectral density of every filtered channel; the y axis is
# clipped from below at 1e-3.
fig, ax = plt.subplots(1, 1, figsize=(20, 5))
for i in range(filtered_signal.shape[0]):
    f, Pxx_den = scipy.signal.welch(filtered_signal[i, :], sample_freq, nperseg=1024)
    ax.semilogy(f, Pxx_den)
ax.set_xlabel('frequency [Hz]')
ax.set_ylabel('PSD [V**2/Hz]')
ax.set_ylim(bottom=1e-3)
ax.grid(visible=True, axis='y')

In [None]:
# Sample window to display.
plot_start, plot_end = 0, 20000

# Labels of the channels considered of good quality, padded with dots to four
# characters (e.g. "Fc5" -> "Fc5.", "C3" -> "C3..") before being matched
# against raw_data.ch_names.
good_quality_channels = [
    f"{ch_name}.."[:4]
    for ch_name in (
        "Fc5", "Fc1", "Fc2", "Fc6", "C3", "Cz", "C4", "Cp5", "Cp1", "Cp2", "Cp6",
        "Fpz", "Af7", "Afz", "Af8", "F5", "F1", "F2", "F6", "Ft7", "T7", "T9",
        "Tp7", "P7", "P3", "Pz", "P4", "P8", "Po3", "Po4", "O1", "O2",
    )
]

channels_to_show = mne.pick_channels(raw_data.ch_names, include=good_quality_channels)
print("Selected channel indices: ", channels_to_show)

# Filtered signal of the selected channels with the ground-truth breakpoints in black.
fig, axes = plot_selected_channels(channels_to_show, filtered_signal, signal_headers, plot_start, plot_end, sample_freq)
axes = add_bkps_to_plot(axes, gt_bkps, color='k')
plt.show()

In [None]:
# Complementary selection: every channel except the good-quality ones,
# plotted from the filtered signal with the ground-truth breakpoints in black.
channels_to_show = mne.pick_channels(raw_data.ch_names, include=[], exclude=good_quality_channels)
print("Selected channel indices: ", channels_to_show)

fig, axes = plot_selected_channels(channels_to_show, filtered_signal, signal_headers, plot_start, plot_end, sample_freq)
axes = add_bkps_to_plot(axes, gt_bkps, color='k')
plt.show()

#### Signal subsampling

In [None]:
# Passed as `sub_freq`, i.e. the slicing step of subsample_signal: one sample
# out of every SUBSAMPLING_FREQUENCY is kept.
SUBSAMPLING_FREQUENCY = 2

new_subsample_freq, sub_sampled_signal = subsample_signal(filtered_signal, sub_freq=SUBSAMPLING_FREQUENCY, sample_freq=sample_freq)
signal_metadata["subsampling frequency"] = SUBSAMPLING_FREQUENCY

In [None]:
# Welch power spectral density of every subsampled channel, computed with the
# reduced sampling frequency; the y axis is clipped from below at 1e-3.
fig, ax = plt.subplots(1, 1, figsize=(20, 5))
for i in range(sub_sampled_signal.shape[0]):
    f, Pxx_den = scipy.signal.welch(sub_sampled_signal[i, :], new_subsample_freq, nperseg=1024)
    ax.semilogy(f, Pxx_den)
ax.set_xlabel('frequency [Hz]')
ax.set_ylabel('PSD [V**2/Hz]')
ax.set_ylim(bottom=1e-3)
ax.grid(visible=True, axis='y')

In [None]:
# Recompute the ground-truth breakpoints at the reduced sampling frequency;
# the last breakpoint is the length of the subsampled signal.
annotations = header["annotations"]
subsampled_gt_bkps = turn_time_cp_into_indices_cp(annotations, sampling_freq=new_subsample_freq, last_ind=sub_sampled_signal.shape[1])

In [None]:
# Sample window to display.
plot_start, plot_end = 0, 10000

# Labels of the channels considered of good quality, padded with dots to four
# characters (e.g. "Fc5" -> "Fc5.", "C3" -> "C3..") before being matched
# against raw_data.ch_names.
good_quality_channels = [
    f"{ch_name}.."[:4]
    for ch_name in (
        "Fc5", "Fc1", "Fc2", "Fc6", "C3", "Cz", "C4", "Cp5", "Cp1", "Cp2", "Cp6",
        "Fpz", "Af7", "Afz", "Af8", "F5", "F1", "F2", "F6", "Ft7", "T7", "T9",
        "Tp7", "P7", "P3", "Pz", "P4", "P8", "Po3", "Po4", "O1", "O2",
    )
]

channels_to_show = mne.pick_channels(raw_data.ch_names, include=good_quality_channels)
print("Selected channel indices: ", channels_to_show)

# Subsampled signal of the selected channels with its breakpoints in black.
fig, axes = plot_selected_channels(channels_to_show, sub_sampled_signal, signal_headers, plot_start, plot_end, new_subsample_freq)
axes = add_bkps_to_plot(axes, subsampled_gt_bkps, color='k')
plt.show()

#### Whole workflow

In [None]:
# Load one reference recording; its sampling rate and MNE channel names are
# reused by the batch processing below.
# data_path = 'data/real_datasets/eeg-motor-movementimagery-dataset-1.0.0/files'
data_path = 'data_1/real_data/physionet.org/files/eegmmidb/1.0.0'
VOLOUNTEER_ID = 'S001'
EXP_ID = '03'
signal_path = f'{data_path}/{VOLOUNTEER_ID}/{VOLOUNTEER_ID}R{EXP_ID}.edf'
signal, signal_headers, header = highlevel.read_edf(signal_path)
sample_freq = signal_headers[0]['sample_rate']
raw_data = mne.io.read_raw_edf(signal_path)


In [None]:
# Batch pre-processing: for every volunteer and experimental run, load the
# EDF file, band-pass filter it, subsample it, convert the annotations into
# breakpoint indices and save the result.
# data_path = 'data/real_datasets/eeg-motor-movementimagery-dataset-1.0.0/files'
data_path = 'data_1/real_data/physionet.org/files/eegmmidb/1.0.0'

VOLOUNTEER_ID_LIST = ['S0' + str(i) for i in range(31, 51)]
EXP_ID_LIST = ['03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14']
# NOTE: despite their names, LOW_PASS_FREQ is passed as the low cutoff and
# HIGH_PASS_FREQ as the high cutoff of the band-pass filter.
LOW_PASS_FREQ = 0.5
HIGH_PASS_FREQ = 40
FILTER_ORDER = 3
# Slicing step used by subsample_signal (one sample kept out of every 8).
SUBSAMPLING_FREQUENCY = 8
# SAVING_DIR = "data/real_datasets/eeg-motor-movementimagery-dataset-1.0.0/processed_signals"
SAVING_DIR = "data_1/real_data/physionet.org/processed_signals"

subfolder = f"filtered_{LOW_PASS_FREQ}-{HIGH_PASS_FREQ}_order_{FILTER_ORDER}_subsampled_{SUBSAMPLING_FREQUENCY}"
path = f"{SAVING_DIR}/{subfolder}"

# logging
signal_metadata = {}
signal_metadata["channels"] = raw_data.ch_names
signal_metadata["sample freq"] = sample_freq
filtering_metada = {}
filtering_metada["commit id"] = my_ut.get_git_head_short_hash()
filtering_metada["filtering function"] = filter_signal.__name__
filtering_metada["filter type"] = "bandpass butterworth"
filtering_metada["filter order"] = FILTER_ORDER
filtering_metada["low cutoff"] = LOW_PASS_FREQ
filtering_metada["high cutoff"] = HIGH_PASS_FREQ
signal_metadata["filtering"] = filtering_metada
signal_metadata["subsampling frequency"] = SUBSAMPLING_FREQUENCY
signal_metadata = my_ut.turn_all_list_of_dict_into_str(signal_metadata)
# my_ut.create_parent_and_dump_json(path, name=f"signals_metadata.json", data=signal_metadata, indent=4)

# signal processing
for VOLOUNTEER_ID in VOLOUNTEER_ID_LIST:

    save_dir = f'{path}/{VOLOUNTEER_ID}'
    # exist_ok=False makes a re-run fail if the output directory already exists.
    Path(save_dir).mkdir(parents=True, exist_ok=False)

    for EXP_ID in EXP_ID_LIST:

        # data loading
        signal_path = f'{data_path}/{VOLOUNTEER_ID}/{VOLOUNTEER_ID}R{EXP_ID}.edf'
        signal, signal_headers, header = highlevel.read_edf(signal_path)

        # data processing
        # NOTE: the notebook-level `sample_freq` (read from the reference
        # recording) is used for every file.
        filtered_signal = filter_signal(signal, filter_order=FILTER_ORDER, low_cutoff=LOW_PASS_FREQ, high_cutoff=HIGH_PASS_FREQ)
        new_subsample_freq, sub_sampled_signal = subsample_signal(filtered_signal, sub_freq=SUBSAMPLING_FREQUENCY, sample_freq=sample_freq)
        # Take the number of samples BEFORE transposing: after `.T` the second
        # axis holds the channels, so `shape[1]` would give the channel count
        # and the final breakpoint would be wrong.
        n_subsampled_samples = sub_sampled_signal.shape[1]
        sub_sampled_signal = sub_sampled_signal.T
        annotations = header["annotations"]
        subsampled_gt_bkps = turn_time_cp_into_indices_cp(annotations, sampling_freq=new_subsample_freq, last_ind=n_subsampled_samples)

        # data saving
        NAME = f"volunteer{VOLOUNTEER_ID}_exp{EXP_ID}"
        my_ut.save_signal_and_bkps(sub_sampled_signal, subsampled_gt_bkps, save_dir, NAME)

### Building a graph for the recording system

The EEGs were recorded from 64 electrodes as per the international 10-10 system (excluding electrodes Nz, F9, F10, FT9, FT10, A1, A2, TP9, TP10, P9, and P10).

**NOTE**: there exists a library building networkx graphs from EEG signals based on a given connectivity measure: [EEGraph](https://github.com/ufvceiec/EEGRAPH).


In [None]:
# Load the standard "easycap-M1" montage, keep its channel names and plot it.
fig, ax = plt.subplots(1, 1, figsize=(5, 4))
easycap_montage = mne.channels.make_standard_montage("easycap-M1")
montage_ch_names = easycap_montage.ch_names
easycap_montage.plot(axes=ax)
plt.show()

In [None]:
# Exploration of the montage internals: attributes, the type of `dig`, and
# the repr of its fourth entry (plus a fixed character slice of that repr).
print(easycap_montage.__dict__)
print(type(easycap_montage.dig))
print(easycap_montage.dig[3])
print(easycap_montage.dig[3].__repr__())
print(easycap_montage.dig[3].__repr__()[25:45])

In [None]:
# Upper-case versions of the recording channel names and of the montage
# channel names; the latter are first padded with dots to four characters.
uppercase_raw_data_ch_names = [ch_name.upper() for ch_name in raw_data.ch_names]
temp_montage_ch_names = [ch_name + '..' for ch_name in montage_ch_names]
upper_adataped_montage_ch_names = [ch_name[:4].upper() for ch_name in temp_montage_ch_names]

In [None]:
# Map each recorded channel (upper-case, dot-padded label) to its 3D
# coordinates taken from the montage.
list_of_channel_pos = easycap_montage.dig
coord_per_channel_label = {}

# NOTE(review): the first three entries of `dig` are skipped so that the
# remaining ones line up with `montage_ch_names` -- presumably they are
# fiducial points; confirm against the montage.
for ch_name, digpoint in zip(montage_ch_names, list_of_channel_pos[3:]):
    # adapt the format of the ch names to the one provided by raw_data
    ch_name = ch_name + '..'
    ch_name = ch_name[:4]
    if ch_name.upper() in uppercase_raw_data_ch_names:
        # retrieve the corresponding coordinates
        # NOTE(review): the coordinates are parsed from the text between the
        # first "(" and ")" of the point's repr, so this depends on the repr
        # format of the installed mne version.
        coords_str = digpoint.__repr__().split('(')[1].split(')')[0]
        coords_str_list = coords_str.split(',')
        coord_x, coord_y, coord_z = float(coords_str_list[0]), float(coords_str_list[1]), float(coords_str_list[2])
        coord_per_channel_label[ch_name.upper()] = (coord_x, coord_y, coord_z)
# Coordinates set by hand for the two labels T9 and T10.
coord_per_channel_label['T9..'] = (-105, 0.0, -3.3)
coord_per_channel_label['T10.'] = (105, 0.0, -3.3)

In [None]:
# Compare the recording names, the adapted montage names and the resulting
# label -> coordinates mapping.
print(uppercase_raw_data_ch_names)
print(upper_adataped_montage_ch_names)
print(coord_per_channel_label)

In [None]:
# Signal row index -> upper-case channel name.
ch_name_per_signal_index_dict = dict(enumerate(uppercase_raw_data_ch_names))
# Channel name -> coordinates, inserted in the order of the signal rows.
signal_like_order_coords_per_ch_label = {
    label: coord_per_channel_label[label]
    for label in ch_name_per_signal_index_dict.values()
}
print(ch_name_per_signal_index_dict)
print(signal_like_order_coords_per_ch_label)

In [None]:
# Coordinates stacked into an array whose rows follow the signal row order.
signl_like_ordered_coords_arr = np.array([coords for coords in signal_like_order_coords_per_ch_label.values()])
# Build KNN weights from the coordinates (4 is presumably the number of
# neighbours -- confirm against libpysal). `knn_graphs` is bound to the same
# object as `knn_weights`.
knn_graphs = knn_weights = KNN.from_array(signl_like_ordered_coords_arr, 4)
# Convert to a networkx graph and drop the edge directions.
G_directed = knn_weights.to_networkx()
G = G_directed.to_undirected()
# Node index -> channel name, used as labels when drawing.
node_labels = ch_name_per_signal_index_dict
print(f"The graph has {G.number_of_nodes()} nodes.")

In [None]:
# Draw the graph using the first two coordinates of each node as its position.
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
nx.draw_networkx(G, pos = signl_like_ordered_coords_arr[:, :2], ax=ax, labels=node_labels)
plt.show()

In [None]:
# `utils` is imported again here under the same alias.
import utils as my_ut

# Save the graph under the dataset folder.
graph_name = 'KNN_4_64_ch_graph_mat_adj_order_signal_header.npy'
my_ut.save_graph(G, f'data/real_datasets/eeg-motor-movementimagery-dataset-1.0.0/{graph_name}')

In [None]:
# Reload the saved array and rebuild a graph from it as an adjacency matrix.
saved_adj_mat = np.load(f'data/real_datasets/eeg-motor-movementimagery-dataset-1.0.0/{graph_name}', allow_pickle=False)
G_saved = nx.from_numpy_array(saved_adj_mat)

In [None]:
# Draw the reloaded graph with the same node positions and labels, as a
# visual check of the save/load round trip.
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
nx.draw_networkx(G_saved, pos = signl_like_ordered_coords_arr[:, :2], ax=ax, labels=node_labels)
plt.show()

### Show prediction

In [None]:
# Locations of the processed signals and of the prediction results.
# NOTE(review): these folder names ("filtered_0.5-40_subsampled_2") do not
# follow the "filtered_..._order_..._subsampled_..." pattern built in the
# whole-workflow cell -- confirm they point to existing data.
data_path = "data/real_datasets/eeg-motor-movementimagery-dataset-1.0.0/processed_signals/filtered_0.5-40_subsampled_2"
results_path = "results_1/real_data/eeg-motor-movement/filtered_0.5-40_subsampled_2/S007_exp_04-05-06"

In [None]:
# Load the processed signal and its metadata.
# NOTE(review): VOLOUNTEER_ID and EXP_ID hold whatever values were assigned
# last in the notebook (e.g. by the batch loop) -- set them explicitly before
# running this cell.
signal_path = f'{data_path}/volunteer{VOLOUNTEER_ID}_exp{EXP_ID}'
to_plot_signal = np.load(f"{signal_path}_signal.npy", allow_pickle=False)
to_plot_signal_metadata = my_ut.open_json(f"{signal_path}_metadata.json")
# Sampling frequency of the stored signal, derived from the stored
# subsampling step.
final_sample_freq = sample_freq / to_plot_signal_metadata["subsampling frequency"]

In [None]:
# Load the results stored for the current experiment id and convert the
# predicted and ground-truth breakpoints from their stored string form.
statio_pred = my_ut.open_json(f"{results_path}/statio_pred.json")[str(EXP_ID)]
pred_bkps = my_ut.turn_str_of_list_into_list_of_int(statio_pred["pred"])
gt_bkps = my_ut.turn_str_of_list_into_list_of_int(statio_pred["gt"])

In [None]:
# Sample window to display.
plot_start, plot_end = 0, 10000

# Labels of the channels considered of good quality, padded with dots to four
# characters (e.g. "Fc5" -> "Fc5.", "C3" -> "C3..") before being matched
# against raw_data.ch_names.
good_quality_channels = [
    f"{ch_name}.."[:4]
    for ch_name in (
        "Fc5", "Fc1", "Fc2", "Fc6", "C3", "Cz", "C4", "Cp5", "Cp1", "Cp2", "Cp6",
        "Fpz", "Af7", "Afz", "Af8", "F5", "F1", "F2", "F6", "Ft7", "T7", "T9",
        "Tp7", "P7", "P3", "Pz", "P4", "P8", "Po3", "Po4", "O1", "O2",
    )
]

# channels_to_show = mne.pick_channels_regexp(raw_data.ch_names, regexp="*")
channels_to_show = mne.pick_channels(raw_data.ch_names, include=good_quality_channels)
print("Selected channel indices: ", channels_to_show)

# Ground-truth breakpoints in green, predicted breakpoints in dark orange.
fig, axes = plot_selected_channels(channels_to_show, to_plot_signal, signal_headers, plot_start, plot_end, final_sample_freq)
axes = add_bkps_to_plot(axes, gt_bkps, color='g')
axes = add_bkps_to_plot(axes, pred_bkps, color='darkorange')
plt.show()

In [None]:
# Sample window to display.
plot_start = 0
plot_end = 10000

# Complementary selection: every channel except the good-quality ones.
channels_to_show = mne.pick_channels(raw_data.ch_names, include=[], exclude=good_quality_channels)
print("Selected channel indices: ", channels_to_show)

# Ground-truth breakpoints in green, predicted breakpoints in dark orange.
fig, axes = plot_selected_channels(channels_to_show, to_plot_signal, signal_headers, plot_start, plot_end, final_sample_freq)
axes = add_bkps_to_plot(axes, gt_bkps, color='g')
axes = add_bkps_to_plot(axes, pred_bkps, color='darkorange')
plt.show()