In [1]:
import numpy as np
import pandas as pd

import os

import mne
from tqdm.notebook import tqdm

import pickle


In [2]:
global_path = 'physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/'

files = os.listdir(global_path)
annot_files = {int(x.split('/')[-1][3:6]): x for x in files if 'Hypnogram' in x}
data_files = {int(x.split('/')[-1][3:6]): x for x in files if 'PSG' in x}
subj_ids = data_files.keys()

In [3]:
event_id_global_map = {
    'Sleep stage 1': 1,
    'Sleep stage 2': 2,
    'Sleep stage 3': 3,
    'Sleep stage 4': 3, # remap
    'Sleep stage ?': 5,
    'Sleep stage R': 6,
    'Sleep stage W': 7,
    'Movement time': 8
}

stages_to_drop = [5, 8] # drop "Sleep stage ?" and "Movement time"

In [8]:
raw.get_data().shape

(7, 7750000)

In [4]:
data = []
label = []
subject = []

target_fs = 100
channels_to_use = ['EEG Fpz-Cz']

for subj in tqdm(sorted(list(subj_ids))[:100]):
    file_path = global_path + data_files[subj]
    file_path_annot = global_path + annot_files[subj]

    raw = mne.io.read_raw_edf(file_path, preload=True).resample(sfreq=target_fs)
    
    start_time = raw.times[0]
    end_time = raw.times[-1]
    trim_start = 1000  # trim 10 seconds from the beginning
    trim_end = 1000     # trim 5 seconds from the end
    raw = raw.crop(tmin=start_time + trim_start, tmax=end_time - trim_end)
    
    file_annot = mne.read_annotations(file_path_annot)
    offset = 3
    delete_annotations = [i for i, x in enumerate(file_annot) if x['description']=='Sleep stage W' and (i < offset or i >= len(file_annot) - offset)]
    file_annot.delete(delete_annotations)
    delete_annotations = [i for i, x in enumerate(file_annot) if x['description'] in {'Sleep stage ?', 'Movement time'}]
    file_annot.delete(delete_annotations)
    raw.set_annotations(file_annot)

    data_len = len(raw.times)
    nepochs = int(np.floor(data_len / (target_fs * 30)) - 1)

    # Create events from annotations
    events, event_id = mne.events_from_annotations(raw)
    event_id_remap = {v: event_id_global_map[k] for k, v in event_id.items()}

    # epoch_file =  mne.Epochs(file, events, tmin=0, tmax=5,baseline=None)
    epoch_file = mne.Epochs(raw, events, tmin=0., tmax=30.0, baseline=None, preload=True) 

    dataframe = epoch_file.to_data_frame()

    dataframe.condition = dataframe.condition.apply(lambda x: event_id_remap[int(x)])

    epochs = dataframe.epoch
    condition = dataframe.condition
    df = dataframe[channels_to_use]

    # Z-Score Each Channel
    mean_vals = df.mean(axis=0)
    sd_vals = df.std(axis=0)

    for ch in channels_to_use:
        df[ch] -= mean_vals[ch]
        df[ch] /= sd_vals[ch]

    

    count = 0
    for epoch in sorted(np.unique(epochs)):
        cur_label = int(condition[epochs == epoch].iloc[0])
        single_channel = np.array(df[epochs == epoch]).transpose()[None]
        multi_channel = np.repeat(single_channel, repeats=19, axis=1) 
        multi_channel += np.random.normal(scale=0.7, size=multi_channel.shape)
        vals = multi_channel[...,:30*target_fs]
        data.append(vals)
        label.append(cur_label)
        count += 1

    subj_real = int(data_files[subj][3:5])
    subject.extend(list((subj_real*np.ones((count,))).astype(int)))

data_final = np.concatenate(data, axis=0)
label_final = np.array(label)
subject_final = np.array(subject)

filename1 = 'Spectral_Explainability/segmented_sc_data.pkl'
save_data1 = {'data':data_final,'subject':subject_final,'label':label_final}

with open(filename1, 'wb') as f:
    pickle.dump(save_data1, f)

  0%|          | 0/100 [00:00<?, ?it/s]

Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  raw = mne.io.read_raw_edf(file_path, preload=True).resample(sfreq=target_fs)
  raw = mne.io.read_raw_edf(file_path, preload=True).resample(sfreq=target_fs)
  raw = mne.io.read_raw_edf(file_path, preload=True).resample(sfreq=target_fs)


Reading 0 ... 7949999  =      0.000 ... 79499.990 secs...
Sampling frequency of the instance is already 100.0, returning unmodified.
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
151 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 151 events and 3001 original time points ...
0 bad epochs dropped


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[ch] -= mean_vals[ch]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[ch] /= sd_vals[ch]


Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4002E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 8489999  =      0.000 ... 84899.990 secs...


  raw = mne.io.read_raw_edf(file_path, preload=True).resample(sfreq=target_fs)
  raw = mne.io.read_raw_edf(file_path, preload=True).resample(sfreq=target_fs)
  raw = mne.io.read_raw_edf(file_path, preload=True).resample(sfreq=target_fs)


KeyboardInterrupt: 

Train

In [4]:
# Generic ML Libraries
import sklearn
from sklearn.model_selection import GroupShuffleSplit
from sklearn.metrics import confusion_matrix
from sklearn.utils import class_weight

# General Libraries

import numpy as np
from scipy.io import loadmat, savemat
from scipy.fft import fft, fftfreq, ifft
import h5py
import os
import pickle

# Figure Libraries
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.colors import LinearSegmentedColormap

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from tqdm.notebook import tqdm
import torch.optim as optim
from collections import defaultdict


## IMPORT DATA

datadir = "Spectral_Explainability"
filename = "segmented_sc_data.pkl"

with open(os.path.join(datadir,filename), 'rb') as f:
    mat_file = pickle.load(f)
X = np.float32(mat_file['data']); # data
Y = np.float32(mat_file['label']); # labels
S = np.float32(mat_file['subject']); # subject number

print(np.shape(X))
print(np.shape(Y))
print(np.shape(S))

(13137, 19, 3000)
(13137,)
(13137,)


In [16]:
map_targets = {x: i for i, x in enumerate(np.unique(Y))}
map_targets

{1.0: 0, 2.0: 1, 3.0: 2, 6.0: 3, 7.0: 4}

In [17]:
Y = np.array([map_targets[x] for x in Y])

In [5]:


## ChannelDropout Code
class ChannelDropout(nn.Module):
    def __init__(self, rate, noise_shape=None, seed=None):
        super().__init__()
        self.rate = min(1., max(0., rate))
        self.noise_shape = noise_shape
        self.seed = seed
        if seed is not None:
            torch.manual_seed(seed)

    def forward(self, inputs):
        if self.training:
            noise_shape = [1, 1, inputs.shape[2]]
            mask = torch.bernoulli(torch.full(noise_shape, 1 - self.rate)).to(inputs.device)
            mask = mask.expand_as(inputs)
            return inputs * mask / (1 - self.rate)
        else:
            return inputs

    def extra_repr(self):
        return f'rate={self.rate}, noise_shape={self.noise_shape}, seed={self.seed}'
    

class ModelMDD(nn.Module):
    def __init__(self, dropout=0.5):
        super().__init__()
        self.n_features = 19
        self.n_points = 3000

        self.channel_dropout = ChannelDropout(rate=0.25)
        
        # Conv1D layers
        self.conv1 = nn.Conv1d(self.n_features, 5, kernel_size=10, stride=1, padding=0)
        self.conv2 = nn.Conv1d(5, 10, kernel_size=10, stride=1, padding=0)
        self.conv3 = nn.Conv1d(10, 10, kernel_size=10, stride=1, padding=0)
        self.conv4 = nn.Conv1d(10, 15, kernel_size=5, stride=1, padding=0)
        
        # BatchNorm layers
        self.bn1 = nn.BatchNorm1d(5)
        self.bn2 = nn.BatchNorm1d(10)
        self.bn3 = nn.BatchNorm1d(10)
        self.bn4 = nn.BatchNorm1d(15)

        # BatchNorm layers
        self.fc_bn1 = nn.BatchNorm1d(256)
        self.fc_bn2 = nn.BatchNorm1d(64)
        
        # Dense layers
        self.fc1 = nn.Linear(15 * 181, 256)  # 184 is calculated based on the input size and conv/pool operations
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 5)
        
        self.dropout = nn.AlphaDropout(dropout)
        
    def forward(self, x):
        # channel dropout
        x = self.channel_dropout(x)

        # Conv layers
        x = F.elu(self.conv1(x))
        x = F.max_pool1d(x, 2)
        x = self.bn1(x)
        
        x = F.elu(self.conv2(x))
        x = F.max_pool1d(x, 2)
        x = self.bn2(x)
        
        x = F.elu(self.conv3(x))
        x = F.max_pool1d(x, 2)
        x = self.bn3(x)
        
        x = F.elu(self.conv4(x))
        x = F.max_pool1d(x, 2)
        x = self.bn4(x)
        
        # Flatten
        x = x.view(x.size(0), -1)
        
        # Dense layers
        x = self.dropout(x)
        x = F.elu(self.fc_bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.elu(self.fc_bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.fc3(x) # F.softmax(, dim=1)
        
        return x
    
def get_model(dropout=0.5):
    model = ModelMDD(dropout)
    return model

model = get_model()
model.train()
print(model)

ModelMDD(
  (channel_dropout): ChannelDropout(rate=0.25, noise_shape=None, seed=None)
  (conv1): Conv1d(19, 5, kernel_size=(10,), stride=(1,))
  (conv2): Conv1d(5, 10, kernel_size=(10,), stride=(1,))
  (conv3): Conv1d(10, 10, kernel_size=(10,), stride=(1,))
  (conv4): Conv1d(10, 15, kernel_size=(5,), stride=(1,))
  (bn1): BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm1d(15, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc_bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc_bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=2715, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (fc3): Linear(in_fe

In [23]:
device = 'cuda'

def evaluate_model(X_train, X_val, Y_train, Y_val, checkpoint_path):
    # Convert numpy arrays to PyTorch tensors
    X_train = torch.FloatTensor(X_train)
    X_val = torch.FloatTensor(X_val)
    Y_train = torch.LongTensor(Y_train)
    Y_val = torch.LongTensor(Y_val)

    # Create DataLoaders
    train_dataset = TensorDataset(X_train, Y_train)
    val_dataset = TensorDataset(X_val, Y_val)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=128)

    # Get model
    model = get_model()
    model.to(device)

    # Compute class weights
    class_weights = compute_class_weight('balanced', classes=np.unique(Y_train.numpy()), 
                                         y=Y_train.numpy().squeeze())
    class_weights = torch.FloatTensor(class_weights).to(device)

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.AdamW(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5, verbose=True)

    # Training loop
    num_epochs = 100
    best_val_acc = 0
    patience = 20
    counter = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()

        train_acc = train_correct / train_total

        # Validation
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:

                inputs, labels = inputs.to(device), labels.to(device)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()

        val_acc = val_correct / val_total

        # Print epoch results
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        # Learning rate scheduler step
        scheduler.step(val_acc)

        torch.save(model.state_dict(), checkpoint_path)

        # # Early stopping
        # if val_acc > best_val_acc:
        #     best_val_acc = val_acc
        #     counter = 0
        #     torch.save(model.state_dict(), checkpoint_path)
        # else:
        #     counter += 1
        #     if counter >= patience:
        #         print("Early stopping")
        #         break

    # Load best model
    model.load_state_dict(torch.load(checkpoint_path))

    return model  # Return None instead of history, as PyTorch doesn't have a built-in history object

In [24]:
prefix = 'trim_begin_end_1000_wo_wake_new'

In [25]:
# Run Classifier for 10 Folds
n_folds = 10
Y_pred = []; Y_test_all = []; Y_pred_val = []; Y_val_all = [];
val_loss = []; train_loss = [];
val_acc = []; train_acc = [];
confusion_matrices = [];
Sample_Idx = np.expand_dims(np.arange(np.shape(Y)[0]),axis=1) 

count = 0
# split data into Train/Val and Test Groups
cv = GroupShuffleSplit(n_splits=n_folds,test_size=0.1,train_size=0.9,random_state=0)
for train_val_idx, test_idx in tqdm(cv.split(X,Y,S), desc=f"Split", total=n_folds):
    X_train_val = X[train_val_idx,...]
    Y_train_val = Y[train_val_idx,...]
    S_train_val = S[train_val_idx,...]
    X_test = X[test_idx,...]
    Y_test = Y[test_idx,...]
    S_test = S[test_idx,...]
    Sample_Idx_Test = Sample_Idx[test_idx,...]
    
    # Split Train/Val Data into Training and Validation Groups
    cv2 = GroupShuffleSplit(n_splits=1,test_size=0.10,train_size=0.90,random_state=0)
    for train_idx, val_idx in cv2.split(X_train_val,Y_train_val,S_train_val):
        X_train = X_train_val[train_idx,...]
        Y_train = Y_train_val[train_idx,...]
        S_train = S_train_val[train_idx,...]
        X_val = X_train_val[val_idx,...]
        Y_val = Y_train_val[val_idx,...]
        S_val = S_train_val[val_idx,...]
    X_train_val = []; Y_train_val = []; S_train_val = []
    
    # Define Model Checkpoints
    if prefix:
        directory = f"sleep_pretrain_ckpt/{prefix}"
        if not os.path.exists(directory):
            os.makedirs(directory)
        file_path = f"{directory}/sleep_model_Fold"+str(count)+".pt"
    else:
        file_path = "sleep_pretrain_ckpt/sleep_model_Fold"+str(count)+".pt"

    # Evaluate model
    model = evaluate_model(X_train, X_val, Y_train, Y_val, checkpoint_path=file_path)

    X_test = torch.tensor(X_test).to(device)
    y_pred = torch.argmax(model(X_test), dim=1).cpu().data.numpy()

    confusion_matrices.append(confusion_matrix(Y_test, y_pred))
    
    print(count)
    count += 1

Split:   0%|          | 0/10 [00:00<?, ?it/s]



RuntimeError: The size of tensor a (5) must match the size of tensor b (19) at non-singleton dimension 1

In [4]:
def calculate_metrics(confusion_matrices, class_mapping):
    n_classes = len(class_mapping)
    
    # Initialize arrays to store metrics
    precision = np.zeros((len(confusion_matrices), n_classes))
    recall = np.zeros((len(confusion_matrices), n_classes))
    f1_score = np.zeros((len(confusion_matrices), n_classes))
    
    for i, cm in enumerate(confusion_matrices):
        for class_name, class_index in class_mapping.items():
            tp = cm[class_index, class_index]
            fp = np.sum(cm[:, class_index]) - tp
            fn = np.sum(cm[class_index, :]) - tp
            
            # Calculate precision
            precision[i, class_index] = tp / (tp + fp) if (tp + fp) > 0 else 0
            
            # Calculate recall
            recall[i, class_index] = tp / (tp + fn) if (tp + fn) > 0 else 0
            
            # Calculate F1-score
            f1_score[i, class_index] = 2 * (precision[i, class_index] * recall[i, class_index]) / (precision[i, class_index] + recall[i, class_index]) if (precision[i, class_index] + recall[i, class_index]) > 0 else 0
    
    # Calculate mean and standard deviation for each metric
    mean_precision = np.mean(precision, axis=0) * 100
    std_precision = np.std(precision, axis=0) * 100
    mean_recall = np.mean(recall, axis=0) * 100
    std_recall = np.std(recall, axis=0) * 100
    mean_f1 = np.mean(f1_score, axis=0) * 100
    std_f1 = np.std(f1_score, axis=0) * 100
    
    # Print results
    print("Class\t\tPrecision\t\tRecall\t\t\tF1-Score")
    print("-" * 70)
    for class_name, class_index in class_mapping.items():
        print(f"{class_name}\t\t{mean_precision[class_index]:.2f}±{std_precision[class_index]:.2f}\t\t{mean_recall[class_index]:.2f}±{std_recall[class_index]:.2f}\t\t{mean_f1[class_index]:.2f}±{std_f1[class_index]:.2f}")

    
class_mapping = {
    "Awake": 4,
    "NREM1": 0,
    "NREM2": 1,
    "NREM3": 2,
    "REM": 3
}

class_mapping

{'Awake': 4, 'NREM1': 0, 'NREM2': 1, 'NREM3': 2, 'REM': 3}

In [5]:
calculate_metrics(confusion_matrices, class_mapping)

NameError: name 'confusion_matrices' is not defined

In [5]:
import tensorflow as tf
from keras.layers import Layer
import keras

class ChannelDropout(Layer):
    def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
        super().__init__(**kwargs)
        self.rate = min(1., max(0., rate))
        self.noise_shape = noise_shape
        self.seed = seed
    
    def call(self, inputs, training = None):
        return tf.nn.dropout(inputs, 
                             rate=self.rate, 
                            noise_shape = [1,1,inputs.shape[2]])

    def compute_output_shape(self, input_shape):
        return input_shape
    
    def get_config(self):
        config = {
            "rate": self.rate,
            "noise_shape": self.noise_shape,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))

2024-10-25 06:53:44.433558: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-25 06:53:44.461860: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
custom_objects = {'ChannelDropout': ChannelDropout}
with keras.saving.custom_object_scope(custom_objects):
    keras_model = keras.models.load_model('Pretraining/Sleep_Models/sleep_model_Fold9.hdf5')

2024-10-25 06:53:45.763176: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-25 06:53:45.764087: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [10]:
out = keras_model.predict(X.transpose(0, 2, 1))

 15/411 [>.............................] - ETA: 3s

2024-10-25 06:55:02.308801: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2995236000 exceeds 10% of free system memory.




In [21]:
Y[:100]

array([0, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 1, 2, 2, 2, 2, 1, 2, 1,
       2, 0, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0, 2, 1, 2, 1, 2, 3,
       0, 1, 2, 0, 1, 0, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1, 0, 1, 0, 1, 3, 1, 0, 4, 0, 1, 0,
       1, 0, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2])

In [20]:
np.argmax(out, axis=1)[:100]

array([3, 3, 3, 3, 3, 3, 1, 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 2,
       2, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
       2, 2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 2, 2, 2, 2, 2, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [6]:
import sys
import mne
import numpy as np
import os
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import pandas as pd
from multiprocessing import Process
import pickle
import argparse

def pretext_train_test(root_folder, k, N, epoch_sec):
    all_index = np.unique([path[:6] for path in os.listdir(root_folder)])
    
    pretext_index = np.random.choice(all_index, int(len(all_index) * 0.9), replace=False)
    train_index = np.random.choice(list(set(all_index) - set(pretext_index)), int(len(all_index) * 0.05), replace=False)
    test_index = list(set(all_index) - set(pretext_index) - set(train_index))

    print ('start pretext process!')
    sample_process(root_folder, k, N, epoch_sec, 'pretext', pretext_index)
    print ()
    
    print ('start train process!')
    sample_process(root_folder, k, N, epoch_sec, 'train', train_index)
    print ()
    
    print ('start test process!')    
    sample_process(root_folder, k, N, epoch_sec, 'test', test_index)
    print ()


def sample_process(root_folder, k, N, epoch_sec, train_test_val, index):
    for i, j in enumerate(index):
        if i % N == k:
            if k == 0:
                print ('Progress: {} / {}'.format(i, len(index)))

            # load signal "X" part
            data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
            X = data.get_data()[:2, :]
            
            # load label "Y" part
            ann = mne.read_annotations(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('Hypnogram' in x), os.listdir(root_folder)))[0])

            offset = 5
            delete_annotations = [i for i, x in enumerate(ann) if x['description']=='Sleep stage W' and (i < offset or i >= len(ann) - offset)]
            ann.delete(delete_annotations)
            delete_annotations = [i for i, x in enumerate(ann) if x['description'] in {'Sleep stage ?', 'Movement time'}]
            ann.delete(delete_annotations)

            labels = []
            for dur, des in zip(ann.duration, ann.description):
                for i in range(int(dur) // 30):
                    labels.append(des[-1])

            # slice the EEG signals into non-overlapping windows, window size = sampling rate per second * second time = 100 * windowsize
            for slice_index in range(X.shape[1] // (100 * epoch_sec)):
                if slice_index >= len(labels):
                    break
                
                # ingore the no labels
                path = './Spectral_Explainability/cassette_processed/{}/'.format(train_test_val) + 'cassette-' + j + '-' + str(slice_index) + '.pkl'
                pickle.dump({'X': X[:, slice_index * 100 * epoch_sec: (slice_index+1) * 100 * epoch_sec], \
                    'y': labels[slice_index]}, open(path, 'wb'))

    
if not os.path.exists('./Spectral_Explainability/cassette_processed'):
    os.makedirs('./Spectral_Explainability/cassette_processed/pretext')
    os.makedirs('./Spectral_Explainability/cassette_processed/train')
    os.makedirs('./Spectral_Explainability/cassette_processed/test')

root_folder = './physionet.org/files/sleep-edfx/1.0.0/sleep-cassette'

N, epoch_sec = 10, 30
p_list = []
for k in range(N):
    process = Process(target=pretext_train_test, args=(root_folder, k, N, epoch_sec))
    process.start()
    p_list.append(process)

for i in p_list:
    i.join()

  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...


Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4411E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4541F0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4341F0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4061E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4221E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4171E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4501E0-PSG.edf...
EDF file detected
Setting channel info structure...


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4641E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4122E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4011E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4022E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4051E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4372F0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4101E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4291G0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4532E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4471F0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4701E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4731E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4772G0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4241E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4671G0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4522E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Progress: 60 / 137
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4811G0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4331F0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4302E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4232E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4602E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4112E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4152E0-PSG.edf...
EDF file detected
Setting channel info structure...


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4292G0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...

Progress: 110 / 137
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4632E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4121E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Progress: 120 / 137
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4592G0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...
Progress: 130 / 137
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4622E0-PSG.edf...
EDF file detected
Setting channel info structure...


  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])
  data = mne.io.read_raw_edf(root_folder + '/' + list(filter(lambda x: (x[:6] == j) and ('PSG' in x), os.listdir(root_folder)))[0])


Creating raw.info structure...

start test process!
Extracting EDF parameters from /home/ansafronov/Yandex.Disk/Studies/neuroml/project/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4382F0-PSG.edf...
EDF file detected
Setting channel info structure...


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset
import random

# Residual Block
class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=False, pooling=False):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ELU(inplace=True)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.maxpool = nn.MaxPool1d(2, stride=2) 
        self.downsample = nn.Sequential(
           nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
           nn.BatchNorm1d(out_channels)
        )
        self.downsampleOrNot = downsample
        self.pooling = pooling
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        # out = self.dropout(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsampleOrNot:
            residual = self.downsample(x)
        out += residual
        if self.pooling:
            out = self.maxpool(out)
        out = self.dropout(out)
        return out

class CNNEncoder1d_SLEEP(nn.Module):
    def __init__(self, n_dim):
        super(CNNEncoder1d_SLEEP, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv1d(4, 6, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(6),
            nn.ELU(inplace=True),
        )
        self.conv2 = ResBlock(6, 8, 2, True, False)
        self.conv3 = ResBlock(8, 16, 2, True, True)
        self.conv4 = ResBlock(16, 32, 2, True, True)
        self.n_dim = n_dim

        self.fc = nn.Sequential(
            nn.Linear(128, self.n_dim, bias=True),
            nn.ReLU(),
            nn.Linear(self.n_dim, self.n_dim, bias=True),
        )

        self.sup = nn.Sequential(
            nn.Linear(128, 32, bias=True),
            nn.ReLU(),
            nn.Linear(32, 5, bias=True),
        )

        self.byol_mapping = nn.Sequential(
            nn.Linear(128, self.n_dim, bias=True),
            nn.ReLU(),
            nn.Linear(self.n_dim, self.n_dim, bias=True),
        )

    def torch_stft(self, X_train):
        signal = []

        for s in range(X_train.shape[1]):
            spectral = torch.stft(X_train[:, s, :],
                n_fft = 256,
                hop_length = 256 * 1 // 4,
                center = False,
                onesided = True,
                return_complex=False)
            signal.append(spectral)
        
        signal1 = torch.stack(signal)[:, :, :, :, 0].permute(1, 0, 2, 3)
        signal2 = torch.stack(signal)[:, :, :, :, 1].permute(1, 0, 2, 3)

        return torch.cat([torch.log(torch.abs(signal1) + 1e-8), torch.log(torch.abs(signal2) + 1e-8)], dim=1)

    def forward(self, x, simsiam=False, mid=True, byol=False, sup=False):
        x = self.torch_stft(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        x = x.reshape(x.shape[0], -1)

        if sup:
            return self.sup(x)
        elif simsiam:
            return x, self.fc(x)
        elif mid:
            return x
        elif byol:
            x = self.fc(x)
            x = self.byol_mapping(x)
            return x
        else:
            x = self.fc(x)
            return x

In [11]:


class SLEEPCALoader(torch.utils.data.Dataset):
    def __init__(self, list_IDs, dir, SS=True):
        self.list_IDs = list_IDs
        self.dir = dir
        self.SS = SS

        self.label_list = ['W', 'R', 1, 2, 3]
        self.bandpass1 = (1, 5)
        self.bandpass2 = (30, 49)
        self.n_length = 100 * 30
        self.n_channels = 19
        self.n_classes = 5
        self.signal_freq = 100
        self.bound = 0.00025

    def __len__(self):
        return len(self.list_IDs)

    def corupt(self, x):
        """
        Add noise to multiple ts
        Input: 
            x: (n_channel, n_length)
        Output: 
            x: (n_channel, n_length)
        """
        single_channel = x[0,:]
        single_channel = (single_channel - single_channel.mean()) / single_channel.std() 
        single_channel = single_channel[None, :]

        multi_channel = np.repeat(single_channel, repeats=19, axis=0) 
        multi_channel += np.random.normal(scale=0.7, size=multi_channel.shape)
        return multi_channel
    
    def __getitem__(self, index):
        path = self.dir + self.list_IDs[index]
        sample = pickle.load(open(path, 'rb'))
        X, y = sample['X'], sample['y']
        
        # original y.unique = [0, 1, 2, 3, 5]
        if y == 'W':
            y = 0
        elif y == 'R':
            y = 4
        elif y in ['1', '2', '3']:
            y = int(y)
        elif y == '4':
            y = 3
        else:
            y = 0
        
        y = torch.LongTensor([y])

        X = self.corupt(X)

        return torch.FloatTensor(X), y

pretext_dir = './Spectral_Explainability/cassette_processed/pretext/'
train_dir = './Spectral_Explainability/cassette_processed/train/'
test_dir = './Spectral_Explainability/cassette_processed/test/'

pretext_index = os.listdir(pretext_dir)
train_index = os.listdir(train_dir)
train_index = train_index[:len(train_index)//2]
test_index = os.listdir(test_dir)

print ('pretext (all patient): ', len(pretext_index))
print ('train (all patient): ', len(train_index))
print ('test (all) patient): ', len(test_index))

pretext_loader = torch.utils.data.DataLoader(SLEEPCALoader(pretext_index, pretext_dir, True), 
                batch_size=128, shuffle=True)
train_loader = torch.utils.data.DataLoader(SLEEPCALoader(train_index, train_dir, False), 
                batch_size=128, shuffle=False)
test_loader = torch.utils.data.DataLoader(SLEEPCALoader(test_index, test_dir, False), 
                batch_size=128, shuffle=False)

# define and initialize the model
device = 'cuda'

model = get_model()
model.to(device)

pretext (all patient):  414259
train (all patient):  33195
test (all) patient):  83077


ModelMDD(
  (channel_dropout): ChannelDropout(rate=0.25, noise_shape=None, seed=None)
  (conv1): Conv1d(19, 5, kernel_size=(10,), stride=(1,))
  (conv2): Conv1d(5, 10, kernel_size=(10,), stride=(1,))
  (conv3): Conv1d(10, 10, kernel_size=(10,), stride=(1,))
  (conv4): Conv1d(10, 15, kernel_size=(5,), stride=(1,))
  (bn1): BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm1d(15, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc_bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc_bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=2715, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (fc3): Linear(in_fe

In [34]:
pip install wandb

Defaulting to user installation because normal site-packages is not writeable
Collecting wandb
  Downloading wandb-0.18.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m80.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting gitpython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 KB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
Collecting docker-pycreds>=0.4.0
  Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitdb<5,>=4.0.1
  Using cached gitdb-4.0.11-py3-none-any.whl (62 kB)
Collecting smmap<6,>=3.0.1
  Using cached smmap-5.0.1-py3-none-any.whl (24 kB)
Installing collected packages: smmap, docker-pycreds, gitdb, gitpython, wandb
Successfully installed docker-pycreds-0.4.0 gitdb-4.0.11 gitpython-3.1.43 smmap-5.0.1 wandb-0.18.5
Note: you may need to r

In [6]:
experiment = Experiment(
  api_key="sPRzejvIjrBCMFoZZDmZSAec3",
  project_name="neroml",
  workspace="ansafronov"
)

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/ansafronov/neroml/734cdb67211149178a5308e85dfca84c



In [7]:
from tqdm import tqdm
import gc 
from comet_ml import Experiment

def train(model, optimizer, Epoch, loss_func, train_loader, test_loader):

    model.train()
    acc_list = []

    experiment.log_parameters({
        "learning_rate": optimizer.param_groups[0]['lr'],
        "batch_size": train_loader.batch_size,
        "epochs": Epoch,
        "optimizer": "Adam",
        "model_architecture": "CNN"
    })

    # train
    for epoch in range(Epoch):
        print ()
        # Train
        correct_train, total_train, loss_train = [], [], []
        max_min = [0,0]
        conf_matrix = np.zeros((5, 5))
        for idx, (X_train, y_train) in enumerate(tqdm(train_loader, desc='Training')):
            X_train, y_train = X_train, y_train.flatten()
            X_train, y_train = X_train.to(device), y_train.to(device)

            # backpropagation
            optimizer.zero_grad()
            pred = model(X_train)
            loss = loss_func(pred, y_train)
            loss.backward()
            optimizer.step()
            
            # print ('compute accuracy')
            total_train.append(y_train.shape[0])
            correct_train.append((torch.argmax(pred.data, 1) == y_train).sum().item())
            loss_train.append(loss.item())

            # conf_matrix += confusion_matrix(y_train.detach().cpu().numpy(), torch.argmax(pred.data, 1).detach().cpu().numpy())

        print(conf_matrix)
            
        print ("epoch: {}, avg_loss: {}, train accuracy: {:.2f}%".format(epoch, sum(loss_train) / len(loss_train), sum(correct_train) / sum(total_train) * 100))

        gc.collect()

        # # evaluation
        # with torch.no_grad():
        #     model.eval()
            

        #     conf_matrix = np.zeros((5, 5))
        #     correct_test, total_test = 0, 0
        #     for idx, (X_test, y_test) in enumerate(tqdm(test_loader, desc='Evaluating')):
        #         X_test, y_test = X_test, y_test.flatten()
        #         X_test, y_test = X_test.to(device), y_test

        #         y_pred = torch.argmax(model(X_test), 1).detach().cpu().numpy()
        #         correct_test += np.sum(y_pred == y_test.numpy())
        #         total_test += y_test.numpy().shape[0]

        #         conf_matrix += confusion_matrix(y_test.numpy(), y_pred)

        #     print ("------------------------------")
        #     print ("epoch: {}, train accuracy: {:.2f}%, test accuracy: {:.2f}%".format(epoch, sum(correct_train) / sum(total_train) * 100, correct_test / total_test * 100))

        #     print(conf_matrix)
            
        #     acc_list.append(correct_test / total_test)
        #     # print (confusion_matrix(pred, target))

        #     model.train()
        
        if epoch > 10:
            print ('recent five epoch, mean: {}, std: {}'.format(np.mean(acc_list[-10:]), np.std(acc_list[-10:])))

        experiment.log_metric("loss", loss, step=epoch)
        experiment.log_metric("accuracy", sum(correct_train) / sum(total_train) * 100, step=epoch)

        torch.save(model.state_dict(), 'sleep_pretrain.pt')


In [9]:
device='cuda'

In [10]:
model = get_model()
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.078)
loss_func = nn.CrossEntropyLoss()

# training
train(model, optimizer, 20, loss_func, train_loader, test_loader)

experiment.end()

NameError: name 'train_loader' is not defined

In [None]:
import os
import time

import yaml
from dotmap import DotMap


def get_config_from_yaml(yaml_file):
    """
    Get the config from a yaml file
    :param yaml_file:
    :return: config(namespace) or config(dictionary)
    """
    # parse the configurations from the config yaml file provided
    with open(yaml_file, 'r') as config_file:
        config_dict = yaml.safe_load(config_file)

    # convert the dictionary to a namespace using bunch lib
    config = DotMap(config_dict)

    return config, config_dict


def process_config(yaml_file):
    config, _ = get_config_from_yaml(yaml_file)

    return config

config = process_config('config.yaml')

class RnnModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config

        # Assign parameters
        self.filter_base = config.network.filter_base
        self.kernel_size = config.network.kernel_size
        self.max_pooling = config.network.max_pooling
        self.num_blocks = config.network.num_blocks
        self.num_channels = len(config.data_loader.modalities) + 1
        self.num_classes = config.data_loader.num_classes
        self.rnn_bidirectional = config.network.rnn_bidirectional
        self.rnn_num_layers = config.network.rnn_num_layers
        self.rnn_num_units = config.network.rnn_num_units if config.network.rnn_num_units is not None else 4 * \
            self.filter_base * (2 ** (self.num_blocks - 1))

        # Create network
        if self.num_channels != 1:
            self.mixing_block = nn.Sequential(OrderedDict([
                ('mix_conv', nn.Conv2d(1, self.num_channels, (self.num_channels, 1))),
                ('mix_batchnorm', nn.BatchNorm2d(self.num_channels)),
                ('mix_relu', nn.ReLU())
            ]))

        # Define shortcut
        self.shortcuts = nn.ModuleList([
            nn.Sequential(OrderedDict([
                ('shortcut_conv_{}'.format(k), nn.Conv2d(
                    in_channels=self.num_channels if k == 0 else 4 *
                    self.filter_base * (2 ** (k - 1)),
                    out_channels=4 * self.filter_base * (2 ** k),
                    kernel_size=(1, 1)))
            ])) for k in range(self.num_blocks)
        ])

        # Define basic block structure
        self.blocks = nn.ModuleList([
            nn.Sequential(OrderedDict([
                ("conv_{}_1".format(k), nn.Conv2d(
                    in_channels=self.num_channels if k == 0 else 4 * self.filter_base *
                    (2 ** (k - 1)),
                    out_channels=self.filter_base * (2 ** k),
                    kernel_size=(1, 1))),
                # ("padding_{}".format(k), nn.ConstantPad2d([1, 1, 0, 0], 0)),
                ("batchnorm_{}_1".format(k), nn.BatchNorm2d(
                    self.filter_base * (2 ** k))),
                ("relu_{}_1".format(k), nn.ReLU()),
                ("conv_{}_2".format(k), nn.Conv2d(
                    in_channels=self.filter_base * (2 ** k),
                    out_channels=self.filter_base * (2 ** k),
                    kernel_size=(1, self.kernel_size),
                    padding=(0, self.kernel_size // 2))),
                ("batchnorm_{}_2".format(k), nn.BatchNorm2d(
                    self.filter_base * (2 ** k))),
                ("relu_{}_2".format(k), nn.ReLU()),
                ("conv_{}_3".format(k), nn.Conv2d(
                    in_channels=self.filter_base * (2 ** k),
                    out_channels=4 * self.filter_base * (2 ** k),
                    kernel_size=(1, 1))),
                ("batchnorm_{}_3".format(k), nn.BatchNorm2d(
                    4 * self.filter_base * (2 ** k)))
            ])) for k in range(self.num_blocks)
        ])
        self.maxpool = nn.MaxPool2d(kernel_size=(1, self.max_pooling))
        self.relu = nn.ReLU()

        if self.rnn_num_units == 0:

            # Classification (outputs only logits)
            self.classification = nn.Conv1d(
                in_channels=4 * self.filter_base * (2 ** (self.num_blocks - 1)),
                out_channels=self.num_classes,
                kernel_size=1)
        else:

            # Temporal processing
            self.temporal_block = nn.GRU(
                input_size=4 * self.filter_base * (2 ** (self.num_blocks - 1)),
                hidden_size=self.rnn_num_units, num_layers=self.rnn_num_layers,
                batch_first=True, dropout=0, bidirectional=self.rnn_bidirectional)
            self.temporal_block.flatten_parameters()

            # Classification (outputs only logits)
            self.classification = nn.Conv1d(
                in_channels=(1 + self.rnn_bidirectional) * self.rnn_num_units,
                out_channels=self.num_classes,
                kernel_size=1)
        # self.classification = nn.Sequential(OrderedDict([
        #     ('cls_conv', nn.Conv1d(
        #         in_channels=(1 + self.rnn_bidirectional) *
        #         self.rnn_num_units,
        #         out_channels=self.num_classes,
        #         kernel_size=1)),
        #     ('softmax', nn.Softmax(dim=1))
        # ]))

    def forward(self, x):

        # if self.temporal_block:
        #     self.temporal_block.flatten_parameters()

        z = self.mixing_block(x)
        for block, shortcut in zip(self.blocks, self.shortcuts):
            y = shortcut(z)
            z = block(z)
            z += y
            z = self.relu(z)
            z = self.maxpool(z)
        # print(z.shape)
        # RNN part
        # print(self.rnn_num_units)
        if self.rnn_num_units == 0:
            z = self.classification(z.squeeze(2))
            # print('Hej! ' + str(z.shape))
        else:
            z = self.temporal_block(z.squeeze(2).transpose(1, 2))
            # print(z[0].shape)
            z = self.classification(z[0].transpose(1, 2))

        return z
    
model_1 = RnnModel(config)

ModuleNotFoundError: No module named 'dotmap'

In [71]:
torch.save(model.state_dict(), 'sleep_pretrain.pt')

In [91]:
from sklearn.metrics import confusion_matrix


with torch.no_grad():
    model.eval()
    
    pred, target = [], []
    for idx, (X_test, y_test) in enumerate(tqdm(test_loader, desc='Evaluating')):
        X_test, y_test = X_test, y_test.flatten()
        X_test, y_test = X_test.to(device), y_test

        y_pred = torch.argmax(model(X_test), 1).data.cpu()
        pred += list(y_pred); target += list(y_test.numpy())

    conf_matr = confusion_matrix(target, pred)
    print(conf_matr)

Evaluating:  19%|█▊        | 69/369 [00:11<00:49,  6.06it/s]


KeyboardInterrupt: 

In [97]:
model.to(device)

model(X_test)

tensor([[0.1731, 0.0865, 0.3641, 0.2291, 0.1472],
        [0.3443, 0.2227, 0.1733, 0.1562, 0.1035],
        [0.1386, 0.1087, 0.4471, 0.1579, 0.1477],
        [0.1756, 0.3059, 0.1457, 0.2733, 0.0995],
        [0.1338, 0.2841, 0.3023, 0.1501, 0.1296],
        [0.2825, 0.1162, 0.1533, 0.1659, 0.2821],
        [0.3896, 0.0923, 0.2929, 0.1433, 0.0820],
        [0.3027, 0.1222, 0.2159, 0.1681, 0.1911],
        [0.0928, 0.1326, 0.2426, 0.4100, 0.1220],
        [0.1323, 0.1878, 0.3243, 0.2350, 0.1206],
        [0.0697, 0.1921, 0.1856, 0.3282, 0.2244],
        [0.2592, 0.1750, 0.3134, 0.2079, 0.0445],
        [0.1190, 0.3300, 0.1804, 0.1944, 0.1762],
        [0.1325, 0.1038, 0.1483, 0.1965, 0.4190],
        [0.1567, 0.1361, 0.1523, 0.3705, 0.1844],
        [0.1161, 0.1572, 0.2656, 0.3263, 0.1348],
        [0.1515, 0.2233, 0.0951, 0.3053, 0.2248],
        [0.3942, 0.1128, 0.3018, 0.1093, 0.0818],
        [0.2226, 0.3168, 0.1800, 0.0872, 0.1934],
        [0.1957, 0.1268, 0.3267, 0.2393, 0.1114],


In [69]:
from sklearn.metrics import confusion_matrix

conf_matr = confusion_matrix(target, pred)
conf_matr

array([[14493,     0,     0,     0,     0],
       [ 1176,     0,     0,     0,     0],
       [ 3813,     0,     0,     0,     0],
       [  548,     0,     0,     0,     0],
       [ 1454,     0,     0,     0,     0]])

In [68]:
conf_matr

array([[14493,     0,     0,     0,     0],
       [ 1176,     0,     0,     0,     0],
       [ 3813,     0,     0,     0,     0],
       [  548,     0,     0,     0,     0],
       [ 1454,     0,     0,     0,     0]])

In [None]:
def calculate_metrics(confusion_matrices, class_mapping):
    n_classes = len(class_mapping)
    
    # Initialize arrays to store metrics
    precision = np.zeros((len(confusion_matrices), n_classes))
    recall = np.zeros((len(confusion_matrices), n_classes))
    f1_score = np.zeros((len(confusion_matrices), n_classes))
    
    for i, cm in enumerate(confusion_matrices):
        for class_name, class_index in class_mapping.items():
            tp = cm[class_index, class_index]
            fp = np.sum(cm[:, class_index]) - tp
            fn = np.sum(cm[class_index, :]) - tp
            
            # Calculate precision
            precision[i, class_index] = tp / (tp + fp) if (tp + fp) > 0 else 0
            
            # Calculate recall
            recall[i, class_index] = tp / (tp + fn) if (tp + fn) > 0 else 0
            
            # Calculate F1-score
            f1_score[i, class_index] = 2 * (precision[i, class_index] * recall[i, class_index]) / (precision[i, class_index] + recall[i, class_index]) if (precision[i, class_index] + recall[i, class_index]) > 0 else 0
    
    # Calculate mean and standard deviation for each metric
    mean_precision = np.mean(precision, axis=0) * 100
    std_precision = np.std(precision, axis=0) * 100
    mean_recall = np.mean(recall, axis=0) * 100
    std_recall = np.std(recall, axis=0) * 100
    mean_f1 = np.mean(f1_score, axis=0) * 100
    std_f1 = np.std(f1_score, axis=0) * 100
    
    # Print results
    print("Class\t\tPrecision\t\tRecall\t\t\tF1-Score")
    print("-" * 70)
    for class_name, class_index in class_mapping.items():
        print(f"{class_name}\t\t{mean_precision[class_index]:.2f}±{std_precision[class_index]:.2f}\t\t{mean_recall[class_index]:.2f}±{std_recall[class_index]:.2f}\t\t{mean_f1[class_index]:.2f}±{std_f1[class_index]:.2f}")

    
class_mapping = {
    "Awake": 4,
    "NREM1": 0,
    "NREM2": 1,
    "NREM3": 2,
    "REM": 3
}

class_mapping