In [3]:
import pandas as pd
import numpy as np 

# All MASS locations are derived from a single root directory. The folder
# strings keep their trailing slash so they can be used directly as path
# prefixes.
mass_root = "/mnt/guido-data/mass/"

table_path = mass_root + "table.csv"
data_folder = mass_root + "raw/"
out_folder = mass_root + "emanuele/"

# The first CSV column is used as the DataFrame index rather than as data.
table = pd.read_csv(table_path, index_col=0)

# Preview the first rows of the table.
table.head()

Unnamed: 0,subject_id,start_index,num_windows,end_index,labels,raw,xsleepnet,fold_0,fold_1,fold_2,...,fold_10,fold_11,fold_12,fold_13,fold_14,fold_15,fold_16,fold_17,fold_18,fold_19
0,0,0,1240,1240,/mnt/guido-data/mass/labels/0.npy,/mnt/guido-data/mass/raw/0.npy,/mnt/guido-data/mass/xsleepnet/0.npy,train,train,train,...,train,train,train,train,train,train,train,train,train,train
1,1,1240,1692,2932,/mnt/guido-data/mass/labels/1.npy,/mnt/guido-data/mass/raw/1.npy,/mnt/guido-data/mass/xsleepnet/1.npy,train,train,train,...,train,train,train,train,train,train,test,train,train,train
2,2,2932,1080,4012,/mnt/guido-data/mass/labels/2.npy,/mnt/guido-data/mass/raw/2.npy,/mnt/guido-data/mass/xsleepnet/2.npy,test,train,train,...,train,train,train,train,train,train,train,train,train,train
3,3,4012,1473,5485,/mnt/guido-data/mass/labels/3.npy,/mnt/guido-data/mass/raw/3.npy,/mnt/guido-data/mass/xsleepnet/3.npy,train,valid,train,...,train,train,train,train,test,train,train,train,train,train
4,4,5485,1004,6489,/mnt/guido-data/mass/labels/4.npy,/mnt/guido-data/mass/raw/4.npy,/mnt/guido-data/mass/xsleepnet/4.npy,train,train,train,...,train,train,train,train,train,test,valid,train,train,train


In [21]:
# Iterate over the rows of the table and compute a spectrogram for each raw file
from tqdm.autonotebook import tqdm
from scipy.signal import spectrogram
from physioex.preprocess.utils.signal import OnlineVariance
import os 

os.makedirs(out_folder, exist_ok=True)

# Layout of the data handled by this cell.
N_CHANNELS = 3  # channels per epoch in the raw files
EPOCH_SAMPLES = 3000  # samples per channel and epoch in the raw files
N_SEGMENTS = 5  # every epoch is cut into 5 consecutive segments ...
SEGMENT_SAMPLES = 600  # ... of 600 samples each (5 * 600 == 3000)
N_FRAMES = 5  # spectrogram time frames per segment (200-sample window, 100-sample hop)
N_FREQS = 129  # one-sided frequency bins for nfft=256 (256 // 2 + 1)

# Accumulator fed with (channel, frame, frequency) slices; its statistics are
# read back later through ov.compute().
ov = OnlineVariance(shape=(N_CHANNELS, N_FRAMES, N_FREQS))

for row_label, row in tqdm(table.iterrows(), total=table.shape[0]):
    n_windows = int(row["num_windows"])
    data_path = row["raw"]

    # The raw file is a headerless float32 buffer, so its shape has to be
    # supplied explicitly. A fresh tuple is built for every row instead of
    # mutating a shared list.
    raw_epochs = np.memmap(
        data_path,
        dtype="float32",
        mode="r",
        shape=(n_windows, N_CHANNELS, EPOCH_SAMPLES),
    )

    # (windows, channels, 3000) -> (windows, channels, 5, 600)
    segments = raw_epochs.reshape(
        n_windows, N_CHANNELS, N_SEGMENTS, SEGMENT_SAMPLES
    )

    # The spectrogram is computed along the last axis; the result has shape
    # (windows, channels, segments, frequencies, frames).
    _, _, Sxx = spectrogram(
        segments.astype(np.double),
        fs=100,
        window="hamming",
        nperseg=200,
        noverlap=100,
        nfft=256,
    )

    # log_10 scale the spectrogram safely (using epsilon)
    Sxx = 20 * np.log10(np.abs(Sxx) + np.finfo(float).eps)

    # Swap the last two axes -> (windows, channels, segments, frames, frequencies)
    Sxx = np.transpose(Sxx, (0, 1, 2, 4, 3)).astype(np.float32)

    # Fail early if the layout is not the one the scaling statistics and the
    # downstream reshapes rely on.
    expected_shape = (n_windows, N_CHANNELS, N_SEGMENTS, N_FRAMES, N_FREQS)
    assert Sxx.shape == expected_shape, (
        f"unexpected spectrogram shape {Sxx.shape} for {data_path}, "
        f"expected {expected_shape}"
    )

    # Name the output after the raw file rather than after the DataFrame
    # index: the raw/labels/xsleepnet files listed in the table all share the
    # same basename per row, and the new folder has to follow that scheme.
    out_path = os.path.join(out_folder, os.path.basename(data_path))

    # save the spectrogram on a new memmap file
    p_signal_memmap = np.memmap(
        out_path, dtype=np.float32, mode="w+", shape=Sxx.shape
    )
    p_signal_memmap[:] = Sxx[:]
    p_signal_memmap.flush()
    del p_signal_memmap

    # Bring the segment axis next to the window axis and merge the two, so
    # that every segment is handed to the accumulator as one
    # (channels, frames, frequencies) sample.
    Sxx = np.transpose(Sxx, (0, 2, 1, 3, 4))
    Sxx = Sxx.reshape(-1, N_CHANNELS, N_FRAMES, N_FREQS)

    ov.add(Sxx)

100%|██████████| 200/200 [06:13<00:00,  1.87s/it]


In [31]:
def _expand_over_segments(stat):
    """Replicate a (3, 5, 129) statistic five times along a new second axis.

    The statistic is reshaped to (1, 3, 5, 129), repeated 5 times along the
    leading axis and then the first two axes are swapped, which yields an
    array of shape (3, 5, 5, 129).
    """
    stacked = np.repeat(stat.reshape(1, 3, 5, 129), 5, axis=0)
    return np.transpose(stacked, (1, 0, 2, 3))


mean, std = ov.compute()

mean = _expand_over_segments(mean)
std = _expand_over_segments(std)

print(mean.shape, std.shape)

# Store both statistics next to the spectrogram files.
np.savez(f"{out_folder}scaling.npz", mean = mean, std = std)

(3, 5, 5, 129) (3, 5, 5, 129)


In [35]:
from physioex.data import PhysioExDataset

# Load the freshly written "emanuele" preprocessing through the regular
# dataset class to check that the files and the scaling can be read back.
dataset = PhysioExDataset(
        datasets = ["mass"],
        data_folder = "/mnt/guido-data/",
        preprocessing = "emanuele",
        selected_channels = ["EEG", "EOG", "EMG"],
        sequence_length = 1,
        indexed_channels = ["EEG", "EOG", "EMG"],
    )

signal, label = dataset[0]
print(signal.shape, label.shape)

# `signal` is a torch tensor (its shape prints as torch.Size), so the axes are
# reordered with Tensor.permute instead of np.transpose, which only works on
# tensors through NumPy's array-wrapping fallback.
# (1, 3, 5, 5, 129) -> (1, 5, 3, 5, 129) -> (5, 3, 5, 129)
signal = signal.permute(0, 2, 1, 3, 4).reshape(-1, 3, 5, 129)
signal.shape

torch.Size([1, 3, 5, 5, 129]) torch.Size([1])


torch.Size([5, 3, 5, 129])

In [41]:
from typing import List, Callable

class EmanueleDataset( PhysioExDataset ):
    """Dataset that pairs signal segments with their stored spectrograms.

    Two datasets are read side by side: the parent class, configured with the
    `preprocessing` argument, and a second `PhysioExDataset` that always uses
    the "emanuele" preprocessing. `__getitem__` returns the two signals for
    the same index; the labels of both datasets are discarded.
    """

    # Sizes hard-wired into the reshapes of __getitem__.
    _SEGMENTS_PER_EPOCH = 5  # segments each epoch is split into
    _SEGMENT_SAMPLES = 600   # samples per segment in the parent's signal
    _FRAMES_PER_SEGMENT = 5  # time frames per segment in the "emanuele" signal
    _FREQ_BINS = 129         # frequency bins per frame in the "emanuele" signal

    def __init__(
        self,
        datasets: List[str],
        data_folder: str,
        preprocessing: str = "raw",
        selected_channels: List[str] = ["EEG"],
        sequence_length: int = 21,
        target_transform: Callable = None,
        hpc: bool = False,
        indexed_channels: List[str] = ["EEG", "EOG", "EMG", "ECG"],
        task: str = "sleep",
    ):
        """Build the parent dataset and its "emanuele" counterpart.

        All arguments are forwarded unchanged to `PhysioExDataset`, once with
        the given `preprocessing` and once with `preprocessing="emanuele"`.
        """
        # Copy the channel lists so that the shared default list objects of
        # this signature can never be modified through the datasets below.
        shared_kwargs = dict(
            datasets = datasets,
            data_folder = data_folder,
            selected_channels = list(selected_channels),
            sequence_length = sequence_length,
            target_transform = target_transform,
            hpc = hpc,
            indexed_channels = list(indexed_channels),
            task = task,
        )

        super().__init__(preprocessing = preprocessing, **shared_kwargs)

        self.emanuele_data = PhysioExDataset(
            preprocessing = "emanuele", **shared_kwargs
        )

    def __getitem__(self, idx):
        """Return the segment-wise (signal, spectrogram) pair for `idx`.

        Returns:
            X: tensor of shape (L * 5, n_channels, 600), taken from the
               parent dataset.
            y: tensor of shape (L * 5, n_channels, 5, 129), taken from the
               "emanuele" dataset.
            Here L is `self.L` and n_channels is `len(self.channels_index)`.
        """
        n_channels = len(self.channels_index)
        n_segments = self.L * self._SEGMENTS_PER_EPOCH

        # Labels of both datasets are not needed here.
        X, _ = super().__getitem__(idx)
        y, _ = self.emanuele_data[idx]

        # Split every epoch into segments, then move the segment axis next to
        # the sequence axis so the two can be merged into one leading axis.
        X = X.reshape(
            self.L, n_channels, self._SEGMENTS_PER_EPOCH, self._SEGMENT_SAMPLES
        )
        X = X.permute(0, 2, 1, 3)
        X = X.reshape(n_segments, n_channels, self._SEGMENT_SAMPLES)

        # Same axis reordering for the spectrogram, which already carries a
        # segment axis in third position.
        y = y.permute(0, 2, 1, 3, 4)
        y = y.reshape(
            n_segments, n_channels, self._FRAMES_PER_SEGMENT, self._FREQ_BINS
        )

        return X, y


# The same three channel names are used both as the selection and as the
# channel index; each argument receives its own list.
channel_names = ["EEG", "EOG", "EMG"]

emanuele_data = EmanueleDataset(
    datasets=["mass"],
    data_folder="/mnt/guido-data/",
    preprocessing="raw",
    selected_channels=list(channel_names),
    sequence_length=1,
    indexed_channels=list(channel_names),
)

# Despite its name, `label` is the second tensor returned by
# EmanueleDataset.__getitem__ (read from the "emanuele" data), not a class label.
signal, label = emanuele_data[0]

print(signal.shape, label.shape)


torch.Size([5, 3, 600]) torch.Size([5, 3, 5, 129])
