In [None]:
import os
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

import torch    
from torch.utils.data import Dataset, DataLoader

from utils import add_zero_to_id, load_single_ppg, pad_and_create_mask

In [3]:
# Build the padded PPG dataset for the subjects listed in df_psm, save it as
# .npy files, then create and save an 80/20 train/test split.
path_prepro = 'Y:/MESA_0.7.0/ppg_preprocessed'
path_npy = "Y:/MESA_0.7.0/ppg_preprocessed_npy"
df_psm = pd.read_csv("./df_delected_psm.csv", index_col=0)

list_fn = os.listdir(path_prepro)
sr_fn = pd.Series(list_fn)
# Exact-name lookup: a substring/regex test (Series.str.contains) can match a
# different file than the one that is actually opened below.
set_fn = set(list_fn)

# Load data
list_ppg = []
list_labels = []
for subject_id in tqdm(df_psm.index):  # load files in the order of the ids in df_psm
    fn_ppg = 'mesa-sleep-%s.h5' % add_zero_to_id(subject_id)
    if fn_ppg in set_fn:  # only load subjects whose preprocessed file exists
        list_ppg.append(load_single_ppg(os.path.join(path_prepro, fn_ppg)))  # ppg data append
        list_labels.append(df_psm.loc[subject_id, 'insmnia5'])  # label append

if not list_ppg:
    raise FileNotFoundError(
        "No 'mesa-sleep-<id>.h5' file in %s matches an id of df_psm" % path_prepro
    )

# Zero-pad every recording to a common length and build a mask marking the
# padded tail, so that the padded part can be excluded from training.
# NOTE(review): the keyword is spelled 'verobose' as in the original call;
# confirm against the signature of utils.pad_and_create_mask before renaming.
padded_list_data, mask = pad_and_create_mask(list_ppg, verobose=True)
list_labels = np.array(list_labels)

# Save whole data
os.makedirs(path_npy, exist_ok=True)
print("Save whole data (ppg, label, mask)...")
np.save(os.path.join(path_npy, "ppg_ins_hc.npy"), padded_list_data)
np.save(os.path.join(path_npy, "mask_ins_hc.npy"), mask)
np.save(os.path.join(path_npy, "label_ins_hc.npy"), list_labels)

# Train test split and save
# NOTE(review): the split is not stratified by label; consider
# stratify=list_labels if the classes are imbalanced (this would change the split).
data_train, data_test, labels_train, labels_test, mask_train, mask_test = \
    train_test_split(padded_list_data, list_labels, mask, test_size=0.2, random_state=42, shuffle=True)

# Save train data
path_npy_train = "Y:/MESA_0.7.0/ppg_preprocessed_npy/train"
os.makedirs(path_npy_train, exist_ok=True)
print("Train data save (ppg, label, mask) ...")
np.save(os.path.join(path_npy_train, 'ppg_ins_hc.npy'), data_train)
np.save(os.path.join(path_npy_train, 'label_ins_hc.npy'), labels_train)
np.save(os.path.join(path_npy_train, 'mask_ins_hc.npy'), mask_train)

# Save test data
path_npy_test = "Y:/MESA_0.7.0/ppg_preprocessed_npy/test"
os.makedirs(path_npy_test, exist_ok=True)
print("Test data save (ppg, label, mask) ...")
np.save(os.path.join(path_npy_test, 'ppg_ins_hc.npy'), data_test)
np.save(os.path.join(path_npy_test, 'label_ins_hc.npy'), labels_test)
np.save(os.path.join(path_npy_test, 'mask_ins_hc.npy'), mask_test)

print(data_train.shape, labels_train.shape, mask_train.shape)
print(data_test.shape, labels_test.shape, mask_test.shape)

  0%|          | 0/198 [00:00<?, ?it/s]

Shape of data: (198, 3305831)
Shape of mask: (198, 3305831)
Save whole data (ppg, label, mask)...
Train data save (ppg, label, mask) ...
Test data save (ppg, label, mask) ...
(158, 3305831) (158,) (158, 3305831)
(40, 3305831) (40,) (40, 3305831)


In [4]:
# Create custom Dataset
class gen_dataset(Dataset):
    """Dataset serving padded PPG signals and their labels from saved .npy files.

    Three files are read from the selected directory: ``ppg_ins_hc.npy``
    (signals), ``label_ins_hc.npy`` (labels) and ``mask_ins_hc.npy`` (padding mask).

    Args:
        num_classes: Stored on the instance; not used by the class itself.
        sampling_rate: Stored on the instance; not used by the class itself
            (presumably in Hz -- TODO confirm).
        train_or_test: Selects the sub-directory. Any value containing
            ``'train'`` (case-insensitive) loads ``<root_dir>/train``; otherwise
            any value containing ``'test'`` loads ``<root_dir>/test``; anything
            else loads the files directly from ``root_dir``.
        root_dir: Directory holding the whole-data files and the ``train`` /
            ``test`` sub-directories. Defaults to the original hard-coded path.
        return_mask: When True, ``__getitem__`` additionally returns the
            padding mask of the sample. Defaults to False, which keeps the
            original ``(signal, label)`` output.

    Raises:
        ValueError: If signals, labels and mask do not have the same number
            of samples.
    """

    def __init__(self, num_classes=2, sampling_rate=34.13, train_or_test='default',
                 root_dir='Y:/MESA_0.7.0/ppg_preprocessed_npy', return_mask=False):

        split = train_or_test.lower()
        if 'train' in split:
            path_load = os.path.join(root_dir, 'train')
        elif 'test' in split:
            path_load = os.path.join(root_dir, 'test')
        else:
            path_load = root_dir

        self.data = np.load(os.path.join(path_load, "ppg_ins_hc.npy"))
        self.labels = np.load(os.path.join(path_load, "label_ins_hc.npy"))
        self.mask = np.load(os.path.join(path_load, "mask_ins_hc.npy"))

        # The three arrays are indexed with the same idx, so they must agree in length.
        if not (self.data.shape[0] == self.labels.shape[0] == self.mask.shape[0]):
            raise ValueError(
                "Sample count mismatch in %s: data=%d, labels=%d, mask=%d"
                % (path_load, self.data.shape[0], self.labels.shape[0], self.mask.shape[0])
            )

        self.num_samples = self.data.shape[0]
        self.num_classes = num_classes
        self.sampling_rate = sampling_rate
        self.train_or_test = train_or_test
        self.return_mask = return_mask

    def __len__(self):
        """Return the number of samples (first dimension of the signal array)."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return ``(signal, label)`` or, with ``return_mask``, ``(signal, label, mask)``.

        ``signal`` (and ``mask``) are float32 tensors with a leading dimension of
        size 1 added by ``unsqueeze(0)``; ``label`` is a long tensor.
        """
        # Convert to PyTorch tensors
        signal = torch.tensor(self.data[idx], dtype=torch.float32).unsqueeze(0)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        if self.return_mask:
            # Same layout as the signal so the two can be combined element-wise.
            sample_mask = torch.tensor(self.mask[idx], dtype=torch.float32).unsqueeze(0)
            return signal, label, sample_mask
        return signal, label

In [5]:
# Instantiate one dataset per pre-saved split (train first, then test).
train_dataset, test_dataset = (
    gen_dataset(train_or_test=split_name) for split_name in ('train', 'test')
)

In [6]:
# Wrap both datasets in loaders with the same batch size; only the training
# loader reshuffles its samples, the test loader keeps a fixed order.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)

In [None]:
# Sanity check of the padding on the first recording: show the last two raw
# samples, then the padded signal and the mask from the last real sample onward.
# The boundary is derived from the recording itself instead of a hard-coded length.
n_valid = len(list_ppg[0])
print(list_ppg[0][-2:])
print(padded_list_data[0][n_valid - 1:])
print(mask[0][n_valid - 1:])

[1.37971059e-05 1.37971059e-05]
tensor([1.3797e-05, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
        0.0000e+00])
tensor([1., 0., 0.,  ..., 0., 0., 0.])


In [None]:
# Display the shape of the padded signal array as the cell output.
padded_list_data.shape

torch.Size([198, 3305831])