In [1]:
import sys
import os
import numpy as np
from sklearn.utils import shuffle
from tqdm import tqdm
# Make modules located two directories above the notebook importable.
sys.path.append('../../')

# Project root: four directory levels above the current working directory.
project_path = os.path.abspath(os.path.join(os.getcwd(), '../../../../'))

# Folder the per-patient preictal/interictal .npz files are read from.
preprocessed_path = os.path.join(
    project_path, 'BilinearNetwork/Data/PreprocessedData/CHB-MIT/STFT/'
)
# Folder the merged per-patient .npz files are written to.
save_folder = os.path.join(
    project_path, 'BilinearNetwork/Data/PreprocessedData/CHB-MIT/Concanate/'
)

Merge each patient's preictal and interictal files from the STFT folder into a single shuffled file per patient, saved to the Concanate folder

In [2]:


def merge_patient_files(root_folder, output_folder=None, random_state=None):
    """Merge each patient's preictal and interictal samples into one shuffled file.

    For every patient id ``chb01`` .. ``chb24`` the files
    ``<id>_preictal.npz`` and ``<id>_interictal.npz`` are looked up in
    ``root_folder``. When both exist, their ``data`` and ``label`` arrays are
    concatenated along axis 0 (preictal first), cast to ``float32``, shuffled
    in unison and written to ``<output_folder>/<id>.npz``. Patients for which
    either file is missing are skipped silently. The source files are left in
    place.

    Args:
        root_folder: Directory containing the per-patient ``.npz`` files.
        output_folder: Directory the merged files are written to. Defaults to
            the notebook-level ``save_folder``. It is created when missing.
        random_state: Forwarded to ``sklearn.utils.shuffle``. Pass an int to
            get the same sample order on every run; ``None`` (the default)
            keeps the shuffle unseeded.
    """
    if output_folder is None:
        output_folder = save_folder
    # np.savez does not create parent directories, so make sure the target exists.
    os.makedirs(output_folder, exist_ok=True)

    for i in tqdm(range(1, 25)):  # patients chb01 .. chb24
        patient_id = "chb{:02d}".format(i)
        preictal_path = os.path.join(root_folder, patient_id + "_preictal.npz")
        interictal_path = os.path.join(root_folder, patient_id + "_interictal.npz")

        if not (os.path.exists(preictal_path) and os.path.exists(interictal_path)):
            continue

        # Open each archive once and close it as soon as the arrays are read.
        with np.load(preictal_path) as preictal, np.load(interictal_path) as interictal:
            merged_data = np.concatenate(
                (preictal['data'], interictal['data']), axis=0
            ).astype(np.float32)
            merged_label = np.concatenate(
                (preictal['label'], interictal['label']), axis=0
            ).astype(np.float32)

        # Shuffle data and labels with the same permutation.
        merged_data, merged_label = shuffle(
            merged_data, merged_label, random_state=random_state
        )

        save_path = os.path.join(output_folder, patient_id + ".npz")
        np.savez(save_path, data=merged_data, label=merged_label)

# Merge the per-patient files found in the STFT folder.
merge_patient_files(root_folder=preprocessed_path)


Create the Train/Test/Valid folders and move the merged files into the train or test folder

In [3]:
# Destination folders for the train / test / validation splits.
save_folder_train = os.path.join(project_path, 'BilinearNetwork/Data/PreprocessedData/CHB-MIT/Concanate/Train/')
save_folder_test = os.path.join(project_path, 'BilinearNetwork/Data/PreprocessedData/CHB-MIT/Concanate/Test/')
save_folder_valid = os.path.join(project_path, 'BilinearNetwork/Data/PreprocessedData/CHB-MIT/Concanate/Valid/')

# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
for split_folder in (save_folder_train, save_folder_test, save_folder_valid):
    os.makedirs(split_folder, exist_ok=True)

In [4]:
import shutil

# Patients whose merged file goes to the test folder; all others go to train.
test_patient = ['chb04', 'chb06', 'chb10', 'chb19']

for file in os.listdir(save_folder):
    if not file.endswith('.npz'):
        continue
    # The patient id is the part of the file name before the first dot.
    patient_id = file.partition('.')[0]
    target_folder = save_folder_test if patient_id in test_patient else save_folder_train
    shutil.move(os.path.join(save_folder, file), os.path.join(target_folder, file))

Slicing

In [5]:
import os
import numpy as np
from tqdm import tqdm
def split_npz_files(input_folder, output_folder, samples_per_file=20000):
    """Re-chunk the ``.npz`` files of a folder into files of a fixed sample count.

    All ``.npz`` files in ``input_folder`` are read in name order and their
    ``data`` / ``label`` arrays are treated as one continuous stream of
    samples. The stream is written to ``output_folder`` as ``data_0.npz``,
    ``data_1.npz``, ... with ``samples_per_file`` samples each; the last file
    holds whatever remains and may be smaller. Samples may therefore cross
    the boundaries of the original files.

    Note:
        The input files are listed once, before anything is written. When
        ``input_folder`` and ``output_folder`` are the same directory, the
        ``data_*.npz`` outputs land next to the inputs, so a second run would
        also read the earlier outputs as input (and may overwrite a
        ``data_*.npz`` input before it has been read).

    Args:
        input_folder: Directory containing the source ``.npz`` files.
        output_folder: Directory the chunk files are written to (created if
            missing).
        samples_per_file: Number of samples per output file; must be >= 1.

    Raises:
        ValueError: If ``samples_per_file`` is smaller than 1, or if a source
            file holds a different number of samples and labels.
    """
    if samples_per_file < 1:
        raise ValueError(
            "samples_per_file must be at least 1, got {}".format(samples_per_file)
        )

    os.makedirs(output_folder, exist_ok=True)

    # Fixed, sorted list of inputs so files are processed in a stable order.
    npz_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.npz'))

    # Samples read but not yet written (always fewer than samples_per_file).
    carry_data = None
    carry_labels = None
    current_file_index = 0

    for npz_file in tqdm(npz_files):
        # Read both arrays, then release the file handle right away.
        with np.load(os.path.join(input_folder, npz_file)) as archive:
            samples, labels = archive['data'], archive['label']

        if len(samples) != len(labels):
            raise ValueError(
                "{}: {} samples but {} labels".format(npz_file, len(samples), len(labels))
            )
        if len(samples) == 0:
            continue

        # Prepend the leftover from the previous file(s).
        if carry_data is not None:
            samples = np.concatenate((carry_data, samples), axis=0)
            labels = np.concatenate((carry_labels, labels), axis=0)

        # Write as many full chunks as are available.
        start = 0
        while len(samples) - start >= samples_per_file:
            stop = start + samples_per_file
            output_path = os.path.join(output_folder, f"data_{current_file_index}.npz")
            np.savez(output_path, data=samples[start:stop], label=labels[start:stop])
            current_file_index += 1
            start = stop

        if start < len(samples):
            # Copy so the small leftover does not keep the whole buffer alive.
            carry_data = samples[start:].copy()
            carry_labels = labels[start:].copy()
        else:
            carry_data = carry_labels = None

    # Flush the final, possibly smaller, chunk.
    if carry_data is not None:
        output_path = os.path.join(output_folder, f"data_{current_file_index}.npz")
        np.savez(output_path, data=carry_data, label=carry_labels)

# Re-chunk the training folder in place (input and output folder are the same).
# The equivalent call for the test folder is disabled:
# split_npz_files(save_folder_test, save_folder_test)
split_npz_files(input_folder=save_folder_train, output_folder=save_folder_train)

Delete intermediate files

In [6]:
def delete_files(folder):
    """Remove every file in ``folder`` whose name starts with ``chb`` and ends with ``.npz``.

    Other entries are left untouched; sub-directories are not searched.
    """
    for entry in os.listdir(folder):
        if entry.startswith('chb') and entry.endswith('.npz'):
            os.remove(os.path.join(folder, entry))
# Only the training folder is cleaned; the call for the test folder is disabled:
# delete_files(save_folder_test)
delete_files(folder=save_folder_train)

Validate any file

In [7]:
# np.load("E:\Research\BilinearNetwork\Data\PreprocessedData\CHB-MIT\Concanate\Train\data_0.npz")['label'][:20]

Validation set: collect the last samples of every training file into one file

In [8]:
def combineValidSet(num_leave_out=500):
    """Build the validation file from the tail of every training file.

    Every ``.npz`` file in ``save_folder_train`` is read in name order and its
    last ``num_leave_out`` samples (``data`` and ``label``) are collected. A
    file holding fewer than ``num_leave_out`` samples contributes all of its
    samples. The collected arrays are concatenated and written to
    ``valid_data.npz`` inside ``save_folder_valid``.

    The training files themselves are not modified, so the selected samples
    remain in them.
    NOTE(review): presumably the training loader skips the last
    ``num_leave_out`` samples of each file - confirm, otherwise the
    validation set overlaps the training set.

    Args:
        num_leave_out: Number of trailing samples taken from each file.

    Raises:
        ValueError: If ``save_folder_train`` contains no ``.npz`` file.
    """
    # Ignore non-npz entries and sort for a deterministic sample order.
    file_list = sorted(f for f in os.listdir(save_folder_train) if f.endswith('.npz'))
    if not file_list:
        raise ValueError("no .npz files found in {}".format(save_folder_train))

    data_parts = []
    label_parts = []
    for file_name in tqdm(file_list):
        # The context manager closes the archive once both arrays are read.
        with np.load(os.path.join(save_folder_train, file_name), allow_pickle=False) as archive:
            local_data = archive['data']
            local_labels = archive['label']

        # Clamp at 0: a negative start would wrap around and silently select
        # only part of a file that is shorter than num_leave_out.
        start = max(len(local_data) - num_leave_out, 0)
        # Copy the tails so the full per-file arrays can be freed right away.
        data_parts.append(local_data[start:].copy())
        label_parts.append(local_labels[start:].copy())
        del local_data, local_labels

    # One concatenation at the end instead of re-copying on every iteration.
    datas = np.concatenate(data_parts)
    labels = np.concatenate(label_parts)
    print(datas.shape)

    os.makedirs(save_folder_valid, exist_ok=True)
    np.savez(os.path.join(save_folder_valid, "valid_data.npz"), data=datas, label=labels)
# Take the last 500 samples of every training file as the validation set.
combineValidSet(num_leave_out=500)