In [1]:
'''读取加载文件'''

import os
import numpy as np

# Each numbered sub-folder ('1', '2', ..., '11') is expected to contain one
# datasets.npy / labels.npy pair.
folders = [f"{i}" for i in range(1, 12)]


def load_folder_pairs(folder_names, data_name="datasets.npy", labels_name="labels.npy"):
    """Load one (data, labels) array pair from every folder that provides both.

    A folder is skipped (with a printed message) when either file is missing,
    when loading fails, or when the two arrays disagree on the number of
    samples along axis 0. Skipping the whole pair keeps the two returned
    lists index-aligned.

    Args:
        folder_names: Iterable of folder paths to visit, in order.
        data_name: File name of the data array inside each folder.
        labels_name: File name of the label array inside each folder.

    Returns:
        A tuple ``(data_arrays, label_arrays)`` of two equally long lists.
    """
    data_arrays = []
    label_arrays = []

    for folder in folder_names:
        try:
            # Build the file paths
            conductance_path = os.path.join(folder, data_name)
            labels_path = os.path.join(folder, labels_name)

            # Read both files before touching the result lists, so a failure
            # on the second file cannot leave an orphaned data array behind.
            conductance_data = np.load(conductance_path)
            labels_data = np.load(labels_path)

            # A per-folder count mismatch would otherwise go unnoticed as long
            # as the totals after concatenation happen to agree.
            if conductance_data.shape[0] != labels_data.shape[0]:
                raise ValueError(
                    f"sample count mismatch: {conductance_data.shape[0]} data rows "
                    f"vs {labels_data.shape[0]} label rows"
                )

            data_arrays.append(conductance_data)
            label_arrays.append(labels_data)

            # Report the shapes that were loaded
            print(f"Folder: {folder}")
            print(f"  Shape of conductance_datasets: {conductance_data.shape}")
            print(f"  Shape of labels: {labels_data.shape}")
        except FileNotFoundError as e:
            print(f"File not found in folder {folder}: {e}")
        except Exception as e:
            # Best-effort loading: report the problem and continue with the
            # remaining folders.
            print(f"Error processing folder {folder}: {e}")

    return data_arrays, label_arrays


# Lists holding the per-folder arrays, in folder order
conductance_list, labels_list = load_folder_pairs(folders)

# Report how many groups of data were actually loaded
print(f"\nNumber of conductance datasets loaded: {len(conductance_list)}")
print(f"Number of labels datasets loaded: {len(labels_list)}")


Folder: 1
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 2
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 3
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 4
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 5
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 6
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 7
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 8
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 9
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 10
  Shape of conductance_datasets: (1000, 5, 15, 15, 4)
  Shape of labels: (1000, 5)
Folder: 11
  Shape of conductance_datasets: (10000, 5, 15, 15, 4)
  S

In [2]:
'''拼接文件'''

import numpy as np

# conductance_list and labels_list are the per-folder arrays gathered by the
# loading cell. Judging by the shapes printed there, each conductance array is
# (n_i, 5, 15, 15, 4) and each label array is (n_i, 5); n_i may differ per folder.

# All arrays must agree on every axis except the sample axis (axis 0).
# Collecting the trailing shapes into a set leaves at most one entry when they agree.
assert len({arr.shape[1:] for arr in conductance_list}) <= 1, "conductance 的形状不一致"
assert len({arr.shape[1:] for arr in labels_list}) <= 1, "label 的形状不一致"

# Stack everything along the sample axis
conductance_combined = np.concatenate(conductance_list, axis=0)
label_combined = np.concatenate(labels_list, axis=0)

# The stacked data and labels must still pair up one-to-one
assert len(conductance_combined) == len(label_combined), "拼接后 conductance 和 label 的样本数量不匹配"

print("拼接完成")
print("conductance_combined shape:", conductance_combined.shape)
print("label_combined shape:", label_combined.shape)


拼接完成
conductance_combined shape: (20000, 5, 15, 15, 4)
label_combined shape: (20000, 5)


In [3]:
# Write the merged arrays to the working directory. np.save appends the
# ".npy" suffix because the given names do not already end with it.
np.save(file="combined_dataset", arr=conductance_combined)
np.save(file="combined_label", arr=label_combined)

In [1]:
import numpy as np
import os

# Folder-creation helper
def create_folder(folder_name):
    """Create ``folder_name`` (and any missing parents) if it does not exist yet.

    Args:
        folder_name: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder_name`` exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate exists()
    # check, so nothing can create the directory between check and creation.
    os.makedirs(folder_name, exist_ok=True)

# Load the full dataset and its labels
data = np.load("datasets.npy")
labels = np.load("labels.npy")

# Data and labels must have the same number of samples
assert data.shape[0] == labels.shape[0], "数据和标签的数量不一致"

# Shuffle the sample indices with a fixed seed so that re-running this cell
# reproduces the same split instead of overwriting the saved files with a
# different partition.
SPLIT_SEED = 42
num_samples = data.shape[0]
indices = np.random.default_rng(SPLIT_SEED).permutation(num_samples)

# Split into training (80%), validation (10%) and test (remainder) sets
train_size = int(num_samples * 0.8)
vali_size = int(num_samples * 0.1)

train_indices = indices[:train_size]
vali_indices = indices[train_size:train_size + vali_size]
test_indices = indices[train_size + vali_size:]

# The same index arrays select data and labels, so the pairs stay aligned
train_data = data[train_indices]
train_labels = labels[train_indices]
vali_data = data[vali_indices]
vali_labels = labels[vali_indices]
test_data = data[test_indices]
test_labels = labels[test_indices]

# Create the output folders and save each split
create_folder("train")
create_folder("vali")
create_folder("test")

np.save("train/train_data.npy", train_data)
np.save("train/train_labels.npy", train_labels)
np.save("vali/vali_data.npy", vali_data)
np.save("vali/vali_labels.npy", vali_labels)
np.save("test/test_data.npy", test_data)
np.save("test/test_labels.npy", test_labels)

# Print the shapes of all splits
print(f"训练集数据形状: {train_data.shape}, 标签形状: {train_labels.shape}")
print(f"验证集数据形状: {vali_data.shape}, 标签形状: {vali_labels.shape}")
print(f"测试集数据形状: {test_data.shape}, 标签形状: {test_labels.shape}")
print("数据已成功划分、保存并打印形状！")


训练集数据形状: (8000, 5, 15, 15, 4), 标签形状: (8000, 5)
验证集数据形状: (1000, 5, 15, 15, 4), 标签形状: (1000, 5)
测试集数据形状: (1000, 5, 15, 15, 4), 标签形状: (1000, 5)
数据已成功划分、保存并打印形状！
