In [None]:
import os
import shutil
import re
from tqdm import tqdm

def get_dir_size(path='.'):
    """
    Return the combined size, in bytes, of every file below ``path``.

    Subdirectories are walked as well; entries that are neither files nor
    directories contribute nothing to the result.
    """
    size_bytes = 0
    # Explicit work list instead of recursion: each popped folder is scanned once.
    pending = [path]
    while pending:
        current = pending.pop()
        with os.scandir(current) as entries:
            for item in entries:
                if item.is_file():
                    size_bytes += item.stat().st_size
                elif item.is_dir():
                    pending.append(item.path)
    return size_bytes

# Source folder holding the full dataset and destination folder for the filtered subset.
datadir = r'/mnt/truenas_jiangxiaotian/Edataset/complexE/b7fd11d4af74b4ffddaa0161e9d3dfac'
miedir = r'/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_Amphase/b7fd_E_mie_train'
os.makedirs(miedir, exist_ok=True)

# List the source folder once so the printed count and the loop see the same entries.
filenames = os.listdir(datadir)
print(f'文件数{len(filenames)}')
print(f'文件夹大小{get_dir_size(datadir) / (1024 ** 3):.2f} GB')

# Expected name layout: <4 alphanumerics>_theta<int>[_]phi<int>[_]f<float>.pt
# The dot before "pt" is escaped so it only matches a literal "." (same pattern
# as the one used by the conversion step).
name_pattern = re.compile(r'([A-Za-z0-9]{4})_theta(\d+)(?:_)?phi(\d+)(?:_)?f(\d+\.\d+)\.pt')
for filename in tqdm(filenames):
    match = name_pattern.match(filename)
    if match is None:
        # Skip unexpected entries instead of crashing on `None.groups()` halfway through the copy.
        print(f"\n跳过不符合命名规则的文件: {filename}")
        continue
    plane, theta, phi, f = match.groups()
    theta = int(theta); phi = int(phi); f = float(f)
    # Keep only the files whose parsed f value lies in the closed range [0.1, 1].
    if 0.1 <= f <= 1:
        src = os.path.join(datadir, filename)
        dst = os.path.join(miedir, filename)
        shutil.copy(src, dst)
print(f'mie文件数{len(os.listdir(miedir))}')
print(f'mie文件夹大小{get_dir_size(miedir) / (1024 ** 3):.2f} GB')


文件数19320
文件夹大小112.25 GB


100%|██████████| 19320/19320 [08:34<00:00, 37.52it/s]


mie文件数7659
mie文件夹大小44.50 GB


In [None]:
import torch
def _amphase_to_realimage(amphase_tensor):
    """Build [Re_theta, Im_theta, Re_phi, Im_phi] from channels 1-4 of the last axis (phases taken as degrees)."""
    # Channel 1: Abs(Theta), channel 2: Phase(Theta), channel 3: Abs(Phi), channel 4: Phase(Phi).
    # `...` indexing keeps the 3-D case identical while also accepting other leading shapes.
    amp_theta = amphase_tensor[..., 1]
    phase_theta_deg = amphase_tensor[..., 2]
    amp_phi = amphase_tensor[..., 3]
    phase_phi_deg = amphase_tensor[..., 4]

    # Degrees -> radians.
    phase_theta_rad = phase_theta_deg * (torch.pi / 180.0)
    phase_phi_rad = phase_phi_deg * (torch.pi / 180.0)

    # Euler's formula: Re = A * cos(phase), Im = A * sin(phase).
    re_theta = amp_theta * torch.cos(phase_theta_rad)
    im_theta = amp_theta * torch.sin(phase_theta_rad)
    re_phi = amp_phi * torch.cos(phase_phi_rad)
    im_phi = amp_phi * torch.sin(phase_phi_rad)

    # The four components become the new last axis.
    return torch.stack([re_theta, im_theta, re_phi, im_phi], dim=-1)


def convert_amphase_to_realimage(amphase_dir, realimage_dir):
    """
    Convert amplitude/phase ``.pt`` tensors into real/imaginary ``.pt`` tensors.

    Every entry of ``amphase_dir`` whose name matches
    ``<4 alphanumerics>_theta<int>[_]phi<int>[_]f<float>.pt`` is loaded with
    ``torch.load`` (mapped to CPU), converted, and saved into ``realimage_dir``
    under the same name with ``_RI`` inserted before the extension. Entries
    with other names are reported and skipped. A failure on one file is
    reported and does not stop the remaining files.

    Expected input tensor shape: (360, 720, 6), last axis being
    [Abs(E), Abs(Theta), Phase(Theta), Abs(Phi), Phase(Phi), Ax.Ratio]

    Output tensor shape for that input: (360, 720, 4), last axis being
    [Re(E_theta), Im(E_theta), Re(E_phi), Im(E_phi)]

    Args:
        amphase_dir: Folder containing the amplitude/phase files.
        realimage_dir: Folder receiving the converted files; created if it
            does not exist (only after ``amphase_dir`` was listed successfully).

    Returns:
        None. Progress, per-file problems and a final summary are printed.
    """
    print(f"源文件夹 (振幅相位): {amphase_dir}")
    print(f"目标文件夹 (实部虚部): {realimage_dir}")

    # 1. Collect the files to process; bail out before touching the target
    #    folder if the source folder is missing.
    try:
        file_list = os.listdir(amphase_dir)
        print(f"检测到 {len(file_list)} 个文件，开始转换...")
    except FileNotFoundError:
        print(f"错误：源文件夹不存在: {amphase_dir}")
        return

    # 2. Create the target folder.
    os.makedirs(realimage_dir, exist_ok=True)

    # 3. Convert file by file, keeping counts for the final summary.
    name_pattern = re.compile(r'([A-Za-z0-9]{4})_theta(\d+)(?:_)?phi(\d+)(?:_)?f(\d+\.\d+)\.pt')
    converted = 0
    skipped = 0
    failed = 0
    for filename in tqdm(file_list, desc="转换进度"):
        # Only handle files that follow the naming convention.
        if not name_pattern.match(filename):
            print(f"\n跳过不符合命名规则的文件: {filename}")
            skipped += 1
            continue

        try:
            src_path = os.path.join(amphase_dir, filename)

            # NOTE(review): torch.load unpickles the file, so it must only be
            # pointed at trusted data.
            amphase_tensor = torch.load(src_path, map_location=torch.device('cpu'))

            realimage_tensor = _amphase_to_realimage(amphase_tensor)

            # New file name: "_RI" is inserted in front of the extension.
            base_name, ext = os.path.splitext(filename)
            new_filename = f"{base_name}_RI{ext}"
            dst_path = os.path.join(realimage_dir, new_filename)

            torch.save(realimage_tensor, dst_path)
            converted += 1

        except Exception as e:
            # Best effort: report the problem and continue with the next file.
            failed += 1
            print(f"\n处理文件 {filename} 时发生错误: {e}")

    print("\n所有文件转换完成!")
    print(f"成功 {converted} 个, 跳过 {skipped} 个, 失败 {failed} 个")
    print(f"转换后的文件已保存至: {realimage_dir}")
    print(f"转换后的文件数: {len(os.listdir(realimage_dir))}")
    print(f"转换后的文件夹大小: {get_dir_size(realimage_dir) / (1024 ** 3):.2f} GB")

# Input folder (amplitude/phase tensors) and output folder (real/imaginary tensors).
amphase_source_dir = r'/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_Amphase/b7fd_E_mie_train'
realimage_target_dir = r'/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_train'
# Run the conversion over the whole input folder.
convert_amphase_to_realimage(
    amphase_dir=amphase_source_dir,
    realimage_dir=realimage_target_dir,
)


源文件夹 (振幅相位): /mnt/truenas_jiangxiaotian/Edataset/complexE_mie_Amphase/b7fd_E_mie_AmPhase
目标文件夹 (实部虚部): /mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_train
检测到 7659 个文件，开始转换...


转换进度: 100%|██████████| 7659/7659 [09:48<00:00, 13.01it/s]



所有文件转换完成!
转换后的文件已保存至: /mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_train
转换后的文件数: 7659
转换后的文件夹大小: 29.67 GB


In [3]:
traindir = r'/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_train'
# Split the path once into its parent folder and its last component.
train_parent, train_name = os.path.split(traindir)
val_name = train_name.replace('_train','_val')
print(train_parent)
print(train_name)
print(val_name)
print(os.path.join(train_parent, val_name))
# Note: gluing the parent folder and the new name together with "+" would drop
# the path separator between them; os.path.join inserts it.


/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage
b7fd_E_mie_train
b7fd_E_mie_val
/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_val


In [6]:
# Start from the folder the conversion step wrote to and report its contents.
traindir = realimage_target_dir
print(f'原训练集地址{traindir}')
print(f'原训练集文件数{len(os.listdir(traindir))}')
print(f'原训练集文件夹大小{get_dir_size(traindir) / (1024 ** 3):.2f} GB')

# Sibling folders share the parent of `traindir`; their names are obtained by
# replacing "_train" in the folder name.
dataset_root, train_name = os.path.split(traindir)
valdir, train10dir, train50dir = (
    os.path.join(dataset_root, train_name.replace('_train', suffix))
    for suffix in ('_val', '_10train', '_50train')
)

def split_val(src_dir, val_dir, val_ratio=0.1, seed=None):
    """
    Move a fraction of the entries of ``src_dir`` into ``val_dir``.

    Args:
        src_dir: Folder to take entries from. Selected entries are moved, so
            they no longer exist in ``src_dir`` afterwards.
        val_dir: Destination folder; created if it does not exist.
        val_ratio: Fraction of the entries to move. The number moved is
            ``int(len(entries) * val_ratio)``, i.e. rounded down.
        seed: ``None`` (default) takes the first entries in ``os.listdir``
            order. Any other value accepted by ``random.Random`` sorts the
            names and shuffles them with that seed, giving a random but
            repeatable selection.

    Returns:
        None. The remaining/moved counts and the validation-folder size are
        printed.
    """
    import random  # local import: only used for the optional seeded shuffle

    os.makedirs(val_dir, exist_ok=True)

    files = os.listdir(src_dir)
    if seed is not None:
        # Sort first so the shuffle does not depend on the filesystem's listing order.
        files.sort()
        random.Random(seed).shuffle(files)
    # Clamp at zero so a negative ratio selects nothing instead of slicing from the end.
    val_size = max(int(len(files) * val_ratio), 0)

    # Iterate over the selected slice only, so the progress bar total matches
    # the work actually done.
    for filename in tqdm(files[:val_size]):
        shutil.move(os.path.join(src_dir, filename), os.path.join(val_dir, filename))
    print(f'验证集地址{val_dir}')
    print(f'原总集文件数{len(os.listdir(src_dir))}')
    print(f'验证集文件数{len(os.listdir(val_dir))}')
    print(f'验证集文件夹大小{get_dir_size(val_dir) / (1024 ** 3):.2f} GB')
    
def split_train(src_dir, train_dir, train_ratio=0.1, seed=None):
    """
    Copy a fraction of the entries of ``src_dir`` into ``train_dir``.

    Args:
        src_dir: Folder to take entries from; it is left unchanged because
            entries are copied, not moved.
        train_dir: Destination folder; created if it does not exist.
        train_ratio: Fraction of the entries to copy. The number copied is
            ``int(len(entries) * train_ratio)``, i.e. rounded down.
        seed: ``None`` (default) takes the first entries in ``os.listdir``
            order. Any other value accepted by ``random.Random`` sorts the
            names and shuffles them with that seed, giving a random but
            repeatable selection.

    Returns:
        None. The subset's file count and folder size are printed.
    """
    import random  # local import: only used for the optional seeded shuffle

    os.makedirs(train_dir, exist_ok=True)

    files = os.listdir(src_dir)
    if seed is not None:
        # Sort first so the shuffle does not depend on the filesystem's listing order.
        files.sort()
        random.Random(seed).shuffle(files)
    # Clamp at zero so a negative ratio selects nothing instead of slicing from the end.
    train_size = max(int(len(files) * train_ratio), 0)

    # Iterate over the selected slice only, so the progress bar total matches
    # the work actually done.
    for filename in tqdm(files[:train_size]):
        shutil.copy(os.path.join(src_dir, filename), os.path.join(train_dir, filename))
    print(f'训练集地址{train_dir}')
    print(f'{train_ratio}训练集文件数{len(os.listdir(train_dir))}')
    print(f'{train_ratio}训练集文件夹大小{get_dir_size(train_dir) / (1024 ** 3):.2f} GB')

# First move 10% of the files out of the training folder into the validation folder.
split_val(traindir, valdir, val_ratio=0.1)
# Then copy 10% and 50% subsets of what remains into their own folders.
for subset_dir, subset_ratio in ((train10dir, 0.1), (train50dir, 0.5)):
    split_train(traindir, subset_dir, train_ratio=subset_ratio)


原训练集地址/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_train
原训练集文件数7659
原训练集文件夹大小29.67 GB


 10%|▉         | 765/7659 [00:02<00:21, 313.96it/s]


验证集地址/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_val
原总集文件数6894
验证集文件数765
验证集文件夹大小2.96 GB


 10%|▉         | 689/6894 [00:31<04:45, 21.72it/s]


训练集地址/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_10train
0.1训练集文件数689
0.1训练集文件夹大小2.67 GB


 50%|█████     | 3447/6894 [03:01<03:01, 18.94it/s]


训练集地址/mnt/truenas_jiangxiaotian/Edataset/complexE_mie_RealImage/b7fd_E_mie_50train
0.5训练集文件数3447
0.5训练集文件夹大小13.35 GB
