生成低分辨图像数据

In [1]:
# from PIL import Image
# import os
# import random

# # 定义一个插值方法的列表
# methods = [Image.NEAREST, Image.BOX, Image.BILINEAR, Image.HAMMING, Image.BICUBIC, Image.LANCZOS]

# # 定义源文件夹和目标文件夹
# src_dir = '/home/cxmd/文档/data_for_AI_train/Particles/hr'
# dst_dir = '/home/cxmd/文档/data_for_AI_train/Particles/lr'

# # 遍历源文件夹中的所有文件
# for filename in os.listdir(src_dir):
#     # 检查文件是否为jpg文件
#     if filename.endswith('.jpg'):
#         # 打开图像
#         img = Image.open(os.path.join(src_dir, filename))

#         # 获取图像的尺寸
#         width, height = img.size

#         # 计算新的尺寸（这里我们将图像的宽度和高度都减半）
#         new_width = width // 2
#         new_height = height // 2

#         # 随机选择一个插值方法
#         method = random.choice(methods)

#         # 使用选定的插值方法进行下采样
#         low_res_img = img.resize((new_width, new_height), method)

#         # 保存低分辨率图像
#         low_res_img.save(os.path.join(dst_dir, filename))

数据预处理

In [2]:
import os
import torch
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, random_split

# 检查是否有可用的GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class CustomImageDataset(Dataset):
    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.img_names = os.listdir(img_dir)
        self.transform = transform

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_names[idx])
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image.to(device)  # 将数据移动到GPU上

# 定义转换操作
transform = transforms.Compose([
    transforms.Resize((768, 1024)),  # 将所有图像调整为相同的大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# 定义源文件夹
hr_dir = '/home/cxmd/文档/data_for_AI_train/Particles/hr'
lr_dir = '/home/cxmd/文档/data_for_AI_train/Particles/lr'

# 加载数据
hr_dataset = CustomImageDataset(hr_dir, transform=transform)
lr_dataset = CustomImageDataset(lr_dir, transform=transform)

# 计算训练集和测试集的大小
train_size = int(0.9 * len(hr_dataset))
test_size = len(hr_dataset) - train_size

# 分割数据集
hr_train_dataset, hr_test_dataset = random_split(hr_dataset, [train_size, test_size])
lr_train_dataset, lr_test_dataset = random_split(lr_dataset, [train_size, test_size])

# 创建数据加载器
hr_train_loader = torch.utils.data.DataLoader(hr_train_dataset, batch_size=1, shuffle=True)
hr_test_loader = torch.utils.data.DataLoader(hr_test_dataset, batch_size=1, shuffle=True)
lr_train_loader = torch.utils.data.DataLoader(lr_train_dataset, batch_size=1, shuffle=True)
lr_test_loader = torch.utils.data.DataLoader(lr_test_dataset, batch_size=1, shuffle=True)

定义模型

定义生成器

In [3]:
import torch.nn as nn
import math

class ResidualBlock(nn.Module):
    def __init__(self, in_features):
        super(ResidualBlock, self).__init__()

        # 这个卷积块包含两个卷积层，每个卷积层后面都跟着一个批量归一化层和一个PReLU激活函数
        self.conv_block = nn.Sequential(
            nn.Conv2d(in_features, in_features, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(in_features),
            nn.PReLU(),
            nn.Conv2d(in_features, in_features, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(in_features)
        )

    def forward(self, x):
        # 在前向传播过程中，我们将输入x和卷积块的输出相加，这就是所谓的"残差"连接
        return x + self.conv_block(x)

class UpsampleBlock(nn.Module):
    def __init__(self, in_features, scale_factor):
        super(UpsampleBlock, self).__init__()

        # 这个卷积层将输入的通道数增加到in_features * scale_factor ** 2
        self.conv = nn.Conv2d(in_features, in_features * scale_factor ** 2, kernel_size=3, stride=1, padding=1)
        # PixelShuffle层用于重新排列卷积层输出的通道，实现上采样
        self.pixel_shuffle = nn.PixelShuffle(scale_factor)
        # PReLU激活函数
        self.prelu = nn.PReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.pixel_shuffle(x)
        x = self.prelu(x)
        return x

class Generator(nn.Module):
    def __init__(self, scale_factor):
        upsample_block_num = int(math.log(scale_factor, 2))

        super(Generator, self).__init__()
        # 第一个卷积块
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=9, stride=1, padding=4),
            nn.PReLU()
        )
        # 残差块
        self.block2 = ResidualBlock(64)
        # 第二个卷积块
        self.block3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64)
        )
        # 上采样块
        block4 = [UpsampleBlock(64, 2) for _ in range(upsample_block_num)]
        block4.append(nn.Conv2d(64, 3, kernel_size=9, stride=1, padding=4))
        self.block4 = nn.Sequential(*block4)

    def forward(self, x):
        block1 = self.block1(x)
        block2 = self.block2(block1)
        block3 = self.block3(block2)
        block4 = self.block4(block1 + block3)

        # 使用tanh激活函数将输出限制在[0, 1]范围内
        return (torch.tanh(block4) + 1) / 2


定义判别器

In [4]:
# 定义判别器
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        # 判别器的第一部分是一系列的卷积层，每个卷积层后面都跟着一个LeakyReLU激活函数
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2),

            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2),

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),

            nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),

            nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),

            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2),

            nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2),
        )

        # 添加一个自适应平均池化层
        self.avgpool = nn.AdaptiveAvgPool2d((16, 16))

        # 判别器的第二部分是一个全连接层，用于将卷积层的输出转换为一个单一的预测值
        self.classifier = nn.Sequential(
            nn.Linear(512*16*16, 1024),
            nn.LeakyReLU(0.2),
            nn.Linear(1024, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)  # 添加这一行
        print(x.shape)  # 打印卷积层的输出大小
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


In [5]:
# 创建模型
G = Generator(scale_factor=2).to(device)  # 请根据你的需求设置scale_factor的值
D = Discriminator().to(device)

训练模型

In [6]:
# 定义损失函数
criterion = nn.BCELoss()

# 定义优化器
G_optimizer = torch.optim.Adam(G.parameters(), lr=0.0002)
D_optimizer = torch.optim.Adam(D.parameters(), lr=0.0002)

In [7]:
# 定义训练的轮数
epochs = 100

# 开始训练
for epoch in range(epochs):
    for i, (hr_real, lr_real) in enumerate(zip(hr_train_loader, lr_train_loader)):
        # 将数据移动到GPU上
        hr_real = hr_real.to(device)
        lr_real = lr_real.to(device)

        # 生成假的高分辨率图像
        hr_fake = G(lr_real)

        # 计算生成器的损失
        G_loss = criterion(D(hr_fake), torch.ones_like(D(hr_fake)))

        # 更新生成器的参数
        G_optimizer.zero_grad()
        G_loss.backward()
        G_optimizer.step()

        # 计算判别器的损失
        D_loss_real = criterion(D(hr_real), torch.ones_like(D(hr_real)))
        D_loss_fake = criterion(D(hr_fake.detach()), torch.zeros_like(D(hr_fake)))
        D_loss = (D_loss_real + D_loss_fake) / 2

        # 更新判别器的参数
        D_optimizer.zero_grad()
        D_loss.backward()
        D_optimizer.step()

    # 打印损失
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], G_Loss: {G_loss.item()}, D_Loss: {D_loss.item()}')


torch.Size([1, 512, 16, 16])


OutOfMemoryError: CUDA out of memory. Tried to allocate 768.00 MiB. GPU 0 has a total capacty of 10.91 GiB of which 454.00 MiB is free. Process 3043 has 145.32 MiB memory in use. Including non-PyTorch memory, this process has 9.88 GiB memory in use. Of the allocated memory 9.08 GiB is allocated by PyTorch, and 41.44 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF