# 生成式对抗网络(GAN)

In [24]:
import pandas as pd
import numpy as np
from pandas import DataFrame
from datetime import timedelta
from numpy import ndarray
from typing import Union, List, Dict
from sklearn.preprocessing import MinMaxScaler
from ultralytics import YOLO
import cv2
from PIL import Image
import os
import json
import re
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Subset
import torch.nn.functional as F
from tqdm import tqdm  # 打印进度条
import math
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import torchvision.utils as vutils
import seaborn as sns
from typing import List
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset
from sklearn.metrics import r2_score, mean_squared_error
import joblib
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
plt.rcParams['font.sans-serif'] = ['SimHei']  # use the SimHei font so Chinese labels render in figures
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign renderable with the font selected above
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

## 基本概念

生成式对抗网络迫使生成图像与真实图像在统计上几乎无法区别，从而生成相当逼真的合成图像。GAN由一个生成器网络(generator)和一个判别式网络(discriminator)组成。判别器的训练目的是能够区分生成器的输出与来自训练集的真实图像，生成器的训练目的是欺骗判别器。生成器从未直接见过训练集中的图像，它所知道的关于数据的信息都来自于判别器。

生成器生成假数据，然后将生成的假数据和真数据都输入判别器，判别器要判断出哪些是真的哪些是假的。判别器第一次判别出来的肯定有很大的误差，然后我们根据误差来优化判别器。现在判别器水平提高了，生成器生成的数据很难再骗过判别器了，所以我们得反过来优化生成器，之后生成器水平提高了，然后反过来继续训练判别器，判别器水平又提高了，再反过来训练生成器，就这样循环往复，直到达到纳什均衡。

**生成网络的损失函数：**
$$L_G=H(1,D(G(z)))$$
上式中，$G$ 代表生成网络，$D$ 代表判别网络，$H$ 代表交叉熵，$z$ 是输入随机数据。$D(G(z))$ 是对生成数据的判断概率，1代表数据绝对真实，0代表数据绝对虚假。$H(1,D(G(z)))$ 代表判断结果与1的距离。显然生成网络想取得良好的效果，那就要做到，让判别器将生成数据判别为真数据（即 $D(G(z))$ 与1的距离越小越好）。

**判别网络的损失函数：**
$$L_D=H(1,D(x))+H(0,D(G(z)))$$
上式中，$x$ 是真实数据。这里要注意的是，$H(1,D(x))$ 代表判别器对真实数据的判断结果与1的距离，$H(0,D(G(z)))$ 代表判别器对生成数据的判断结果与0的距离。显然，判别网络要想取得良好的效果，那么就要做到，在它眼里，真实数据就是真实数据，生成数据就是虚假数据（即真实数据的判断结果与1的距离小，生成数据的判断结果与0的距离小）。
 

**理想的损失趋势**
- 生成器的损失 (Generator Loss)：

    - 初期较高，随着训练的进行逐渐下降。
    - 稳定后维持在一个较低水平，但不是接近 0，因为生成器需要不断与判别器竞争。
- 判别器的损失 (Discriminator Loss)：

    - 初期较低，表示判别器能够轻松区分真实样本和生成样本。
    - 随着生成器的改进，判别器的损失逐渐上升；理想平衡时判别器的输出概率趋于约 0.5（随机猜测的水平），此时按上面的定义 $L_D=H(1,0.5)+H(0,0.5)=2\ln 2\approx 1.39$。
  
两者在一个理想的平衡点上达到动态稳定：生成器和判别器互相逼近彼此的最优性能。

**优化原理**：生成网络和判别网络有了损失函数，就可以基于各自的损失函数，利用误差反向传播（Backpropagation，BP）算法和最优化方法（如梯度下降法）来实现参数的调整，不断提高生成网络和判别网络的性能（最终生成网络和判别网络的成熟状态就是学习到了合理的映射函数）。

### DCGAN

## 数据准备

In [37]:
# 加载图像并生成批次数据
def generator(data_path, batch_size):
    """
    Build a shuffled DataLoader over the CIFAR-10 images labelled 1.

    CIFAR-10 is loaded from ``data_path`` (and downloaded there if it is
    missing); only the samples whose label equals 1 (the car class) are
    kept. A short summary of the dataset and of one batch is printed.

    Parameters
    ----------
    data_path : str
        Root directory in which CIFAR-10 is stored / downloaded.
    batch_size : int
        Number of images per batch, a positive integer.

    Returns
    -------
    dataloader : torch.utils.data.DataLoader
        Yields ``(images, labels)`` batches. Images are resized to 32x32,
        converted to tensors and normalised to [-1, 1] per channel, which
        matches the Tanh output range of the generator network.
    """
    # Image preprocessing pipeline.
    transform = transforms.Compose([
        transforms.Resize((32, 32)),  # resize to the network input size
        transforms.ToTensor(),  # PIL image -> float tensor in [0, 1]
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # [0, 1] -> [-1, 1]
    ])

    dataset = datasets.CIFAR10(root=data_path, download=True, transform=transform)
    # Select the indices of label 1 from ``dataset.targets`` directly.
    # Iterating the dataset itself would decode and transform every image
    # only to throw the pixels away.
    indices = [i for i, label in enumerate(dataset.targets) if label == 1]
    # Keep only the car images.
    dataset = Subset(dataset, indices)

    # For a custom image folder, use instead:
    # dataset = datasets.ImageFolder(root=data_path, transform=transform)
    print(f"图像个数：{len(dataset)}, 尺寸：{dataset[0][0].shape}")

    # Data loader, reshuffled every epoch.
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Peek at one batch to report its shapes.
    images, labels = next(iter(dataloader))
    print(f"图像批次大小: {images.shape}")
    print(f"标签批次大小: {labels.shape}")
    print(f"图像批次个数: {len(dataloader)}")

    return dataloader

In [38]:
# Training set
params1 = {
    "data_path": "../../../../../data/02.cv/cifar-10/",  # CIFAR-10 root directory handed to generator()
    "batch_size": 128,  # images per batch
}
print("训练集：")
# Build the training DataLoader; generator() also prints dataset and batch statistics.
data_loader = generator(**params1)

训练集：
Files already downloaded and verified
图像个数：5000, 尺寸：torch.Size([3, 32, 32])
图像批次大小: torch.Size([128, 3, 32, 32])
标签批次大小: torch.Size([128])
图像批次个数: 40


## 模型定义

In [50]:
# 定义生成器网络
# class Generator(nn.Module):
#     def __init__(self, latent_dim, channels):
#         super(Generator, self).__init__()
#         self.model = nn.Sequential(
#             # 1. 全连接层：将潜在向量 (latent_dim) 投影并展平为形状 (128, 8, 8)
#             nn.Linear(latent_dim, 128 * 8 * 8),
#             nn.LeakyReLU(0.3, inplace=True),
#             nn.Unflatten(1, (128, 8, 8)),  # 转换为形状 (batch, 128, 8, 8)

#             # 2. 转置卷积层：第一次上采样，(128, 8, 8) -> (128, 16, 16)
#             nn.ConvTranspose2d(128, 128, kernel_size=4, stride=2, padding=1),
#             nn.BatchNorm2d(128),
#             nn.LeakyReLU(0.3, inplace=True),

#             # 3. 转置卷积层：第二次上采样，(128, 16, 16) -> (64, 32, 32)
#             nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
#             nn.BatchNorm2d(64),
#             nn.LeakyReLU(0.3, inplace=True),

#             # 4. 卷积层：输出通道调整为图像通道数 (64, 32, 32) -> (3, 32, 32)
#             nn.Conv2d(64, channels, kernel_size=3, stride=1, padding=1),
#             nn.Tanh()  # 将输出范围限制在 [-1, 1]
#         )

#     def forward(self, z):
#         return self.model(z)

class Generator(nn.Module):
    """
    Generator network: turns a latent vector into a 32x32 image.

    Input is a tensor of shape (batch_size, latent_dim); output has shape
    (batch_size, channels, 32, 32) with values in [-1, 1] (Tanh).

    Parameters
    ----------
    latent_dim : int
        Size of the latent input vector.
    channels : int
        Number of channels of the generated image.
    """

    def __init__(self, latent_dim, channels):
        super().__init__()
        self.latent_dim = latent_dim

        def leaky():
            # Every hidden stage uses the same activation.
            return nn.LeakyReLU(0.2, inplace=True)

        stages = [
            # 1. Project the latent vector and reshape it to a feature map:
            #    (batch_size, latent_dim) -> (batch_size, 128, 16, 16)
            nn.Linear(latent_dim, 128 * 16 * 16),
            leaky(),
            nn.Unflatten(1, (128, 16, 16)),
            # 2. Size-preserving convolution -> (batch_size, 256, 16, 16)
            nn.Conv2d(128, 256, kernel_size=5, padding=2),
            leaky(),
            # 3. Upsample by 2 -> (batch_size, 256, 32, 32)
            nn.ConvTranspose2d(256, 256, kernel_size=4, stride=2, padding=1),
            leaky(),
            # 4. Size-preserving convolution -> (batch_size, 256, 32, 32)
            nn.Conv2d(256, 256, kernel_size=5, padding=2),
            leaky(),
            # 5. Size-preserving convolution -> (batch_size, 256, 32, 32)
            nn.Conv2d(256, 256, kernel_size=5, padding=2),
            leaky(),
            # 6. Map to image channels; padding=3 keeps the spatial size,
            #    -> (batch_size, channels, 32, 32)
            nn.Conv2d(256, channels, kernel_size=7, padding=3),
            nn.Tanh(),  # squash the output into [-1, 1]
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Generate images from the latent batch ``x``."""
        return self.model(x)

# 定义判别器网络
class Discriminator(nn.Module):
    """
    Discriminator network: scores 32x32 images as real or generated.

    Input is a tensor of shape (batch_size, channels, 32, 32); output has
    shape (batch_size, 1) with values in [0, 1] (Sigmoid).

    Parameters
    ----------
    channels : int
        Number of channels of the input image.
    """

    def __init__(self, channels):
        super().__init__()

        # 1. Lift the image to 128 feature maps:
        #    (channels, 32, 32) -> (128, 32, 32)
        layers = [
            nn.Conv2d(channels, 128, 3, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # 2-4. Three stride-2 convolutions, each halving the resolution:
        #      (128, 32, 32) -> (128, 16, 16) -> (128, 8, 8) -> (128, 4, 4)
        for _ in range(3):
            layers.append(nn.Conv2d(128, 128, 4, stride=2, padding=1))
            layers.append(nn.LeakyReLU(0.2, inplace=True))

        # 5. Flatten (128, 4, 4) to a vector; dropout against overfitting.
        layers.append(nn.Flatten())
        layers.append(nn.Dropout(0.4))

        # 6. Single output unit squashed to a probability in [0, 1].
        layers.append(nn.Linear(128 * 4 * 4, 1))
        layers.append(nn.Sigmoid())

        self.model = nn.Sequential(*layers)

    def forward(self, img):
        """Return the discriminator's score for each image in ``img``."""
        return self.model(img)

## 模型训练

In [61]:
class _EarlyStopping:
    """
    Track a loss across epochs, checkpoint on improvement, flag when to stop.

    Parameters
    ----------
    patience : int
        Number of consecutive non-improving calls after which
        ``early_stop`` becomes True.
    verbose : bool
        Print a message each time a checkpoint is written.
    delta : float
        Minimum decrease of the loss that counts as an improvement.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.counter = 0  # consecutive calls without improvement
        self.best_score = None  # negated best loss seen so far
        self.early_stop = False
        # ``np.Inf`` was removed in NumPy 2.0; the builtin float avoids the dependency.
        self.val_loss_min = float("inf")

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None or score >= self.best_score + self.delta:
            # First call or a real improvement: checkpoint and reset the counter.
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0
            return
        self.counter += 1
        print(
            f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model, path):
        """Write ``model.state_dict()`` to ``<path>/checkpoint.pth``."""
        if self.verbose:
            print(
                f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), os.path.join(path, 'checkpoint.pth'))
        self.val_loss_min = val_loss


def _plot_losses(G_losses, D_losses):
    """Plot the per-epoch generator and discriminator losses."""
    plt.figure(figsize=(10, 5))
    plt.style.use('seaborn-v0_8-paper')  # plot style
    plt.grid(axis='y', linewidth=0.35)  # horizontal grid lines
    plt.plot(G_losses, linestyle='-', color='#11b3b6')
    plt.plot(D_losses, linestyle='-', color='#f14643')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training and Validation Progress")
    plt.legend(["Generator", "Discriminator"])
    plt.show()


def train(train_args, generator_args, discriminator_args):
    """
    Train a generator/discriminator pair adversarially.

    Each batch first updates the discriminator on real and generated
    images, then updates the generator so that its images are scored as
    real. After every epoch a 5x5 grid of generated images is saved and
    early stopping is evaluated on the mean generator loss.

    Parameters
    ----------
    train_args : dict
        Training configuration with the keys:
        ``generator_name`` / ``discriminator_name`` (model classes),
        ``data_loader`` (iterable of ``(images, labels)`` batches),
        ``n_epochs``, ``learning_rate``, ``patience``,
        ``model_path`` (checkpoint directory),
        ``image_path`` (directory for the per-epoch sample grids),
        ``device`` ('cuda' or 'cpu'), ``loss`` (criterion, e.g. BCE),
        ``verbose`` (print checkpoint messages), ``plots`` (draw the loss
        curves), ``clip_value`` (read but unused: gradient clipping is
        disabled) and ``image_size`` (side length of generated images).
    generator_args : dict
        Keyword arguments for the generator class; must contain
        ``latent_dim`` and ``channels``.
    discriminator_args : dict
        Keyword arguments for the discriminator class.

    Returns
    -------
    generator : torch.nn.Module
        The generator in its state after the last executed epoch. The
        best checkpoint is only written to ``model_path/checkpoint.pth``;
        it is not reloaded here.

    Raises
    ------
    ValueError
        If ``data_loader`` contains no batches.
    """
    # Configuration
    generator_name = train_args['generator_name']  # generator model class
    discriminator_name = train_args['discriminator_name']  # discriminator model class
    data_loader = train_args['data_loader']  # training batches
    n_epochs = train_args['n_epochs']  # number of epochs
    learning_rate = train_args['learning_rate']  # learning rate of both optimizers
    model_path = train_args['model_path']  # checkpoint directory
    image_path = train_args['image_path']  # directory for generated sample grids
    verbose = train_args['verbose']  # print checkpoint messages
    plots = train_args['plots']  # draw the loss curves at the end
    device = train_args['device']  # 'cuda' or 'cpu'
    patience = train_args['patience']  # early-stopping patience in epochs
    clip_value = train_args['clip_value']  # max gradient norm (clipping is disabled below)
    loss = train_args['loss']  # loss function
    image_size = train_args['image_size']  # side length of generated images
    latent_dim = generator_args['latent_dim']  # latent vector size
    channels = generator_args['channels']  # number of colour channels

    # Resolve the device; fall back to CPU instead of crashing when CUDA
    # was requested but is not available.
    device = torch.device(device)
    if device.type == 'cuda' and not torch.cuda.is_available():
        print("CUDA is not available, falling back to CPU.")
        device = torch.device('cpu')

    n_batches = len(data_loader)
    if n_batches == 0:
        raise ValueError("data_loader is empty: nothing to train on")

    # NOTE(review): early stopping monitors the generator loss, which is
    # not necessarily a measure of image quality in adversarial training.
    early_stopping = _EarlyStopping(patience=patience, verbose=verbose)

    # Both output directories must exist before anything is written.
    os.makedirs(model_path, exist_ok=True)
    os.makedirs(image_path, exist_ok=True)

    # Models, optimizers and criterion
    generator = generator_name(**generator_args).to(device)
    discriminator = discriminator_name(**discriminator_args).to(device)

    optimizer_G = optim.Adam(generator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
    optimizer_D = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
    criterion = loss

    # Per-epoch mean losses
    G_losses, D_losses = [], []

    for epoch in tqdm(range(n_epochs)):
        generator.train()
        discriminator.train()
        total_G_loss = 0
        total_D_loss = 0
        for imgs, _ in data_loader:
            imgs = imgs.to(device)
            batch_size = imgs.size(0)

            # Targets: 1 for real images, 0 for generated ones.
            real_labels = torch.ones(batch_size, 1, device=device)
            fake_labels = torch.zeros(batch_size, 1, device=device)

            # ---- Discriminator step ----
            z = torch.randn(batch_size, latent_dim, device=device)  # latent samples ~ N(0, 1)
            fake_imgs = generator(z)

            real_loss = criterion(discriminator(imgs), real_labels)
            # detach(): the discriminator loss must not back-propagate into the generator.
            fake_loss = criterion(discriminator(fake_imgs.detach()), fake_labels)
            loss_D = real_loss + fake_loss

            optimizer_D.zero_grad()
            loss_D.backward()
            # Optional: torch.nn.utils.clip_grad_norm_(discriminator.parameters(), clip_value)
            optimizer_D.step()

            # ---- Generator step ----
            # The generator wants its images to be classified as real.
            loss_G = criterion(discriminator(fake_imgs), real_labels)

            optimizer_G.zero_grad()
            loss_G.backward()
            # Optional: torch.nn.utils.clip_grad_norm_(generator.parameters(), clip_value)
            optimizer_G.step()

            total_G_loss += loss_G.item()
            total_D_loss += loss_D.item()

        # Mean loss over the batches of this epoch
        avg_G_loss = total_G_loss / n_batches
        avg_D_loss = total_D_loss / n_batches
        G_losses.append(avg_G_loss)
        D_losses.append(avg_D_loss)

        # Report with a 1-based epoch number, consistent with the image file name below.
        print(
            f'Epoch [{epoch + 1}/{n_epochs}], Generator Loss: {avg_G_loss:.4f}, Discriminator Loss: {avg_D_loss:.4f}')

        # Save a 5x5 grid of the images generated for the last batch of the epoch.
        samples = fake_imgs.detach().view(fake_imgs.size(0), channels, image_size, image_size)
        vutils.save_image(samples[:25], f"{image_path}/output_epoch_{epoch+1}.png", nrow=5, normalize=True)

        # Early stopping; checkpoints the generator whenever its loss improves.
        early_stopping(avg_G_loss, generator, model_path)
        if early_stopping.early_stop:
            print("Early stopping!")
            break

    if plots:
        _plot_losses(G_losses, D_losses)

    return generator

In [None]:
# Build the argument dictionaries for train()
params2 = {
    "train_args": {
        "generator_name": Generator,  # generator class, instantiated inside train()
        "discriminator_name": Discriminator,  # discriminator class, instantiated inside train()
        "data_loader": data_loader,  # training batches built above
        "n_epochs": 100,  # maximum number of epochs
        "patience": 20,  # epochs without generator-loss improvement before early stopping
        "learning_rate": 0.0004,  # Adam learning rate used for both networks
        "model_path": "../outputs/best_models/DCGAN",  # directory that receives checkpoint.pth
        "image_path": "../outputs/images",  # directory that receives one sample grid per epoch
        "device": 'cuda',  # 'cuda' or 'cpu'
        "loss": nn.BCELoss(),  # binary cross-entropy on the discriminator's sigmoid output
        "verbose": True,  # print a message whenever a checkpoint is saved
        "plots": True,  # draw the loss curves after training
        "clip_value": 1.0,  # read by train(), but gradient clipping is commented out there
        "image_size": 32,  # side length used to reshape generated images before saving
    },
    "generator_args": {
        'latent_dim': 32,  # size of the latent noise vector
        'channels': 3,  # RGB output
    },
    "discriminator_args": {
        'channels': 3,  # RGB input
    },
}
# NOTE(review): this rebinds the module-level name `generator`, which until now
# referred to the data-loading function defined earlier in this notebook.
# Re-running the data-preparation cell after this one would call the trained
# model instead of that function.
generator = train(**params2)

  1%|▊                                                                             | 1/100 [14:43<24:17:21, 883.24s/it]

Epoch [0/100], Generator Loss: 4.2336, Discriminator Loss: 0.7062
Validation loss decreased (inf --> 4.233615).  Saving model ...


## 图像生成