In [1]:
import torch

# Load the training and validation splits from disk.
# NOTE(review): torch.load unpickles arbitrary Python objects — only point it
# at trusted files.
train_data = torch.load('data/German_data/train.pt')
valid_data = torch.load('data/German_data/valid.pt')

# Inspect the loaded objects (one per line, training split first).
print(train_data, valid_data, sep='\n')


[(Status_of_existing_checking_account                            A12
Duration_in_month                                               48
Credit_history                                                 A32
Purpose                                                        A43
Credit_amount                                                 5951
Savings_account/bonds                                          A61
Present_employment_since                                       A73
Installment_rate_in_percentage_of_disposable_income              2
Personal_status_and_sex                                     female
Other_debtors/guarantors                                      A101
Present_residence_since                                          2
Property                                                      A121
Age_in_years                                                    22
Other_installment_plans                                       A143
Housing                                                     

In [2]:
import torch.nn as nn
import torch.optim as optim

class SimpleClassifier(nn.Module):
    """Feed-forward classifier with two ReLU hidden layers (128 and 64 units).

    Args:
        input_dim: number of input features per sample.
        output_dim: number of values produced by the final linear layer.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        wide_units, narrow_units = 128, 64
        self.fc1 = nn.Linear(input_dim, wide_units)
        self.fc2 = nn.Linear(wide_units, narrow_units)
        self.fc3 = nn.Linear(narrow_units, output_dim)

    def forward(self, x):
        """Return the raw output of the last linear layer (no final activation)."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)


In [5]:
# Integer codes for the sex attribute.
sex_encoding = {"female": 0, "male": 1}

# Accumulators for the three parts of every training record:
# features, sex and label.
train_features, train_sex, train_labels = [], [], []

for record in train_data:
    raw_features, sex_value, target = record[0], record[1], record[2]

    # Numeric entries are cast to float; every other entry becomes 0.0.
    # NOTE(review): this discards all non-numeric feature values — confirm
    # that dropping them is intended.
    numeric_row = []
    for value in raw_features:
        numeric_row.append(float(value) if isinstance(value, (int, float)) else 0.0)

    train_features.append(torch.tensor(numeric_row))  # feature vector as a tensor
    train_sex.append(sex_encoding[sex_value])  # encoded sex attribute
    train_labels.append(target)

# Collapse the per-record lists into tensors.
train_features = torch.stack(train_features)
train_sex = torch.tensor(train_sex)
train_labels = torch.tensor(train_labels)

# Show the results.
print("Train Features:", train_features)
print("Train Sex:", train_sex)
print("Train Labels:", train_labels)


Train Features: tensor([[ 0., 48.,  0.,  ...,  0.,  0.,  0.],
        [ 0., 12.,  0.,  ...,  0.,  0.,  1.],
        [ 0., 42.,  0.,  ...,  0.,  0.,  1.],
        ...,
        [ 0.,  6.,  0.,  ...,  0.,  0.,  1.],
        [ 0., 24.,  0.,  ...,  0.,  0.,  0.],
        [ 0., 36.,  0.,  ...,  0.,  0.,  1.]])
Train Sex: tensor([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1,
        0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1,
        0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0,
        0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0,
        1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
        1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,

In [7]:
# Integer codes for the sex attribute.
sex_encoding = {"female": 0, "male": 1}

# Accumulators for the three parts of every validation record:
# features, sex and label.
valid_features, valid_sex, valid_labels = [], [], []

for record in valid_data:
    raw_features, sex_value, target = record[0], record[1], record[2]

    # Numeric entries are cast to float; every other entry becomes 0.0.
    # NOTE(review): this discards all non-numeric feature values — confirm
    # that dropping them is intended.
    numeric_row = []
    for value in raw_features:
        numeric_row.append(float(value) if isinstance(value, (int, float)) else 0.0)

    valid_features.append(torch.tensor(numeric_row))  # feature vector as a tensor
    valid_sex.append(sex_encoding[sex_value])  # encoded sex attribute
    valid_labels.append(target)

# Collapse the per-record lists into tensors.
valid_features = torch.stack(valid_features)
valid_sex = torch.tensor(valid_sex)
valid_labels = torch.tensor(valid_labels)

# Show the results.
print("Valid Features:", valid_features)
print("Valid Sex:", valid_sex)
print("Valid Labels:", valid_labels)


Valid Features: tensor([[ 0.,  9.,  0.,  ...,  0.,  0.,  1.],
        [ 0., 24.,  0.,  ...,  0.,  0.,  0.],
        [ 0., 24.,  0.,  ...,  0.,  0.,  0.],
        ...,
        [ 0., 12.,  0.,  ...,  0.,  0.,  1.],
        [ 0., 45.,  0.,  ...,  0.,  0.,  0.],
        [ 0., 45.,  0.,  ...,  0.,  0.,  1.]])
Valid Sex: tensor([1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
        0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0,
        1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
        1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1,
        1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1,
        1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
        1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
        1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Generator(nn.Module):
    """Fully connected generator: input_dim -> 1024 -> 512 -> 256 -> output_dim.

    Hidden layers use ReLU; the output layer is squashed with tanh, so every
    generated value lies in (-1, 1).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, output_dim)

    def forward(self, x):
        """Map a batch of noise vectors to a batch of generated samples."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))
        return torch.tanh(self.fc4(x))

class Discriminator(nn.Module):
    """Fully connected discriminator: input_dim -> 512 -> 256 -> 128 -> 1.

    Hidden layers use LeakyReLU with slope 0.2; the single output is passed
    through a sigmoid, giving a score in (0, 1).
    """

    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 1)

    def forward(self, x):
        """Return one sigmoid score per input row."""
        negative_slope = 0.2
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.leaky_relu(hidden_layer(x), negative_slope)
        return torch.sigmoid(self.fc4(x))

# Hyperparameters
noise_dim = 100                       # length of the generator's noise input
output_dim = train_features.shape[1]  # one generated value per feature column
epochs = 10000
batch_size = 64

# Build the two networks (generator first, then discriminator)
generator = Generator(noise_dim, output_dim)
discriminator = Discriminator(output_dim)

# Loss and optimizers: the generator runs at half the discriminator's learning rate
criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=1e-4)
optimizer_d = optim.Adam(discriminator.parameters(), lr=2e-4)

# Learning-rate schedulers: multiply each rate by 0.5 every 2000 scheduler steps
scheduler_g, scheduler_d = (
    optim.lr_scheduler.StepLR(optimizer, step_size=2000, gamma=0.5)
    for optimizer in (optimizer_g, optimizer_d)
)



Epoch 1/20, Loss: 11.596967697143555, Val Loss: 24.175363540649414
Epoch 2/20, Loss: 24.11913299560547, Val Loss: 9.687271118164062
Epoch 3/20, Loss: 9.6766996383667, Val Loss: 9.894438743591309
Epoch 4/20, Loss: 9.5018949508667, Val Loss: 16.64630126953125
Epoch 5/20, Loss: 15.986230850219727, Val Loss: 17.594099044799805
Epoch 6/20, Loss: 16.896268844604492, Val Loss: 14.444722175598145
Epoch 7/20, Loss: 13.870067596435547, Val Loss: 8.227691650390625
Epoch 8/20, Loss: 7.897729396820068, Val Loss: 1.0384130477905273
Epoch 9/20, Loss: 1.0442836284637451, Val Loss: 6.332032203674316
Epoch 10/20, Loss: 6.323381423950195, Val Loss: 2.3170883655548096
Epoch 11/20, Loss: 2.3223519325256348, Val Loss: 5.004913330078125
Epoch 12/20, Loss: 4.801891326904297, Val Loss: 7.977474212646484
Epoch 13/20, Loss: 7.655579566955566, Val Loss: 8.165885925292969
Epoch 14/20, Loss: 7.836061477661133, Val Loss: 5.9629411697387695
Epoch 15/20, Loss: 5.720058917999268, Val Loss: 1.76763916015625
Epoch 16/20,

In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Generator(nn.Module):
    """Six-layer fully connected generator.

    Layer widths: input_dim -> 1024 -> 2048 -> 1024 -> 512 -> 256 -> output_dim.
    The five hidden layers use ReLU and the output layer uses tanh, so every
    generated value lies in (-1, 1).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        widths = (input_dim, 1024, 2048, 1024, 512, 256, output_dim)
        # Registers fc1 ... fc6 in order, each mapping one width to the next.
        for index, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fc{index}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Map a batch of noise vectors to a batch of generated samples."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            x = F.relu(hidden_layer(x))
        return torch.tanh(self.fc6(x))

class Discriminator(nn.Module):
    """Discriminator with dropout: input_dim -> 128 -> 256 -> 1.

    Each hidden layer is LeakyReLU(0.2) followed by dropout (p=0.3); the
    single output is passed through a sigmoid, giving a score in (0, 1).
    """

    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 1)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return one sigmoid score per input row."""
        for hidden_layer in (self.fc1, self.fc2):
            activated = F.leaky_relu(hidden_layer(x), 0.2)
            x = self.dropout(activated)
        return torch.sigmoid(self.fc3(x))




In [27]:
# Model hyperparameters
noise_dim = 100
output_dim = train_features.shape[1]

# Instantiate the networks (generator first, then discriminator)
generator = Generator(noise_dim, output_dim)
discriminator = Discriminator(output_dim)

# Loss function and optimizers
criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=1e-4)
optimizer_d = optim.Adam(discriminator.parameters(), lr=5e-5)  # lower rate for the discriminator

# Learning-rate schedulers: halve each rate every 2000 scheduler steps
scheduler_g = optim.lr_scheduler.StepLR(optimizer_g, step_size=2000, gamma=0.5)
scheduler_d = optim.lr_scheduler.StepLR(optimizer_d, step_size=2000, gamma=0.5)

# GAN training
# NOTE(review): the generator's output passes through tanh and is confined to
# (-1, 1); check that train_features is scaled to a comparable range.
epochs = 2000
batch_size = 64

num_samples = len(train_features)
batches_per_epoch = num_samples // batch_size

# Constant BCE targets: 1 for "real", 0 for "fake".
real_labels = torch.ones(batch_size, 1)
fake_labels = torch.zeros(batch_size, 1)

for epoch in range(epochs):
    for _ in range(batches_per_epoch):
        # ---- Discriminator step ----
        # Draw a random batch with replacement; tensor indexing returns a copy,
        # so the jitter added below does not touch train_features.
        sample_idx = torch.randint(0, num_samples, (batch_size,))
        real_data = train_features[sample_idx]

        # Add Gaussian noise (std 0.05) to the real samples.
        real_data = real_data + torch.randn(real_data.size()) * 0.05

        fake_data = generator(torch.randn(batch_size, noise_dim))

        optimizer_d.zero_grad()
        real_loss = criterion(discriminator(real_data), real_labels)
        # detach(): the discriminator update must not reach the generator.
        fake_loss = criterion(discriminator(fake_data.detach()), fake_labels)
        d_loss = real_loss + fake_loss
        d_loss.backward()
        optimizer_d.step()

        # ---- Generator step ----
        optimizer_g.zero_grad()
        fake_data = generator(torch.randn(batch_size, noise_dim))
        # The generator wants its samples to be scored as real.
        g_loss = criterion(discriminator(fake_data), real_labels)
        g_loss.backward()
        optimizer_g.step()

    # Advance both schedulers once per epoch.
    scheduler_g.step()
    scheduler_d.step()

    # Report the losses of the epoch's last batch every 100 epochs.
    if epoch % 100 == 0:
        print(f'Epoch {epoch}/{epochs}, D Loss: {d_loss.item()}, G Loss: {g_loss.item()}')

print('Finished Training')

Epoch 0/2000, D Loss: 14.011220932006836, G Loss: 0.677771806716919
Epoch 100/2000, D Loss: 0.6676229238510132, G Loss: 0.7268048524856567
Epoch 200/2000, D Loss: 0.46740520000457764, G Loss: 0.9891179203987122
Epoch 300/2000, D Loss: 0.2787140905857086, G Loss: 1.4412370920181274
Epoch 400/2000, D Loss: 0.11774538457393646, G Loss: 2.251539468765259


KeyboardInterrupt: 

In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Generator(nn.Module):
    """Batch-normalised generator: input_dim -> 256 -> 512 -> 1024 -> output_dim.

    Each hidden block is Linear -> BatchNorm1d -> ReLU; the output layer is
    squashed with tanh. All linear layers are re-initialised by weights_init
    at construction time.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 1024)
        self.bn3 = nn.BatchNorm1d(1024)
        self.fc4 = nn.Linear(1024, output_dim)
        self.apply(self.weights_init)

    def weights_init(self, m):
        """Give nn.Linear modules Xavier-uniform weights and zero biases; skip other modules."""
        if not isinstance(m, nn.Linear):
            return
        nn.init.xavier_uniform_(m.weight)
        nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Map a batch of noise vectors to a batch of generated samples."""
        hidden_blocks = ((self.fc1, self.bn1), (self.fc2, self.bn2), (self.fc3, self.bn3))
        for linear, batch_norm in hidden_blocks:
            x = F.relu(batch_norm(linear(x)))
        return torch.tanh(self.fc4(x))

class Critic(nn.Module):
    """Critic network: input_dim -> 512 -> 256 -> 1 with an unbounded output.

    Each hidden layer is LeakyReLU(0.2) followed by dropout (p=0.3). The last
    layer has no activation, so the returned score is an arbitrary real number.
    All linear layers are re-initialised by weights_init at construction time.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 1)
        self.dropout = nn.Dropout(0.3)
        self.apply(self.weights_init)

    def weights_init(self, m):
        """Give nn.Linear modules Xavier-uniform weights and zero biases; skip other modules."""
        if not isinstance(m, nn.Linear):
            return
        nn.init.xavier_uniform_(m.weight)
        nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return one raw (un-activated) score per input row."""
        for hidden_layer in (self.fc1, self.fc2):
            activated = F.leaky_relu(hidden_layer(x), 0.2)
            x = self.dropout(activated)
        return self.fc3(x)

def gradient_penalty(critic, real_data, fake_data):
    """Compute the WGAN-GP gradient penalty on random real/fake interpolates.

    For every sample a mixing coefficient is drawn uniformly from [0, 1), the
    real and fake samples are blended with it, and the critic's gradient with
    respect to the blended input is measured. The penalty is the batch mean of
    ``(||gradient||_2 - 1) ** 2``.

    Args:
        critic: callable mapping a batch of samples to a batch of scores.
        real_data: tensor whose first dimension is the batch; any number of
            trailing dimensions is accepted.
        fake_data: tensor with the same shape as ``real_data``.

    Returns:
        A scalar tensor. It is differentiable with respect to the critic's
        parameters (the gradient is taken with ``create_graph=True``), but not
        with respect to whatever produced ``real_data`` or ``fake_data``.
    """
    batch_size = real_data.size(0)

    # The penalty only regularises the critic. Detaching keeps its backward
    # pass from running through (and accumulating gradients in) the network
    # that produced the samples.
    real_data = real_data.detach()
    fake_data = fake_data.detach()

    # One coefficient per sample, shaped (batch, 1, ..., 1) so that it
    # broadcasts over every trailing dimension of the data.
    epsilon_shape = (batch_size,) + (1,) * (real_data.dim() - 1)
    epsilon = torch.rand(epsilon_shape, device=real_data.device)

    interpolated = epsilon * real_data + (1 - epsilon) * fake_data
    interpolated.requires_grad_(True)

    critic_scores = critic(interpolated)

    # create_graph=True keeps the graph of the gradient itself, so the penalty
    # can be backpropagated into the critic's parameters.
    gradients = torch.autograd.grad(
        outputs=critic_scores,
        inputs=interpolated,
        grad_outputs=torch.ones_like(critic_scores),
        create_graph=True,
        retain_graph=True,
    )[0]

    # reshape (not view) also handles non-contiguous gradient tensors.
    gradient_norm = gradients.reshape(batch_size, -1).norm(2, dim=1)
    return ((gradient_norm - 1) ** 2).mean()

# Data normalisation (example): standardise with one global mean and std.
# NOTE(review): this overwrites train_features in place, and a single
# mean/std is shared by all feature columns — confirm per-column scaling is
# not what is wanted here.
train_features = (train_features - train_features.mean()) / train_features.std()

# Model hyperparameters
noise_dim = 100
output_dim = train_features.shape[1]

# Instantiate the networks (generator first, then critic)
generator = Generator(noise_dim, output_dim)
critic = Critic(output_dim)

# Optimizers
optimizer_g = optim.Adam(generator.parameters(), lr=0.0001, betas=(0.5, 0.9))
optimizer_c = optim.Adam(critic.parameters(), lr=0.0001, betas=(0.5, 0.9))

# WGAN-GP training
epochs = 10000
batch_size = 64
critic_iterations = 5  # critic updates per generator update
lambda_gp = 10         # weight of the gradient penalty in the critic loss

for epoch in range(epochs):
    for _ in range(len(train_features) // batch_size):
        for _ in range(critic_iterations):
            # ---- Critic step ----
            # Random batch of real rows, sampled with replacement.
            real_data = train_features[torch.randint(0, len(train_features), (batch_size,))]
            # Detach once here: nothing in the critic update (including the
            # gradient penalty) should backpropagate into the generator.
            # Without this, every critic step ran a backward pass through the
            # generator and accumulated gradients that were later discarded.
            fake_data = generator(torch.randn(batch_size, noise_dim)).detach()

            optimizer_c.zero_grad()
            real_loss = -torch.mean(critic(real_data))
            fake_loss = torch.mean(critic(fake_data))
            gp = gradient_penalty(critic, real_data, fake_data)
            c_loss = real_loss + fake_loss + lambda_gp * gp
            c_loss.backward()
            optimizer_c.step()

        # ---- Generator step ----
        optimizer_g.zero_grad()
        fake_data = generator(torch.randn(batch_size, noise_dim))
        # The generator tries to raise the critic's score on its samples.
        g_loss = -torch.mean(critic(fake_data))
        g_loss.backward()
        optimizer_g.step()

    # Report the losses of the epoch's last updates every 100 epochs.
    if epoch % 100 == 0:
        print(f'Epoch {epoch}/{epochs}, C Loss: {c_loss.item()}, G Loss: {g_loss.item()}')

print('Finished Training')


Epoch 0/10000, C Loss: -0.3072507083415985, G Loss: 0.10480310767889023
Epoch 100/10000, C Loss: -2.113337755203247, G Loss: -0.9715560674667358
Epoch 200/10000, C Loss: -2.7326202392578125, G Loss: -1.057898759841919


KeyboardInterrupt: 