In [6]:
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import random
import numpy as np
import torch.nn.functional as F

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Generator(nn.Module):
    """Fully connected generator: three Linear + BatchNorm1d stages.

    The first two stages use a tanh activation; the last one uses a sigmoid,
    so every output component lies in the open interval (0, 1).

    Args:
        d: width of every layer and of the generated vector.
        n_noise: number of components in the input noise vector.
    """

    def __init__(self, d, n_noise):
        super().__init__()
        # Layers are created in the same order as they are applied; the
        # attribute names double as the state_dict keys.
        self.linear1 = nn.Linear(n_noise, d)
        self.bn1 = nn.BatchNorm1d(d)
        self.linear2 = nn.Linear(d, d)
        self.bn2 = nn.BatchNorm1d(d)
        self.linear3 = nn.Linear(d, d)
        self.bn3 = nn.BatchNorm1d(d)

    def forward(self, noise):
        """Map a (batch, n_noise) noise tensor to a (batch, d) sample tensor."""
        hidden = noise
        for dense, norm in ((self.linear1, self.bn1), (self.linear2, self.bn2)):
            hidden = torch.tanh(norm(dense(hidden)))
        # Sigmoid bounds each component independently; rows are not normalised.
        return torch.sigmoid(self.bn3(self.linear3(hidden)))


class Discriminator(nn.Module):
    """Two-layer fully connected discriminator.

    A tanh hidden layer of width ``d`` is followed by a single sigmoid unit,
    so the output is one score in (0, 1) per input row.

    Args:
        d: number of components in each input vector.
    """

    def __init__(self, d):
        super().__init__()
        self.linear1 = nn.Linear(d, d)
        self.linear2 = nn.Linear(d, 1)

    def forward(self, dec):
        """Score a (..., d) tensor; the result has shape (..., 1)."""
        hidden = torch.tanh(self.linear1(dec))
        score = torch.sigmoid(self.linear2(hidden))
        return score


class GAN(object):
    """Small CPU-only GAN that learns to generate solution vectors in [0, 1]^d.

    The generator is fed clipped Gaussian noise whose mean and covariance are
    estimated from a pool of reference samples (per the original author's
    note: the best solutions of the current population).

    Args:
        d: number of components of a solution vector.
        batchsize: mini-batch size used by ``train``.
        lr: Adam learning rate of the discriminator; the generator uses ``4 * lr``.
        epoches: number of passes over the training population in ``train``.
        n_noise: input width of the generator. The noise drawn in ``train`` and
            ``generate`` has as many columns as the sample pool, so this must
            match that column count.
    """

    def __init__(self, d, batchsize, lr, epoches, n_noise):
        self.d = d
        self.n_noise = n_noise
        self.BCE_loss = nn.BCELoss()
        self.G = Generator(self.d, self.n_noise)
        self.D = Discriminator(self.d)
        self.G.cpu()
        self.D.cpu()
        self.G_optimizer = optim.Adam(self.G.parameters(), 4 * lr)
        self.D_optimizer = optim.Adam(self.D.parameters(), lr)
        self.epoches = epoches
        self.batchsize = batchsize

    def _sample_noise(self, center, cov, batch_size):
        """Draw ``batch_size`` Gaussian noise rows, clip to [0, 1], return float32 tensor."""
        noise = np.random.multivariate_normal(center, cov, batch_size)
        return torch.from_numpy(np.clip(noise, 0.0, 1.0)).float()

    def train(self, pop_dec, labels, samples_pool):
        """Alternate discriminator and generator updates over the population.

        Args:
            pop_dec: (n, d) numpy array of solutions shown to the discriminator.
            labels: numpy array with one 0/1 target per row of ``pop_dec``;
                shape (n, 1) or (n,).
            samples_pool: (k, d) numpy array used to estimate the mean (all rows)
                and covariance (first 10 rows) of the generator noise.

        Prints the generator loss summed over the mini-batches of each epoch.
        The caller's arrays are not modified.

        Note: the generator defined in this file uses BatchNorm1d, which cannot
        normalise a single-row batch in training mode, so avoid sizes where
        ``n % batchsize == 1``.
        """
        self.D.train()
        self.G.train()
        n = np.shape(pop_dec)[0]

        center = np.mean(samples_pool, axis=0)
        # np.cov expects one variable per row, so the (k, d) pool is transposed.
        # (A reshape to (d, k) would scramble the feature columns.)
        cov = np.cov(samples_pool[:10, :].T)
        # Ceil division: the last mini-batch may be smaller than batchsize.
        iter_no = (n + self.batchsize - 1) // self.batchsize

        for epoch in range(self.epoches):
            g_train_losses = 0.0

            for iteration in range(iter_no):
                start = iteration * self.batchsize
                stop = start + self.batchsize
                given_x = pop_dec[start:stop, :]
                given_y = labels[start:stop]
                batch_size = np.shape(given_x)[0]

                # ---- discriminator step ----
                self.D.zero_grad()
                real_x = torch.from_numpy(given_x).float()
                real_y = torch.from_numpy(given_y).float().reshape(-1, 1)
                d_results_real = self.D(real_x)

                fake_x = self._sample_noise(center, cov, batch_size)
                fake_y = torch.zeros((batch_size, 1))
                g_results = self.G(fake_x)
                # detach(): the discriminator loss must not reach the generator.
                d_results_fake = self.D(g_results.detach())

                d_train_loss = self.BCE_loss(d_results_real, real_y) + \
                    self.BCE_loss(d_results_fake, fake_y)
                d_train_loss.backward()
                self.D_optimizer.step()

                # ---- generator step ----
                self.G.zero_grad()
                fake_x = self._sample_noise(center, cov, batch_size)
                # Target 1: the generator is rewarded when D rates its output as real.
                fake_y = torch.ones((batch_size, 1))
                d_results = self.D(self.G(fake_x))
                g_train_loss = self.BCE_loss(d_results, fake_y)
                g_train_loss.backward()
                self.G_optimizer.step()
                # .item() yields a plain float so the autograd graph is released.
                g_train_losses += g_train_loss.item()

            print("Epoch[{}], loss: {:.5f}".format(epoch, g_train_losses))

            # Reshuffle rows and labels with the SAME permutation so they stay paired.
            order = np.random.permutation(n)
            pop_dec = pop_dec[order, :]
            labels = labels[order]

    def generate(self, sample_noises, population_size):
        """Generate ``population_size`` solutions from noise fitted to ``sample_noises``.

        Args:
            sample_noises: (k, d) numpy array whose mean and covariance
                parameterise the Gaussian noise.
            population_size: number of rows to generate.

        Returns:
            numpy array of shape (population_size, d) produced by the generator
            in eval mode.
        """
        self.G.eval()

        center = np.mean(sample_noises, axis=0)
        cov = np.cov(sample_noises.T)

        with torch.no_grad():  # inference only
            noises = self._sample_noise(center, cov, population_size)
            decs = self.G(noises).numpy()
        return decs

    def discrimate(self, off):
        """Score each row of ``off`` with the discriminator in eval mode.

        Args:
            off: (batch, d) numpy array of candidate solutions.

        Returns:
            numpy array of shape (batch,) with one discriminator output per row.
        """
        self.D.eval()
        batch_size = off.shape[0]

        # torch.no_grad() replaces the removed Variable(..., volatile=True).
        with torch.no_grad():
            x = torch.from_numpy(off).float()
            d_results = self.D(x).numpy()

        return d_results.reshape(batch_size)




In [3]:
import random 
import numpy as np 
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE

# Synthetic population: N_SAMPLES candidate solutions with N_FEATURES components each.
N_SAMPLES = 100
N_FEATURES = 31
SEED = 0  # fixed seed so a fresh "Restart & Run All" reproduces the same data

# A local generator keeps the seeding from touching NumPy's global RNG state.
rng = np.random.default_rng(SEED)
random_array = rng.random((N_SAMPLES, N_FEATURES))

# Normalise every row so that its components sum to 1 (broadcast over columns).
row_sums = np.sum(random_array, axis=1)
normalized_array = random_array / row_sums[:, np.newaxis]

# Rows treated as positive examples (label 1); all remaining rows are negatives.
positive_indices = [0, 2, 5, 9, 10, 11, 30, 22, 32, 12]
all_indices = list(range(N_SAMPLES))
# sorted() makes the negative order explicit instead of relying on set iteration order.
negative_indices = sorted(set(all_indices) - set(positive_indices))

positive_samples = normalized_array[positive_indices, :]
negative_samples = normalized_array[negative_indices, :]

# Column vector of 0/1 labels aligned with the rows of normalized_array.
label = np.zeros((N_SAMPLES, 1))
label[positive_indices, :] = 1



In [7]:
# Build the GAN for 31-component solutions, trained for 20 epochs in mini-batches of 8.
net = GAN(d=31, batchsize=8, lr=0.0001, epoches=20, n_noise=31)

In [8]:
# Train on the whole population; the positive rows parameterise the generator noise.
net.train(pop_dec=normalized_array, labels=label, samples_pool=positive_samples)

Epoch[0], loss: 9.73695
Epoch[1], loss: 10.11586
Epoch[2], loss: 10.51387
Epoch[3], loss: 10.93344
Epoch[4], loss: 11.36037
Epoch[5], loss: 11.78094
Epoch[6], loss: 12.20822
Epoch[7], loss: 12.65934
Epoch[8], loss: 13.11994
Epoch[9], loss: 13.57548
Epoch[10], loss: 14.03508
Epoch[11], loss: 14.52316
Epoch[12], loss: 15.00477
Epoch[13], loss: 15.49404
Epoch[14], loss: 15.96527
Epoch[15], loss: 16.45486
Epoch[16], loss: 16.95815
Epoch[17], loss: 17.43817
Epoch[18], loss: 17.92923
Epoch[19], loss: 18.42580


In [10]:
# Draw 100 new solutions using noise fitted to the positive samples.
new_sample = net.generate(sample_noises=positive_samples, population_size=100)
print(new_sample.shape)  # one row per generated solution, 31 components each

(100, 31)


In [11]:
import numpy as np

# Sanity checks on the generated samples.

# 1) Every component must lie inside [0, 1].
is_within_range = ((new_sample >= 0) & (new_sample <= 1)).all()

# 2) Every row should sum to 1, like the normalised training rows.
#    The generator ends in a per-component sigmoid, which bounds each value
#    but does not constrain the row sum.
row_sums = new_sample.sum(axis=1)
is_sum_equal_to_1 = np.isclose(row_sums, 1.0).all()

print(f"Are all values within [0, 1]? {is_within_range}")
print(f"Is the sum of each row equal to 1? {is_sum_equal_to_1}")


Are all values within [0, 1]? True
Is the sum of each row equal to 1? False
