In [3]:
import torch
import numpy as np
"""
发送长度为K的导频符号序列，其中发送符号x[i]从发送集S中等概率随机选取，接收端接受y[i]
训练数据即T[i]=(S[i],y[i])
收集多个i时刻的信道输入状态向量与信道输出（K个），从而得到训练数据集T=[T1,T2,...Ti]
"""

'\n发送长度为K的导频符号序列，其中发送符号x[i]从发送集S中等概率随机选取，接收端接受y[i]\n训练数据即T[i]=(S[i],y[i])\n收集多个i时刻的信道输入状态向量与信道输出（K个），从而得到训练数据集T=[T1,T2,...Ti]\n'

In [15]:
"""
Step1. 开始构建数据生成器，产生S[i]与y[i]的数据对
根据后面的看法，似乎所有的观测值有一个特定的集合，y[i]只会在特定集合内获取
最终的网络输出相当于成为了一个分类问题
"""
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class NumbersDataset(Dataset):
    def __init__(self, block_length):
        data_size = pow(2, block_length)
        self.samples = np.random.binomial(1, 0.5, [data_size, 1, block_length])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        transmitter = self.samples[idx]
        # transmitter = torch.from_numpy(transmitter)
        transmitter = torch.FloatTensor(transmitter)
        # receiver, true q
        receiver = torch.ones(1,10)
        return transmitter, receiver

In [18]:
dataset = NumbersDataset(4)
dataloader = DataLoader(dataset, batch_size=3, shuffle=True)
print(len(dataset))
print(dataset[0])
print(next(iter(dataloader)))

16
(tensor([[1., 1., 1., 1.]]), tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]))
[tensor([[[0., 1., 1., 1.]],

        [[1., 1., 0., 0.]],

        [[1., 1., 1., 1.]]]), tensor([[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]])]


In [90]:
"""
use a fake function to replace the viterbiNet
g() -> p -> q(1,10)
"""
def fakeFunction(batch_size, p):
    return torch.ones(batch_size, 1,10).to(device)

In [66]:
p = torch.ones(1,15)
fakeFunction(p)

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [92]:
"""
Step2. 开始构建两个全连接神经网络D和G，使参数满足标准高斯分布
所述的生成器G是一个全连接神经网络
"""
import torch.nn as nn
from torchsummary import summary

class G(nn.Module):
    def __init__(self, p):
        super(G, self).__init__()
        self.FC1 = nn.Sequential(nn.Linear(4,256), nn.LeakyReLU())
        self.FC2 = nn.Sequential(nn.Linear(256,512), nn.LeakyReLU())
        self.FC3 = nn.Sequential(nn.Linear(512,1024), nn.LeakyReLU())
        self.FC4 = nn.Linear(1024,p)
    def forward(self, s):
#         隐含层层数为Ng
        s = self.FC1(s)
        s = self.FC2(s)
        s = self.FC3(s)
        s = self.FC4(s)
        return nn.Softmax(dim=-1)(s)

class D(nn.Module):
    def __init__(self):
        super(D, self).__init__()
        self.FC1a = nn.Sequential(nn.Linear(10,256), nn.LeakyReLU())
        self.FC1b = nn.Sequential(nn.Linear(4,256), nn.LeakyReLU())
        self.FC2 = nn.Sequential(nn.Linear(512,512), nn.LeakyReLU())
        self.FC3 = nn.Sequential(nn.Linear(512,256), nn.LeakyReLU())
        self.FC4 = nn.Sequential(nn.Linear(256,1), nn.Sigmoid())

    def forward(self, q, s):
        # print(q.size())
        # print(s.size())
        s1a = self.FC1a(q)
        s1b = self.FC1b(s)
        s = torch.cat((s1a,s1b),-1)
        # print(s.size())
        s = self.FC2(s)
        s = self.FC3(s)
        s = self.FC4(s)
        return s

In [52]:
'''
在这里要
1. 输出网络的结构(print,torch.summary两种)
2. 查看网络的现有每层参数
3. 构造数据，喂入网络，查看网络的输出
'''

g = G(4)
print(g)
for name,parameters in g.named_parameters():
    print(name,':',parameters.size())
#     print(parameters)

a = torch.ones(1,4)
print(a)
print(g.forward(a))

G(
  (FC1): Sequential(
    (0): Linear(in_features=4, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC2): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC3): Sequential(
    (0): Linear(in_features=512, out_features=1024, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC4): Linear(in_features=1024, out_features=4, bias=True)
)
FC1.0.weight : torch.Size([256, 4])
FC1.0.bias : torch.Size([256])
FC2.0.weight : torch.Size([512, 256])
FC2.0.bias : torch.Size([512])
FC3.0.weight : torch.Size([1024, 512])
FC3.0.bias : torch.Size([1024])
FC4.weight : torch.Size([4, 1024])
FC4.bias : torch.Size([4])
tensor([[1., 1., 1., 1.]])
tensor([[0.2704, 0.2420, 0.2414, 0.2462]], grad_fn=<SoftmaxBackward>)


In [34]:
d = D()
print(d)
b = torch.ones(1,10)
s = torch.ones(1,4)
print(s,b)
print(d.forward(b,s))

D(
  (FC1a): Sequential(
    (0): Linear(in_features=10, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC1b): Sequential(
    (0): Linear(in_features=4, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC2): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC3): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC4): Sequential(
    (0): Linear(in_features=256, out_features=1, bias=True)
    (1): Sigmoid()
  )
)
tensor([[1., 1., 1., 1.]]) tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
torch.Size([1, 256])
torch.Size([1, 256])
tensor([[0.5011]], grad_fn=<SigmoidBackward>)


In [45]:
g.cuda()
summary(g,(1,4))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 256]           1,280
         LeakyReLU-2               [-1, 1, 256]               0
            Linear-3               [-1, 1, 512]         131,584
         LeakyReLU-4               [-1, 1, 512]               0
            Linear-5              [-1, 1, 1024]         525,312
         LeakyReLU-6              [-1, 1, 1024]               0
            Linear-7                [-1, 1, 10]          10,250
           Softmax-8                [-1, 1, 10]               0
Total params: 668,426
Trainable params: 668,426
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.03
Params size (MB): 2.55
Estimated Total Size (MB): 2.58
----------------------------------------------------------------


In [42]:
d = D()
d.cuda()
summary(d,[(1,10),(1,4)])

torch.Size([2, 1, 10])
torch.Size([2, 1, 4])
torch.Size([2, 1, 256])
torch.Size([2, 1, 256])
torch.Size([2, 1, 512])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 256]           2,816
         LeakyReLU-2               [-1, 1, 256]               0
            Linear-3               [-1, 1, 256]           1,280
         LeakyReLU-4               [-1, 1, 256]               0
            Linear-5               [-1, 1, 512]         262,656
         LeakyReLU-6               [-1, 1, 512]               0
            Linear-7               [-1, 1, 256]         131,328
         LeakyReLU-8               [-1, 1, 256]               0
            Linear-9                 [-1, 1, 1]             257
          Sigmoid-10                 [-1, 1, 1]               0
Total params: 398,337
Trainable params: 398,337
Non-trainable params: 0
------------------------------------------

In [51]:
g = G(10)
g.cuda()
for i,data in enumerate(dataloader, 0):
    print(i)
    device = torch.device("cuda:0")
    trainx = data[0].to(device)
    y = g.forward(trainx)
    print(y)
    print(y.sum())

0
tensor([[[0.1046, 0.0961, 0.0924, 0.1008, 0.0950, 0.1078, 0.1037, 0.1027,
          0.0999, 0.0970]],

        [[0.1052, 0.0975, 0.0939, 0.0999, 0.0958, 0.1074, 0.1010, 0.1018,
          0.1006, 0.0970]],

        [[0.1042, 0.0995, 0.0939, 0.0994, 0.0961, 0.1060, 0.1014, 0.1025,
          0.1000, 0.0970]]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(3., device='cuda:0', grad_fn=<SumBackward0>)
1
tensor([[[0.1062, 0.0928, 0.0939, 0.1001, 0.0980, 0.1108, 0.1021, 0.0998,
          0.0982, 0.0980]],

        [[0.1026, 0.0988, 0.0965, 0.1012, 0.0978, 0.1046, 0.1027, 0.1018,
          0.0989, 0.0951]],

        [[0.1031, 0.0965, 0.0944, 0.1008, 0.0993, 0.1052, 0.1037, 0.1022,
          0.0995, 0.0953]]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(3., device='cuda:0', grad_fn=<SumBackward0>)
2
tensor([[[0.1052, 0.0949, 0.0961, 0.1001, 0.0965, 0.1097, 0.1002, 0.0999,
          0.0987, 0.0987]],

        [[0.1022, 0.0956, 0.0948, 0.1013, 0.0985, 0.1064, 0.1047, 0.1009,
       

In [94]:
'''
Step3. 对两个网络进行初始化，按照高斯分布初始化参数结构
'''

# 初始化函数接受一个初始化过的网络作为参数输入，将其参数重新初始化为高斯分布
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('FC') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

NetG = G(12)
NetD = D()
NetG.cuda()
NetD.cuda()
# 【标记】关于此处参数是否高斯分布生效需要进一步做验证
NetG.apply(weights_init)
NetD.apply(weights_init)


D(
  (FC1a): Sequential(
    (0): Linear(in_features=10, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC1b): Sequential(
    (0): Linear(in_features=4, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC2): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC3): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (FC4): Sequential(
    (0): Linear(in_features=256, out_features=1, bias=True)
    (1): Sigmoid()
  )
)

In [54]:
for name,parameters in g.named_parameters():
    print(name,':',parameters)
#     print(parameters)


FC1.0.weight : Parameter containing:
tensor([[-0.2813, -0.4514, -0.3028,  0.4451],
        [ 0.1432,  0.1365,  0.3029, -0.3435],
        [-0.2408,  0.0192,  0.1322, -0.2734],
        ...,
        [ 0.0901,  0.3240,  0.0126, -0.3547],
        [-0.0486, -0.3292, -0.4434, -0.2826],
        [ 0.4154,  0.1788,  0.0579, -0.0430]], requires_grad=True)
FC1.0.bias : Parameter containing:
tensor([-3.7050e-01, -2.8152e-01,  3.0605e-01, -3.4954e-01, -4.2278e-01,
         4.2647e-01, -3.4497e-01,  2.5541e-01,  4.6757e-01, -2.9368e-01,
         1.4880e-01, -7.6593e-03, -1.5279e-01, -2.5955e-01, -4.0801e-02,
         7.5985e-02,  5.6768e-02,  3.9169e-01,  4.3593e-01, -2.5765e-01,
        -7.0200e-02, -2.6391e-01, -4.7844e-01, -4.2165e-01,  2.3019e-02,
        -4.8557e-01, -1.0182e-01,  1.6846e-01, -3.8121e-01,  3.9228e-01,
         9.6484e-02, -2.7481e-01,  4.8452e-01,  1.7027e-01, -4.2788e-01,
        -3.2793e-01,  3.3946e-01, -2.9105e-01,  2.9447e-01, -3.0274e-01,
         1.0345e-01, -2.3761e-01, 

In [93]:
'''
Step4. 分别定义两个网络的损失函数和优化方法，进行训练
'''
# Initialize BCELoss function
criterion = nn.BCELoss()

# Create batch of latent vectors that we will use to visualize
#  the progression of the generator
# 这里产生一个固定的噪声，每轮学习完成后都使用固定噪声来进行输出
# fixed_noise = torch.randn(25, nz, 1, 1, device=device)

# Establish convention for real and fake labels during training
real_label = 1.
fake_label = 0.

# Setup Adam optimizers for both G and D
lr = 0.0002
beta1 = 0.5
optimizerD = torch.optim.Adam(NetD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = torch.optim.Adam(NetG.parameters(), lr=lr, betas=(beta1, 0.999))


In [96]:
'''
Step5. 开始进行网络训练
首先对鉴别器进行训练，然后训练生成器
'''
# Training Loop
num_epochs = 25

# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0
ngpu = 1
nz = 100
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
print("Starting Training Loop...")
# For each epoch
# dataset = NumbersDataset(0, 50)
# dataloader = DataLoader(dataset, batch_size=10, shuffle=True)
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(dataloader, 0):

        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        NetD.zero_grad()
        # Format batch
        data_train = data[0].to(device)
        data_label = data[1].to(device)

        batch_size = data_train.size(0)
        label = torch.full((batch_size,), real_label, dtype=torch.float, device=device)

        # Forward pass real batch through D
        output = NetD(data_label, data_train).view(-1)
        # print(output.size())
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        # noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        p = NetG(data_train)
        noise_lable = fakeFunction(batch_size, p)
        noise_lable.to(device)
        label.fill_(fake_label)
        # Classify all fake batch with D
        output = NetD(noise_lable, data_train).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Add the gradients from the all-real and all-fake batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        NetG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = NetD(noise_lable, data_train).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())





Starting Training Loop...
[0/25][0/6]	Loss_D: 1.3872	Loss_G: 0.7234	D(x): 0.4851	D(G(z)): 0.4851 / 0.4851
[1/25][0/6]	Loss_D: 1.3872	Loss_G: 0.7240	D(x): 0.4848	D(G(z)): 0.4848 / 0.4848
[2/25][0/6]	Loss_D: 1.3871	Loss_G: 0.7215	D(x): 0.4860	D(G(z)): 0.4860 / 0.4860
[3/25][0/6]	Loss_D: 1.3872	Loss_G: 0.7234	D(x): 0.4851	D(G(z)): 0.4851 / 0.4851
[4/25][0/6]	Loss_D: 1.3870	Loss_G: 0.7209	D(x): 0.4863	D(G(z)): 0.4863 / 0.4863
[5/25][0/6]	Loss_D: 1.3872	Loss_G: 0.7232	D(x): 0.4852	D(G(z)): 0.4852 / 0.4852
[6/25][0/6]	Loss_D: 1.3870	Loss_G: 0.7209	D(x): 0.4863	D(G(z)): 0.4863 / 0.4863
[7/25][0/6]	Loss_D: 1.3872	Loss_G: 0.7228	D(x): 0.4854	D(G(z)): 0.4854 / 0.4854
[8/25][0/6]	Loss_D: 1.3872	Loss_G: 0.7241	D(x): 0.4848	D(G(z)): 0.4848 / 0.4848
[9/25][0/6]	Loss_D: 1.3873	Loss_G: 0.7245	D(x): 0.4846	D(G(z)): 0.4846 / 0.4846
[10/25][0/6]	Loss_D: 1.3872	Loss_G: 0.7245	D(x): 0.4846	D(G(z)): 0.4846 / 0.4846
[11/25][0/6]	Loss_D: 1.3871	Loss_G: 0.7222	D(x): 0.4857	D(G(z)): 0.4857 / 0.4857
[12/25][0/6]