In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets
import torch.utils.data as tud
import torch.nn.functional as F
import matplotlib.pyplot as plt

from torch.autograd import Variable 
# Variables wrap a Tensor
#x = Variable(torch.ones(2, 2), requires_grad=True)
# Variable containing:
# 1  1
# 1  1
# [torch.FloatTensor of size 2x2]

import torch.optim as optim #Optimizer
from torch.utils.tensorboard import SummaryWriter #to print to tensorboard

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
#gpu if cuda exists, else run on cpu


  return torch._C._cuda_getDeviceCount() > 0


In [2]:
batch_size = 32


something = datasets.MNIST("", train=True, download=True,
                      transform = transforms.Compose([transforms.ToTensor()]))


train = datasets.MNIST("", train=True, download=True,
                      transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,)),]))
test = datasets.MNIST("", train=False, download=True,
                     transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,)),]))

trainset = tud.DataLoader(train, batch_size=32, shuffle=True)
testset = tud.DataLoader(test, batch_size=32, shuffle=True)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [36]:
class Discriminator(nn.Module):
    def __init__(self, img_dim):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(img_dim, 1024)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features//2)
        self.fc3 = nn.Linear(self.fc2.out_features, self.fc2.out_features//2)
        self.fc4 = nn.Linear(self.fc3.out_features, 1)
    
    # forward method
    def forward(self, x):
        x = F.leaky_relu(self.fc1(x), 0.2)
        x = F.dropout(x, 0.3)
        x = F.leaky_relu(self.fc2(x), 0.2)
        x = F.dropout(x, 0.3)
        x = F.leaky_relu(self.fc3(x), 0.2)
        x = F.dropout(x, 0.3)
        return torch.sigmoid(self.fc4(x))

    
    
class Generator(nn.Module):
    def __init__(self, noise_dim, img_dim):
        super(Generator, self).__init__()       
        self.fc1 = nn.Linear(noise_dim, 256)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features*2)
        self.fc3 = nn.Linear(self.fc2.out_features, self.fc2.out_features*2)
        self.fc4 = nn.Linear(self.fc3.out_features, img_dim)
    
    # forward method
    def forward(self, x): 
        x = F.leaky_relu(self.fc1(x), 0.2)
        x = F.leaky_relu(self.fc2(x), 0.2)
        x = F.leaky_relu(self.fc3(x), 0.2)
        return torch.tanh(self.fc4(x)).view(-1, 1, 28, 28)

In [21]:
noise_dim = 100
img_dim = 28*28


Disc = Discriminator(img_dim = img_dim).to(device)
Gen = Generator(noise_dim = noise_dim, img_dim = img_dim).to(device)


In [27]:
Gen

Generator(
  (fc1): Linear(in_features=100, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)

In [28]:
Disc

Discriminator(
  (fc1): Linear(in_features=784, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=1, bias=True)
)

In [29]:
#loss, doc. @ https://pytorch.org/docs/stable/generated/torch.nn.BCELoss.html
criterion = nn.BCELoss(reduction='mean')  #synt: criterion(network predicted output, target)

#optimizer
lr = 0.0002 

Disc_optimizer = optim.Adam(Disc.parameters(), lr = lr)
Gen_optimizer = optim.Adam(Gen.parameters(), lr = lr)

writer_fake = SummaryWriter(f"logs/fake")
writer_real = SummaryWriter(f"logs/real")


fixed_z = Variable(torch.randn(batch_size, noise_dim))
step = 0 #for the tensorboard writer


In [30]:
#synt: criterion(network output, target), for the discriminator we have a loss of real and fake
#data from the generator, we optimize the network so that it can better tell by 
#we feed our first loss term the real data so that it can get good in classifying this data as ones
#and feed the second loss the fake generator data, so that it can get good classifying this data as 
#zeros


#the dicriminator:

batch_size = 32

def Disc_train(x):
    
    Disc.zero_grad()
    
    #using real data:
    x_real, y_real = x.view(-1, 28*28), torch.ones(batch_size, 1)
    x_real, y_real = Variable(x_real.to(device)), Variable(y_real.to(device))
    
    D_output_real = Disc(x_real) #output of the disctiminator's network with real data  ###&&&&&&&&&&&&&&&&& added .view(-1)
    D_real_loss = criterion(D_output_real, y_real) 
    D_real_score = D_output_real
    
    
   
    #using Generator (fake) data for the training:
    
    z = Variable(torch.randn(batch_size, noise_dim).to(device)) #100 batches for every Generator input
    x_fake, y_fake = Gen(z), Variable(torch.zeros(batch_size, 1).to(device))
    
    D_output_fake = Disc(x_fake)                                                     #&&&&&&&&&&&&&&&&&&&&&& added .view(-1)
    D_fake_loss = criterion(D_output_fake, y_fake) #----> max log(D(x)) + log(1 - D(G(z))) (minimize w/ minus)
    
    D_fake_loss = D_output_fake
    
    #originally we want to maximize and for the BCELoss, there's a minus sign indicating 
    #minimizing that same loss for the disc = maximizing the original loss in the paper
    #----> max log(D(x)) + log(1 - D(G(z))) (minimize when we add the minus sign)
    #so for real, maximize for ones and for fake maximize for zeros
    
    #backpropagating
    D_loss = (D_real_loss + D_fake_loss)
    
    
    #DEE = torch.sum(D_loss)
    D_loss.backward()
    
    
    Disc_optimizer.step()
    
    return D_loss.data.item()

NameError: name 'D_loss' is not defined

In [31]:
#the Generator:

def Gen_train(x):
    
    Gen.zero_grad()

    z = Variable(torch.randn(batch_size, noise_dim))
    y = Variable(torch.ones(batch_size, 1).to(device))

    G_output = Gen(z) # z --> Gen --> Disc(Gen) = D_output
    D_output = Disc(G_output)
    G_loss = criterion(D_output, y) #-----> min log(1 - D(G(z))) <-> max log(D(G(z)) (so, second term
    #is zero and first term we pass on D(G(z)) 

    #backpropagating
    
    #GEE = torch.sum(G_loss)
    G_loss.backward()
    Gen_optimizer.step()

    return G_loss.data.item()

In [37]:
epoch_num = 200
for epoch in range(1, epoch_num+1):
    Loss_D, Loss_G = [], []
    for batch_idx, (x,_) in enumerate(trainset):
        Loss_D.append(Disc_train(x))
        Loss_G.append(Gen_train(x))
    print('[%d/%d]: Loss_D: %.3f, Loss_G: %.3f' % (
            (epoch), epoch_num, torch.mean(torch.FloatTensor(Loss_D)), torch.mean(torch.FloatTensor(Loss_G))))
    
   

RuntimeError: grad can be implicitly created only for scalar outputs

In [None]:
 
    with torch.no_grad():
                fake = Gen(fixed_z).reshape(-1, 1, 28, 28)
                data = x.reshape(-1, 1, 28, 28)
                img_grid_fake = torchvision.utils.make_grid(fake, normalize=True)
                img_grid_real = torchvision.utils.make_grid(data, normalize=True)

                writer_fake.add_image(
                    "Mnist Fake Images", img_grid_fake, global_step=step
                )
                writer_real.add_image(
                    "Mnist Real Images", img_grid_real, global_step=step
                )
                step += 1
    