In [1]:
import os
import torch
import torchvision
import torch.nn as nn
from torchvision import transforms
from torchvision.utils import save_image

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [13]:
random_size = 64 
hidden_size = 256 
image_size = 784
num_epoch = 10
batch_size = 100
learning_rate = 0.0002
sample_dir = 'samples'


if not os.path.exists(sample_dir):
    os.makedirs(sample_dir)
    
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean =(0.5,0.5,0.5),# this is for RGB image(3 channel)
                        std = (0.5,0.5,0.5))])

data = torchvision.datasets.MNIST(root = 'Data', transform = transform, train=True, download = True)
data_loader = torch.utils.data.DataLoader(dataset = data, shuffle = True, batch_size = batch_size)

def denorm(x):
    out = (x + 1) / 2
    return out.clamp(0, 1)


class Discriminator(nn.Module):
    def __init__(self, image_size, hidden_size):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(image_size, hidden_size)
        self.relu = nn.LeakyReLU(0.2)#this is the negative slope. 
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.sigmoid(out)
        return (out)
class Generator(nn.Module):
    def __init__(self, random_size, hidden_size, image_size):
        super(Generator, self).__init__()
        self.fc1 = nn.Linear(random_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, image_size)
        self.tanh = nn.Tanh()
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.tanh(out)
        return(out)
    
model_d = Discriminator(image_size, hidden_size).to(device)
model_g = Generator(random_size, hidden_size, image_size).to(device)

criterion = nn.BCELoss() #using cross entropy loss
optimizer_d = torch.optim.Adam(model_d.parameters(), lr = learning_rate)
optimizer_g = torch.optim.Adam(model_g.parameters(), lr = learning_rate)

total_step = len(data_loader)

for epoch in range(num_epoch):
    for i, (images, labels) in enumerate(data_loader):
        images = images.reshape(-1, image_size).to(device)
        
        real_labels = torch.ones(batch_size, 1).to(device)
        fake_labels = torch.zeros(batch_size, 1).to(device)
        
        
        #To train the Discriminator
        output_d_real = model_d(images)
        d_real_loss = criterion(output_d_real, real_labels)
        
        z = torch.randn(batch_size, random_size).to(device)
        fake_images = model_g(z)
        output_d_fake = model_d(fake_images)
        d_fake_loss = criterion(output_d_fake, fake_labels)
        d_loss = d_real_loss + d_fake_loss
        
        optimizer_d.zero_grad()
        d_loss.backward()
        optimizer_d.step()#this is going to update only parameters of discriminator
        
        #to train the generator
        z = torch.randn(batch_size, random_size).to(device)
        fake_images = model_g(z)
        outputs = model_d(fake_images)
        
        #to train the generator the output of this should be compared with real_labels. 
        #so we compare the output by real label. 
        
        g_loss = criterion(outputs, real_labels)
        
        optimizer_g.zero_grad()
        g_loss.backward()
        optimizer_g.step()
        
        if (i+1) % 200 == 0:
            print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f} ' 
                  .format(epoch, num_epoch, i+1, total_step, d_loss.item(), g_loss.item()))
    if (epoch == num_epoch-1):
        fake_image = fake_images.reshape(fake_images.size(0),1,28,28)
        save_image(denorm(fake_images), os.path.join(sample_dir, 'fake_images-{}.png'.format(epoch+1)))
        
        

Epoch [0/10], Step [200/600], d_loss: 0.0452, g_loss: 4.3386 
Epoch [0/10], Step [400/600], d_loss: 0.1360, g_loss: 4.9729 
Epoch [0/10], Step [600/600], d_loss: 0.0328, g_loss: 5.3808 
Epoch [1/10], Step [200/600], d_loss: 0.0616, g_loss: 5.6563 
Epoch [1/10], Step [400/600], d_loss: 0.2106, g_loss: 3.5196 
Epoch [1/10], Step [600/600], d_loss: 0.3836, g_loss: 4.4486 
Epoch [2/10], Step [200/600], d_loss: 0.1566, g_loss: 3.4256 
Epoch [2/10], Step [400/600], d_loss: 1.1676, g_loss: 2.7942 
Epoch [2/10], Step [600/600], d_loss: 0.1919, g_loss: 4.2086 
Epoch [3/10], Step [200/600], d_loss: 0.2330, g_loss: 3.6334 
Epoch [3/10], Step [400/600], d_loss: 0.8132, g_loss: 3.4572 
Epoch [3/10], Step [600/600], d_loss: 0.1976, g_loss: 3.9961 
Epoch [4/10], Step [200/600], d_loss: 0.6469, g_loss: 3.4096 
Epoch [4/10], Step [400/600], d_loss: 0.3115, g_loss: 3.8466 
Epoch [4/10], Step [600/600], d_loss: 0.1797, g_loss: 3.2961 
Epoch [5/10], Step [200/600], d_loss: 0.4727, g_loss: 5.3163 
Epoch [5

In [17]:
torch.save(model_g.state_dict(), 'G.ckpt')  #we are saving only the parameters
torch.save(model_d.state_dict(), 'D.ckpt')  #if memory is not an issue then better save the whole model. 
torch.save(model_g, 'G_model.ckpt')#one more important thing is that the device which you have used to train will be only used for testing also(cuda or CPU)

  "type " + obj.__name__ + ". It won't be checked "


In [16]:
g_model = torch.load('G.ckpt') 
d_model = torch.load('D.ckpt')

print(g_model)

OrderedDict([('fc1.weight', tensor([[ 0.0204, -0.0074,  0.0106,  ..., -0.0078,  0.0017,  0.0054],
        [ 0.0491, -0.0023,  0.0294,  ...,  0.0021, -0.0113,  0.0027],
        [ 0.0573, -0.0045,  0.0484,  ..., -0.0216, -0.0126, -0.0022],
        ...,
        [-0.0088, -0.0236, -0.0427,  ...,  0.0254,  0.0002, -0.0157],
        [-0.0049,  0.0072,  0.0023,  ..., -0.0027,  0.0008,  0.0084],
        [ 0.0249, -0.0165,  0.0199,  ...,  0.0613, -0.0519, -0.0095]],
       device='cuda:0')), ('fc1.bias', tensor([-0.0935, -0.1042, -0.0458, -0.1942, -0.1662, -0.1799, -0.1838,  0.0796,
        -0.1442, -0.2187, -0.1250, -0.1063, -0.1021, -0.1250, -0.0782, -0.2127,
        -0.1127, -0.1682, -0.0990, -0.0644, -0.0494, -0.1956, -0.0590, -0.0468,
        -0.1630, -0.0769, -0.1232, -0.0913, -0.0860, -0.1368, -0.0371, -0.0885,
        -0.0736, -0.1410, -0.1843, -0.0407, -0.2049, -0.1075, -0.0249, -0.0482,
        -0.0548, -0.0398, -0.0879, -0.0353, -0.2352, -0.1684, -0.1707, -0.1164,
        -0.0800, -0

In [23]:
model = torch.load('G_model.ckpt')
print(model)
z = torch.randn(batch_size, random_size).to(device) #to show that is stores model as well as parameters
fake_images = model(z)
print(fake_images)

Generator(
  (fc1): Linear(in_features=64, out_features=256, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=784, bias=True)
  (tanh): Tanh()
)
tensor([[-0.8309, -0.7986, -0.9823,  ..., -0.9560, -0.9585, -0.8854],
        [-0.9631, -0.9526, -0.9981,  ..., -0.9983, -0.9878, -0.9978],
        [-0.8655, -0.7915, -0.9885,  ..., -0.9779, -0.9512, -0.9676],
        ...,
        [-0.8522, -0.8073, -0.9909,  ..., -0.9791, -0.9622, -0.9327],
        [-0.6143, -0.6682, -0.9381,  ..., -0.8887, -0.8573, -0.6515],
        [-0.9929, -0.9685, -1.0000,  ..., -0.9999, -0.9998, -0.9992]],
       device='cuda:0', grad_fn=<TanhBackward>)
