<h1> Generative Adversarial Networks using the OpenAI Gym and PyTorch </h1>
<div> 
<p> <b> Swastik Nath 2020. </b> </p>
<p> Before running this notebook, make sure you have access to a GPU: the training workload in this notebook is intended to run on the GPU.</p>
</div>

In [1]:
!pip install opencv-python
!pip install --upgrade tensorflow
!pip install --upgrade grpcio
import tensorflow as tf
import random
import argparse
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.utils as vutils
import gym
import gym.spaces
import gym.logger as logstat
logstat.set_level(gym.logger.INFO)
import numpy as np
from gym.wrappers import Monitor
!pip install pyvirtualdisplay
!apt-get install python-opengl ffmpeg xvfb 

Requirement already up-to-date: tensorflow in /usr/local/lib/python3.6/dist-packages (2.1.0)
Requirement already up-to-date: grpcio in /usr/local/lib/python3.6/dist-packages (1.27.2)
Reading package lists... Done
Building dependency tree       
Reading state information... Done
python-opengl is already the newest version (3.1.0+dfsg-1).
ffmpeg is already the newest version (7:3.4.6-0ubuntu0.18.04.1).
xvfb is already the newest version (2:1.19.6-1ubuntu4.3).
The following package was automatically installed and is no longer required:
  libnvidia-common-430
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 25 not upgraded.


In [0]:
# Number of channels of the noise tensor fed to the generator
# (used as in_channels of the generator's first transposed convolution).
LATENT_VECTOR_SIZE = 100
# Base channel count of the discriminator; deeper layers use multiples of it.
DISCR_FILTERS = 64
# Base channel count of the generator; earlier layers use multiples of it.
GENER_FILTERS = 64
# Number of frames per training batch (also the length of the label tensors).
BATCH_SIZE = 16

In [0]:
# Side length (in pixels) that every observation is resized to.
IMAGE_SIZE = 64
# Step size handed to both Adam optimizers.
LEARNING_RATE = 1e-4
# NOTE(review): the two intervals below are not referenced by any visible cell;
# presumably meant for periodic loss reporting / image dumps - confirm or remove.
REPORT_EVERY_ITER = 100
SAVE_IMAGE_EVERY_ITER = 1000

In [0]:
class InputWrapper(gym.ObservationWrapper):
  '''
  Observation wrapper that prepares environment frames for the networks.

  Every observation is resized to IMAGE_SIZE x IMAGE_SIZE, its axis 2 is
  moved to the front and the result is cast to float32. The advertised
  observation space is rebuilt by pushing the bounds of the wrapped
  environment's space through the same transformation.
  '''
  def __init__(self, *args):
    super().__init__(*args)
    original_space = self.observation_space
    # Only Box spaces expose the low/high arrays that are transformed below.
    assert isinstance(original_space, gym.spaces.Box)
    self.observation_space = gym.spaces.Box(
        self.observation(original_space.low),
        self.observation(original_space.high),
        dtype=np.float32,
    )

  def observation(self, observation):
    '''
    Convert a single frame to the layout expected by the networks.

    The frame is assumed to be a 3-D array with channels on the last axis
    (H, W, C) - TODO confirm against the wrapped environments. The returned
    array is float32 with the former axis 2 first, i.e.
    (C, IMAGE_SIZE, IMAGE_SIZE) under that assumption.
    '''
    # Resize first, so the spatial size is IMAGE_SIZE x IMAGE_SIZE from here on.
    resized = cv2.resize(observation, (IMAGE_SIZE, IMAGE_SIZE))
    # (IMAGE_SIZE, IMAGE_SIZE, C) -> (C, IMAGE_SIZE, IMAGE_SIZE)
    channels_first = np.moveaxis(resized, 2, 0)
    return channels_first.astype(np.float32)
    

In [0]:
class Discriminator(nn.Module):
    """Convolutional classifier that scores images with a value in (0, 1).

    The network is a stack of stride-2 convolutions (each halving the spatial
    size) followed by a final 4x4 convolution with a single output channel
    and a sigmoid. For 64x64 inputs the spatial size shrinks
    64 -> 32 -> 16 -> 8 -> 4 -> 1, so each image yields exactly one score.

    Args:
        input_shape: shape of one input image; only ``input_shape[0]`` (the
            channel count) is used.
    """

    def __init__(self, input_shape):
        super().__init__()
        base = DISCR_FILTERS

        # First stage: no batch normalisation directly on the raw image.
        layers = [
            nn.Conv2d(input_shape[0], base, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
        ]
        # Three down-sampling stages, doubling the channel count each time.
        for mult_in, mult_out in ((1, 2), (2, 4), (4, 8)):
            layers.extend([
                nn.Conv2d(base * mult_in, base * mult_out,
                          kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(base * mult_out),
                nn.ReLU(),
            ])
        # Collapse the remaining feature map to a single channel and squash it.
        layers.extend([
            nn.Conv2d(base * 8, 1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid(),
        ])
        self.conv_pipe = nn.Sequential(*layers)

    def forward(self, x):
        """Return a 1-D tensor holding the flattened scores for ``x``."""
        scores = self.conv_pipe(x)
        # Flatten to a column, then drop the trailing singleton dimension.
        return scores.view(-1, 1).squeeze(dim=1)

In [0]:
class Generator(nn.Module):
  '''
  Transposed-convolution network that turns a latent tensor into an image.

  The first layer expects LATENT_VECTOR_SIZE input channels; for an input
  of spatial size 1x1 it produces a 4x4 map, and each of the four following
  stride-2 layers doubles the spatial size (4 -> 8 -> 16 -> 32 -> 64).
  The final Tanh keeps the output values in [-1, 1].

  output_shape: shape of one generated image; only output_shape[0]
  (the channel count) is used.
  '''
  def __init__(self, output_shape):
    super().__init__()
    base = GENER_FILTERS

    # Projection of the latent tensor onto the widest feature map.
    stages = [
        nn.ConvTranspose2d(LATENT_VECTOR_SIZE, base * 8, kernel_size=4, stride=1, padding=0),
        nn.BatchNorm2d(base * 8),
        nn.ReLU(),
    ]
    # Three up-sampling stages, halving the channel count each time.
    for mult_in, mult_out in ((8, 4), (4, 2), (2, 1)):
      stages.extend([
          nn.ConvTranspose2d(base * mult_in, base * mult_out, kernel_size=4, stride=2, padding=1),
          nn.BatchNorm2d(base * mult_out),
          nn.ReLU(),
      ])
    # Last up-sampling stage maps to the requested number of image channels.
    stages.extend([
        nn.ConvTranspose2d(base, output_shape[0], kernel_size=4, stride=2, padding=1),
        nn.Tanh(),
    ])
    self.pipe = nn.Sequential(*stages)

  def forward(self, x):
    '''Run the latent tensor x through the pipeline and return the result.'''
    generated = self.pipe(x)
    return generated

In [0]:
def iterate_batches(envs, batch_size=None):
  '''
  Endlessly yield batches of observations sampled from random environments.

  The batch is seeded with the reset observation of every environment.
  Afterwards a random environment is stepped with a random action on each
  iteration; observations whose mean is not above 0.01 are skipped. Once
  enough observations are collected they are scaled from [0, 255] to
  [-1, 1] and yielded as a float32 tensor whose first dimension is
  batch_size. Environments that report being done are reset.

  envs: non-empty list of environments exposing reset(), step() and
        action_space.sample().
  batch_size: number of observations per batch. None (the default) means
        "use the current value of BATCH_SIZE", looked up when iteration
        starts so that re-running the configuration cell is picked up.

  Raises ValueError (on first iteration) if batch_size is smaller than 1.
  '''
  if batch_size is None:
    batch_size = BATCH_SIZE
  if batch_size < 1:
    raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

  batch = [e.reset() for e in envs]

  while True:
    e = random.choice(envs)
    obs, _, is_done, _ = e.step(e.action_space.sample())
    if np.mean(obs) > 0.01:
      batch.append(obs)
    # ">=" instead of "==": the seed observations alone may already reach or
    # exceed batch_size (batch_size <= len(envs)), in which case an equality
    # test would never fire and the list would grow without bound.
    while len(batch) >= batch_size:
      # normalizing between -1 to 1
      batch_np = np.array(batch[:batch_size], dtype=np.float32) * 2.0 / 255.0 - 1.0
      del batch[:batch_size]
      yield torch.tensor(batch_np)
    if is_done:
      e.reset()

In [8]:
from tqdm import tqdm

if __name__ == "__main__":
    # Number of optimisation steps to run; drives both the progress bar and
    # the stopping condition.
    total_iterations = 7000

    # Prefer the GPU, but fall back to the CPU instead of crashing when CUDA
    # is not available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    envs = [InputWrapper(gym.make(name)) for name in ('Breakout-v0', 'AirRaid-v0', 'Pong-v0')]

    try:
        input_shape = envs[0].observation_space.shape

        net_discr = Discriminator(input_shape=input_shape).to(device)
        net_gener = Generator(output_shape=input_shape).to(device)

        objective = nn.BCELoss()
        gen_optimizer = optim.Adam(params=net_gener.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
        dis_optimizer = optim.Adam(params=net_discr.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))

        # Per-iteration loss history, kept for later inspection.
        gen_losses = []
        dis_losses = []
        iter_no = 0

        # Targets for the BCE loss: 1 for real frames, 0 for generated ones.
        true_labels_v = torch.ones(BATCH_SIZE, dtype=torch.float32, device=device)
        fake_labels_v = torch.zeros(BATCH_SIZE, dtype=torch.float32, device=device)

        # The context manager closes the bar even if training raises.
        with tqdm(total=total_iterations) as pbar:
            for batch_v in iterate_batches(envs):
                # Sample the latent noise directly on the target device.
                gen_input_v = torch.randn(BATCH_SIZE, LATENT_VECTOR_SIZE, 1, 1, device=device)
                batch_v = batch_v.to(device)
                gen_output_v = net_gener(gen_input_v)

                # train discriminator
                dis_optimizer.zero_grad()
                dis_output_true_v = net_discr(batch_v)
                # detach(): the discriminator update must not back-propagate
                # into the generator.
                dis_output_fake_v = net_discr(gen_output_v.detach())
                dis_loss = objective(dis_output_true_v, true_labels_v) + objective(dis_output_fake_v, fake_labels_v)
                dis_loss.backward()
                dis_optimizer.step()
                dis_losses.append(dis_loss.item())

                # train generator: it is rewarded when the discriminator
                # labels its output as real.
                gen_optimizer.zero_grad()
                dis_output_v = net_discr(gen_output_v)
                gen_loss_v = objective(dis_output_v, true_labels_v)
                gen_loss_v.backward()
                gen_optimizer.step()
                gen_losses.append(gen_loss_v.item())

                iter_no += 1
                pbar.update(1)

                if iter_no >= total_iterations:
                    break

        print("Final Result: ")
        print("Iteration : %d, Discriminator Loss: %f, Generator Loss: %f " % (iter_no, dis_loss.item(), gen_loss_v.item()))
    finally:
        # Release emulator resources whether or not training succeeded.
        for env in envs:
            env.close()

INFO: Making new env: Breakout-v0
INFO: Making new env: AirRaid-v0
INFO: Making new env: Pong-v0


100%|██████████| 7000/7000 [07:35<00:00, 15.38it/s]

Final Result: 
Iteration : 7000, Discriminator Loss: 0.005120, Generator Loss: 9.145634 



