Tasks
1. Train a Working GAN (done)
2. Train a general classifier (train the same classifier at different dataset sizes and plot dataset size vs. accuracy)
3. Train a classifier that uses the smaller dataset sizes + the generated images 
4. Export accuracy vs dataset size into a different file
5. Plot the data from the file and compare the accuracy of the two classifiers

In [0]:
import math
import os
import pdb

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.utils as vutils
from scipy.io import loadmat
from skimage import color
from skimage import io
from sklearn.model_selection import train_test_split
from torch import optim,nn
from torch.utils.data import DataLoader,Dataset

class DCGAN_generator(nn.Module):
  """DCGAN generator: maps a latent noise vector to a 32x32 image.

  Attributes
  ----------
    ngpu : int
      The number of available GPU devices (stored but not used by forward)
    nz : int
      Dimension of the latent noise vector
    ngf : int
      Number of feature maps of the last hidden layer
    nc : int
      Number of channels of the generated image
    main : :py:class:`torch.nn.Sequential`
      The stack of transposed convolutions doing the actual work

  """
  def __init__(self, ngpu, nz=100, ngf=64, nc=3):
    """Init function

    Parameters
    ----------
      ngpu : int
        The number of available GPU devices
      nz : int
        Dimension of the latent noise vector (default 100)
      ngf : int
        Number of feature maps of the last hidden layer (default 64)
      nc : int
        Number of channels of the generated image (default 3)

    """
    super(DCGAN_generator, self).__init__()
    self.ngpu = ngpu
    self.nz = nz
    self.ngf = ngf
    self.nc = nc

    self.main = nn.Sequential(
      # input is Z: (nz) x 1 x 1, going into a transposed convolution
      nn.ConvTranspose2d(nz, ngf * 4, 4, 1, 0, bias=False),
      nn.BatchNorm2d(ngf * 4),
      nn.ReLU(True),
      # state size. (ngf*4) x 4 x 4
      nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
      nn.BatchNorm2d(ngf * 2),
      nn.ReLU(True),
      # state size. (ngf*2) x 8 x 8
      nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
      nn.BatchNorm2d(ngf),
      nn.ReLU(True),
      # state size. (ngf) x 16 x 16
      nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
      nn.Tanh()
      # state size. (nc) x 32 x 32, values in [-1, 1] because of the Tanh
    )

  def forward(self, input):
    """Forward function

    Parameters
    ----------
    input : :py:class:`torch.Tensor`
      Batch of noise vectors of shape (N, nz, 1, 1)

    Returns
    -------
    :py:class:`torch.Tensor`
      the output of the generator (i.e. a batch of images of shape
      (N, nc, 32, 32))

    """
    # Multi-GPU data parallelism is intentionally not handled here: the
    # network always runs on the single device its parameters live on.
    return self.main(input)



class DCGAN_discriminator(nn.Module):
  """DCGAN discriminator: scores 32x32 images as real (1) or fake (0).

  Attributes
  ----------
    ngpu : int
      The number of available GPU devices (stored but not used by forward)
    ndf : int
      Number of feature maps of the first layer
    nc : int
      Number of channels of the input image
    main : :py:class:`torch.nn.Sequential`
      The stack of strided convolutions doing the actual work

  """
  def __init__(self, ngpu, ndf=64, nc=3):
    """Init function

    Parameters
    ----------
      ngpu : int
        The number of available GPU devices
      ndf : int
        Number of feature maps of the first layer (default 64)
      nc : int
        Number of channels of the input image (default 3)

    """
    super(DCGAN_discriminator, self).__init__()
    self.ngpu = ngpu
    self.ndf = ndf
    self.nc = nc

    self.main = nn.Sequential(
      # input is (nc) x 32 x 32
      nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
      nn.BatchNorm2d(ndf),
      nn.LeakyReLU(0.2, inplace=True),
      # state size. (ndf) x 16 x 16
      nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
      nn.BatchNorm2d(ndf * 2),
      nn.LeakyReLU(0.2, inplace=True),
      # state size. (ndf*2) x 8 x 8
      nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
      nn.BatchNorm2d(ndf * 4),
      nn.LeakyReLU(0.2, inplace=True),
      # state size. (ndf*4) x 4 x 4
      nn.Conv2d(ndf * 4, 1, 4, 1, 0, bias=False),
      # state size. 1 x 1 x 1, squashed to a probability
      nn.Sigmoid()
    )

  def forward(self, input):
    """Forward function

    Parameters
    ----------
    input : :py:class:`torch.Tensor`
      Batch of images of shape (N, nc, 32, 32)

    Returns
    -------
    :py:class:`torch.Tensor`
      1-D tensor of shape (N,) holding, for each image, the sigmoid score
      in [0, 1] (1 = real, 0 = fake in the training script)

    """
    # Multi-GPU data parallelism is intentionally not handled here: the
    # network always runs on the single device its parameters live on.
    output = self.main(input)

    # Flatten (N, 1, 1, 1) to (N,); view(-1) keeps the batch axis even when N == 1
    return output.view(-1)

# Only conversion to tensors is applied; no normalization or augmentation.
# NOTE(review): the generator ends in a Tanh (outputs in [-1, 1]) while
# ToTensor presumably yields values in [0, 1] - confirm whether a
# Normalize((0.5,)*3, (0.5,)*3) step is wanted for GAN training.
transform = transforms.Compose([transforms.ToTensor()])

batch_size = 64

# SVHN training split, reshuffled at every epoch
trainset = datasets.SVHN("/content", split='train', download=True, transform=transform)
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# SVHN test split, iterated in a fixed order
testset = datasets.SVHN("/content", split='test', download=True, transform=transform)
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# define device: prefer the first CUDA GPU, but fall back to the CPU so the
# script still runs on machines without CUDA
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# data for plotting purposes
generatorLosses = []
discriminatorLosses = []

#training starts

# number of passes over the training set
epochs = 25

# side length of the images (not referenced elsewhere in the visible code)
input_size = 32

# scalar targets for real / generated images
real_label = 1
fake_label = 0


# models
netG = DCGAN_generator(1)
netD = DCGAN_discriminator(1)

netG.to(device)
netD.to(device)

print(netG)

# optimizers: one Adam instance per network, same hyper-parameters
optD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))

# log2(128) = 7 (not referenced elsewhere in the visible code)
input_length = int(math.log(128, 2))

# binary cross-entropy on the discriminator's sigmoid scores
loss = nn.BCELoss()

# torchvision's save_image does not create missing directories, so make sure
# the output folder for the sample grids exists before training starts.
sampleDir = "/content/gridOfFakeImages/"
os.makedirs(sampleDir, exist_ok=True)

for epoch in range(epochs):

  for i, data in enumerate(trainloader, 0):

    # Use the batch handed out by enumerate(). Building a fresh
    # iter(trainloader) for every step would restart the loader (and its
    # worker processes) each time and only ever look at its first batch.
    inputs = data[0].to(device)
    tmpBatchSize = inputs.size(0)

    # Targets are 1-D, shape (N,), to match the discriminator output that
    # BCELoss compares them against.
    realTargets = torch.ones(tmpBatchSize, device=device)
    fakeTargets = torch.zeros(tmpBatchSize, device=device)

    # generate fake images from latent noise of shape (N, 100, 1, 1)
    noise = torch.randn(tmpBatchSize, 100, 1, 1, device=device)
    fakeImageBatch = netG(noise)

    # ---- train discriminator ----
    optD.zero_grad()

    # real images should be scored as 1
    predictionsReal = netD(inputs)
    lossReal = loss(predictionsReal, realTargets)
    lossReal.backward()

    # fake images should be scored as 0; detach() keeps this backward pass
    # out of the generator, so no retain_graph is needed
    predictionsFake = netD(fakeImageBatch.detach())
    lossFake = loss(predictionsFake, fakeTargets)
    lossFake.backward()
    optD.step() # update discriminator parameters

    # total discriminator loss (real + fake), kept only for logging
    lossDiscriminator = (lossReal + lossFake).detach()

    # ---- train generator ----
    # the generator wants the (just updated) discriminator to score its
    # images as real, hence the targets of 1
    optG.zero_grad()
    predictionsFake = netD(fakeImageBatch)
    lossGenerator = loss(predictionsFake, realTargets)
    lossGenerator.backward()
    optG.step()

    # save losses for graphing
    generatorLosses.append(lossGenerator.item())
    discriminatorLosses.append(lossDiscriminator.item())

    # save a grid of generated images every 100 batches
    if i % 100 == 0:
      gridOfFakeImages = torchvision.utils.make_grid(fakeImageBatch.detach().cpu())
      torchvision.utils.save_image(gridOfFakeImages, sampleDir + str(epoch) + '_' + str(i) + '.png')

  print("Epoch " + str(epoch) + "Complete")
  print("Generator Loss: " + str(lossGenerator))
  print("Discriminator Loss: " + str(lossDiscriminator))

def validate(model=None, loader=None):
    """Measure classification accuracy of a model on a data loader.

    Parameters
    ----------
    model : :py:class:`torch.nn.Module`, optional
        Classifier returning one score per class; defaults to the
        module-level ``net``.
    loader : iterable of (images, labels) batches, optional
        Evaluation data; defaults to the module-level ``testloader``.

    Returns
    -------
    float
        Accuracy in percent (0.0 when the loader yields no samples).

    """
    if model is None:
        model = net
    if loader is None:
        loader = testloader

    # Evaluate on whatever device the model's parameters live on, so that
    # CPU batches coming out of the loader do not clash with a GPU model.
    firstParam = next(model.parameters(), None)
    evalDevice = firstParam.device if firstParam is not None else torch.device("cpu")

    correct = 0
    total = 0

    # Switch to eval mode for the measurement and restore the previous mode
    # afterwards, even if a batch raises.
    wasTraining = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in loader:
                images = images.to(evalDevice)
                labels = labels.to(evalDevice)
                outputs = model(images)
                # predicted class = index of the highest score
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(wasTraining)

    if total == 0:
        # nothing to evaluate: avoid dividing by zero
        print('No test images were provided, accuracy is undefined')
        return 0.0

    accuracy = 100 * correct / total
    # report the real number of evaluated images instead of a fixed count
    print('Accuracy of the network on the %d test images: %d %%' % (
        total, accuracy))
    return accuracy

# Persist both trained networks (whole modules, not just their weights)
for trainedModel, checkpointPath in ((netG, "netG.h5"), (netD, "netD.h5")):
    torch.save(trainedModel, checkpointPath)

# Draw the per-batch loss curves of both networks on one figure
plt.figure(figsize=(10, 5))
plt.title("Loss of Models")
for lossHistory, curveLabel in (
    (generatorLosses, "Generator"),
    (discriminatorLosses, "Discriminator"),
):
    plt.plot(lossHistory, label=curveLabel)
plt.xlabel("Batches")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Create (or truncate) the results file; the handle is closed further down
file = open("modelData.txt", "w")

class Net(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images, 10 classes.

    Two conv + max-pool stages (3 -> 6 -> 16 channels, 5x5 kernels) are
    followed by three fully connected layers (400 -> 120 -> 84 -> 10).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 feature maps of 5x5 remain after the second pooling of a 32x32 input
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the unnormalized class scores, shape (N, 10).

        Parameters
        ----------
        x : :py:class:`torch.Tensor`
            Batch of images of shape (N, 3, 32, 32)
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch axis fixed (instead of
        # view(-1, 400)) makes a wrongly sized input fail at fc1 rather than
        # being silently folded into a different batch size.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Close the "modelData.txt" handle opened above; nothing is written to it in between.
file.close() 