In [1]:
#import necessary packages
from __future__ import print_function
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

In [2]:
from tqdm import tqdm #progress bar!!

In [3]:
#creates a random seed from 1 - 10000 reinitialized at runtime
manualSeed = random.randint(1, 10000)
print("The random seed for this run is:", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

The random seed for this run is: 6077


<torch._C.Generator at 0x7f977806d290>

In [4]:
#testing pathname
os.path.exists("melPlots/drops")

True

In [5]:
#directory where data lives
dataroot = "melPlots/drops"

#number of subprocesses to use for loading data
workers = 2

#batch size for training
batch_size = 40

#Size of square melspec images. Images will be resized to this size.
image_size = 218

#number of color channels in training images. Color images have 3.
nc = 3

#size of the z vector, aka input of the generator
nz = 100

#size of feature maps in generator, set to same size as melspec images.
ngf = 218

#size of feature maps in discriminator, set to same size as melspec images
ndf = 218

#number of training epochs, will be low for faster runtime
num_epochs = 5

#learning rate for optimizers, set low for low step size
lr = 0.0005

#Beta1 hyperparameter for Adam optimizer
beta1 = 0.5

#number of GPUs available. Use 0 for CPU mode, but this cluster has 1.
ngpu = 1

In [6]:
#we use the image folder directory to load the data
#directory needs at least one subfolder/subdirectory
#elsee it will throw an exception.
dataset = dset.ImageFolder(root = dataroot,
                          transform = transforms.Compose([
                              transforms.ToTensor()
                          ]))

#create the dataloader that loads the data
dataloader = torch.utils.data.DataLoader(dataset, batch_size = batch_size, shuffle = True, num_workers = workers)

#this decides what we run the algorithm on. Here, we will use the GPU.
if (torch.cuda.is_available() and ngpu > 0):
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

  return torch._C._cuda_getDeviceCount() > 0


In [7]:
#follow paper guidelines on weights initialization
def weights_init(network):
    
    #pulls what kind of layer it is
    classname = network.__class__.__name__
    
    #if it is a convolutional layer
    if classname.find('Conv') != -1:
        
        #reinitialize weights to have mean 0, std 0.02
        nn.init.normal_(network.weight.data, 0.0, 0.02)
        
    #else if it is a batch norm layer
    elif classname.find('BatchNorm') != -1:
        
        #reinitialize weights to have mean 1, std 0.02
        nn.init.normal_(network.weight.data, 1.0, 0.02)
        nn.init.constant_(network.bias.data, 0)

In [8]:
#define the generator class
#note: since each layer is structured in terms of our preset dimensions
#each layer should scale and output correctly
#with an end output of (nc) x 218 x 218

#layers are structured as in the paper

class Generator(nn.Module):
    def __init__(self, ngpu):
        super(Generator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
        
            nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            
            nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            
            nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            
            nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
            
        )
        
    #each step forward will parse through the sequential NN structure
    #defined above
    def forward(self, input):
        return self.main(input)

In [9]:
#create the generator defined previously
network_G = Generator(ngpu).to(device)

#and initalize weights using the functioned defined previously
network_G.apply(weights_init)

#print the model to see the structure
print(network_G)

Generator(
  (main): Sequential(
    (0): ConvTranspose2d(100, 1744, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(1744, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(1744, 872, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(872, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(872, 436, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(436, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(436, 218, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(218, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(218, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()


In [10]:
#now that generator is made, define the discriminator
#note: since each layer is structured in terms of our preset dimensions
#each layer should scale and output correctly

#main difference is using LeakyReLU instead of ReLU activation
#using regular conv instead of transpose conv
#and using Sigmoid instead of Tanh for the final layer

#layers are as structured in the paper

class Discriminator(nn.Module):
    def __init__(self, ngpu):
        super(Discriminator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        return self.main(input)

In [11]:
#create the generator defined previously
network_D = Discriminator(ngpu).to(device)

#and initalize weights using the functioned defined previously
network_D.apply(weights_init)

#print the model to see the structure
print(network_D)

Discriminator(
  (main): Sequential(
    (0): Conv2d(3, 218, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(218, 436, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(436, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Conv2d(436, 872, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(872, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Conv2d(872, 1744, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(1744, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace=True)
    (11): Conv2d(1744, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (12): Sigmoid()
  )
)


In [12]:
#initialize Binary Cross Entropy loss function as our loss function
criterion = nn.BCELoss()

#Generate batch of latent vectors which the generator will learn
#to map to realistic looking melspecs
fixed_noise = torch.randn(218, nz, 1, 1, device = device)

#label real and fake melspecs with real being 1 and fake being 0
real_label = 1
fake_label = 0

#use Adam optimizers for generator and discriminator
optimizer_D = optim.Adam(network_D.parameters(), lr=lr, betas=(beta1, 0.999))
optimizer_G = optim.Adam(network_G.parameters(), lr=lr, betas=(beta1, 0.999))

In [13]:
#the main training loop

#initialize empty lists to record progress
melspec_list = []
G_losses = []
D_losses = []
iters = 0

print("Starting the Training Loop...")
#for each epoch
for epoch in tqdm(range(num_epochs)):
    
    #for each batch in the dataloader
    for i, data in enumerate(dataloader, 0):
        
        #train discriminator on the real melspecs
        network_D.zero_grad()
        
        #format batch of real melspecs
        real_cpu = data[0].to(device)
        batch_size = real_cpu.size(0)
        label = torch.full((batch_size,), real_label, dtype=torch.float, device=device)
        
        #forward pass real melspec batch through discriminator
        output = network_D(real_cpu).view(-1)
        
        #calculate loss on real batch
        errorD_real = criterion(output, label)
        
        #calculate gradients for discriminator via backprop.
        errorD_real.backward()
        D_x = output.mean().item()
        
        #then train discriminator on fakes
        rand_noise = torch.randn(batch_size, nz, 1, 1, device = device)
        
        #make fake melspecs with generator and update labels
        fake = network_G(rand_noise)
        label.fill(fake_label)
        
        #use discriminator to classify fakes
        output = network_D(fake.detach()).view(-1)
        
        #calculate loss of discriminator on fakes
        errorD_fake = criterion(output, label)
        
        #calculate the gradients for fakes and aggregate with gradients for the reals
        errorD_fake.backward()
        D_G_z1 = output.mean().item()
        
        #sum the errors
        errorD = errorD_real + errorD_fake
        
        #finally, update discriminator
        optimizerD.step()
        
        
        #now basically do it all again for the generator this time
        network_G.zero_grad()
        label.fill_(real_label) #by convention fake labels are real for generator loss
        
        #since discriminator has updated, do another forward pass
        #of all fakes through discriminator
        output = network_D(fake).view(-1)
        
        #calculate generator loss based on updated output
        errorG = criterion(output, label)
        
        #backprop to find gradients
        errorG.backward()
        D_G_z2 = output.mean().item()
        
        #and finally, update generator
        optimizerG.step()
        
        
        #not done yet though! we want to see and save progress, so
        #we'll print out progress through every 50~500 iterations, since
        #for each epoch we'll be iterating a *lot*.
        
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f' 
                 % (epoch, num_epochs, i, len(dataloader),
                     errorD.item(), errorG.item(), D_x, D_G_z1, D_G_z2))
        
        #record losses in lists created earlier
        G_losses.append(errorG.item())
        D_losses.append(errorD.item())
        
        #save some of the melspecs to img_list so we can actually convert them later
        
        if (iters % 500 == 0) or ((epoch == num_epochs - 1) and (i == len(dataloader) - 1)):
            with torch.no_grad():
                fake = network_G(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding = 0, normalize = False))
        
        iters += 1

  0%|          | 0/5 [00:00<?, ?it/s]

Starting the Training Loop...


  0%|          | 0/5 [00:05<?, ?it/s]


ValueError: Using a target size (torch.Size([40])) that is different to the input size (torch.Size([4000])) is deprecated. Please ensure they have the same size.

In [None]:
#once the training successfully finishes, print out training over time
