GAN Strcuture (GRU)


In [45]:
%pip install torch==1.8.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
%pip install torchtext==0.9

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [59]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import csv
import numpy as np
import random
import torch.optim as optim
import torch.utils.data
import torchtext
import pandas as pd
import matplotlib.pyplot as plt
import os.path
import json
import ast
import glob
import re
import string
import collections
from torchtext.legacy import data

torch.manual_seed(1)

class Discriminator(nn.Module):

    def __init__(self,vocab_size,hidden_size,n_layers=1):
        super(Discriminator, self).__init__()
        self.emb = torch.eye(vocab_size)
        self.hidden_size = hidden_size
        self.rnn = nn.GRU(vocab_size,hidden_size,batch_first=True)
        self.fc1 = nn.Linear(hidden_size,50)
        self.fc2 = nn.Linear(50,1)

    def forward(self, x, hidden = None):
        
        x = self.emb[x]
        out, hidden = self.rnn(x,hidden)
        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return torch.sigmoid(out)

class Generator(nn.Module):

    def __init__(self,vocab_size,hidden_size,n_layers=1):
        super(Generator, self).__init__()
        self.emb = torch.eye(vocab_size)
        self.rnn = nn.GRU(vocab_size,hidden_size, n_layers,batch_first=True)
        self.fc1 = nn.Linear(hidden_size,vocab_size)

    def forward(self, x, hidden = None):
        x = self.emb[x]
        out, hidden = self.rnn(x,hidden)
        out = self.fc1(out)
        return out, hidden



Previous Training (Example From Tutorial)


In [47]:
def train (trainDS,G,D,lr=0.002,batch_size=1,num_epochs=10):
  d_optimizer = optim.Adam(D.parameters(), lr)
  g_optimizer = optim.Adam(G.parameters(), lr)

  criterion = nn.MSELoss()

  train_data = trainDS
  #train_loader = train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

  samples = []
  losses = []

  for epoch in range(num_epochs):
    D.train()
    G.train()

    for batch_i in range(len(train_data)):
   # for batch_i,  real_recipes,ingredients) in enumerate(train_data['directions'],train_data['ingredients']):
            real_recipes = train_data['directions'][batch_i]
            ingredients = train_data['ingredients'][batch_i]

            # batch_size = real_recipes.size(0)

            # === Train the Discriminator ===
            
            d_optimizer.zero_grad()

            # discriminator losses on real images 
          
            D_real = D(real_recipes)
            labels = torch.ones(batch_size)

           
            D_real= sum(D_real)/D_real.shape[0]
            d_real_loss = criterion(D_real, labels)

            
            # discriminator losses on fake images
            # z = np.random.uniform(-1, 1, size=(batch_size, rand_size))
            # z = torch.from_numpy(z).float()
            
            ingredients = torch.Tensor(ingredients) 
            ingredients = ingredients.to(torch.long)

            fake_recipes = G(ingredients)

            fake_recipes = torch.Tensor(fake_recipes) 
            fake_recipes = fake_recipes.to(torch.long)
            D_fake = D(fake_recipes)
            
            #labels = torch.zeros(batch_size) # fake labels = 0
            labelsD = torch.zeros(1)
            labelsD = torch.diag(labelsD,0)
            d_fake_loss = criterion(D_fake, labelsD)
            
            # add up losses and update parameters
            d_loss = d_real_loss + d_fake_loss
            d_loss.backward()
            d_optimizer.step()


             # === Train the Generator ===
            g_optimizer.zero_grad()
            
            # generator losses on fake images
            # z = np.random.uniform(-1, 1, size=(batch_size, rand_size))
            # z = torch.from_numpy(z).float()

            ingredients = torch.Tensor(ingredients) 
            ingredients = ingredients.to(torch.long)

            fake_recipes = G(ingredients)

            fake_recipes = torch.Tensor(fake_recipes) 
            fake_recipes = fake_recipes.to(torch.long)

            D_fake = D(fake_recipes)
            #labels = torch.ones(batch_size) #flipped labels


            labels = torch.ones(batch_size)
            # compute loss and update parameters
            g_loss = criterion(D_fake,labels)
            g_loss.backward()
            g_optimizer.step()


            print('Epoch [%d/%d], d_loss: %.4f, g_loss: %.4f, ' 
                % (epoch + 1, num_epochs, d_loss.item(), g_loss.item()))

            # append discriminator loss and generator loss
            losses.append((d_loss.item(), g_loss.item()))


    return losses


# New Training

In [48]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Setting HyperParameters

In [153]:
lr = 3e-4
batch_size = 12
num_epochs = 5
max_recipe_len = 1000


Formatting Dataset (From Baseline)

In [50]:
patterns = [r'\'', r'\"', r'\.', r'<br \/>', r',', r'\(', r'\)', r'\!', r'\?', r'\:', r'\s+']
replacements = [' \'  ', '', ' . ', ' ', ' , ', ' ( ', ' ) ', ' ! ', ' ? ', ' ', ' ']
patterns_dict = list((re.compile(p), r) for p, r in zip(patterns, replacements))

def basic_english_normalize(line):
    line = line.lower()
    for pattern_re, replaced_str in patterns_dict:
        line = pattern_re.sub(replaced_str, line)
    return line.split()

directions_field = torchtext.legacy.data.Field(sequential=True,
                                  tokenize=basic_english_normalize,
                                  include_lengths=True,
                                  batch_first=False,
                                  use_vocab=True,
                                  init_token="<BOS>",
                                  eos_token="<EOS>")

fields = [('directions', directions_field), ('ingredients', None)]
baseline_data = torchtext.legacy.data.TabularDataset("drive/MyDrive/data/char_nlg_df.csv", "csv", fields)

directions_field.build_vocab(baseline_data)
directions_field.vocab = torchtext.vocab.Vocab(directions_field.vocab.freqs, specials=['<unk>','<pad>', '<BOS>', '<EOS>', ';'])
baseline_vocab_stoi = directions_field.vocab.stoi
baseline_vocab_itos = directions_field.vocab.itos
baseline_vocab_size = len(directions_field.vocab.itos)
print(baseline_vocab_size)
print(directions_field.vocab.itos)

# make data fields for ingredients
ingredients_field = torchtext.legacy.data.Field(sequential=True,
                                  tokenize=basic_english_normalize,
                                  include_lengths=True,
                                  batch_first=True,
                                  use_vocab=True,
                                  init_token="<BOS>",
                                  eos_token="<EOS>")
ing_fields = [('directions', None), ('ingredients', ingredients_field)]
ingredients_data = torchtext.legacy.data.TabularDataset("drive/MyDrive/data/char_nlg_df.csv", "csv", ing_fields)
ingredients_field.build_vocab(ingredients_data)
ingredients_field.vocab = torchtext.vocab.Vocab(ingredients_field.vocab.freqs, specials=['<unk>','<pad>', '<BOS>', '<EOS>', ';'])
ingredients_vocab_stoi = ingredients_field.vocab.stoi
ingredients_vocab_itos = ingredients_field.vocab.itos
ingredients_vocab_size = len(ingredients_field.vocab.itos)
print(ingredients_vocab_size)
print(ingredients_field.vocab.itos)

3502
['<unk>', '<pad>', '<BOS>', '<EOS>', ';', 'and', 'in', 'to', 'add', 'with', 'minutes', 'until', 'a', 'mix', 'for', '1', 'the', 'bake', 'at', 'of', 'into', '2', 'on', 'ingredients', 'sugar', 'over', 'or', 'pour', 'stir', 'pan', 'mixture', '350', 'well', 'cook', 'butter', 'cheese', 'heat', 'water', 'together', 'top', 'inch', 'combine', 'salt', 'flour', 'cream', 'all', 'place', 'oven', 'bowl', 'brown', 'cool', 'milk', 'beat', 'baking', 'x', '4', '3', 'chicken', 'eggs', 'cover', 'put', 'onion', 'about', 'dish', 'sprinkle', 'pepper', 'then', 'cup', 'large', '9', 'serve', '10', 'cut', 'greased', 'drain', '30', 'remaining', 'is', 'remove', 'hours', 'boil', 'from', 'sauce', 'let', 'spread', 'hour', 'egg', 'vanilla', '8', '5', 'cake', 'oil', 'makes', 'set', '6', 'hot', '13', '20', 'nuts', '15', 'meat', 'margarine', 'dry', 'simmer', 'roll', 'stirring', 'casserole', 'medium', 'chill', 'each', 'blend', 'soup', 'juice', 'tender', 'pie', 'melt', 'layer', 'potatoes', 'skillet', 'sheet', 'if', 's

In [51]:
def sample_sequence(model, max_len=1000, temperature=0.8):
    generated_sequence = ""
    
    inp = torch.Tensor([baseline_vocab_stoi["<BOS>"]]).long()
    hidden = None
    step = 1

    for c in range(max_len):
          output, hidden = model(inp.unsqueeze(0), hidden)
          output_dist = output.data.view(-1).div(temperature).exp()
          top = int(torch.multinomial(output_dist, 1)[0])

          predicted_char = baseline_vocab_itos[top]

          if predicted_char == "<pad>":
              continue

          if predicted_char == "<BOS>":
              continue
          
          if predicted_char == "<unk>":
              continue

          if predicted_char == ";":
              step += 1
              predicted_char = str("\n " + str(step) + ".")

          if predicted_char == "<EOS>":
              break

          generated_sequence += predicted_char + " "
          inp = torch.Tensor([top]).long()

    return generated_sequence

### TESTING

In [56]:
def train_GAN(descriminator, generator, data, lr, batch_size, num_epochs):
  d_optimizer = optim.Adam(descriminator.parameters(), lr)
  g_optimizer = optim.Adam(generator.parameters(), lr)

  criterion = nn.CrossEntropyLoss()
  
  train_data = data
  #train_loader = train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
  data_iter = torchtext.legacy.data.BucketIterator(data, batch_size=batch_size, sort_key=lambda x: len(x.directions), sort_within_batch=True)

  samples = []
  losses = []

  iter = 0

  for epoch in range(num_epochs):
    descriminator.train()
    generator.train()

    avg_loss = 0
    min_loss = float('inf')
    print

    for (directions, lengths), ingredients in data_iter:
   # for batch_i,  real_recipes,ingredients) in enumerate(train_data['directions'],train_data['ingredients']):
            real_directions = directions[:, :-1]
            # print(real_directions)

            # # batch_size = real_recipes.size(0)

            # # === Train the Discriminator ===
            
            # d_optimizer.zero_grad()

            # # discriminator losses on real recipe 
          
            # D_real = descriminator(real_directions)
            # labels = torch.ones(batch_size)

           
            # D_real= sum(D_real)/D_real.shape[0]
            # d_real_loss = criterion(D_real, labels)

            
            # # discriminator losses on fake recipe
            # # z = np.random.uniform(-1, 1, size=(batch_size, rand_size))
            # # z = torch.from_numpy(z).float()
            
            # ingredients = torch.Tensor(ingredients) 
            # ingredients = ingredients.to(torch.long)

            # fake_recipes = generator(ingredients)

            # fake_recipes = torch.Tensor(fake_recipes) 
            # fake_recipes = fake_recipes.to(torch.long)
            # D_fake = descriminator(fake_recipes)
            
            # #labels = torch.zeros(batch_size) # fake labels = 0
            # labelsD = torch.zeros(1)
            # labelsD = torch.diag(labelsD,0)
            # d_fake_loss = criterion(D_fake, labelsD)
            
            # # add up losses and update parameters
            # d_loss = d_real_loss + d_fake_loss
            # d_loss.backward()
            # d_optimizer.step()


            #  # === Train the Generator ===
            # g_optimizer.zero_grad()
            
            # # generator losses on fake images
            # # z = np.random.uniform(-1, 1, size=(batch_size, rand_size))
            # # z = torch.from_numpy(z).float()

            # ingredients = torch.Tensor(ingredients) 
            # ingredients = ingredients.to(torch.long)

            # fake_recipes = generator(ingredients)

            # fake_recipes = torch.Tensor(fake_recipes) 
            # fake_recipes = fake_recipes.to(torch.long)

            # D_fake = descriminator(fake_recipes)
            # #labels = torch.ones(batch_size) #flipped labels


            # labels = torch.ones(batch_size)
            # # compute loss and update parameters
            # g_loss = criterion(D_fake,labels)
            # g_loss.backward()
            # g_optimizer.step()


            # print('Epoch [%d/%d], d_loss: %.4f, g_loss: %.4f, ' 
            #     % (epoch + 1, num_epochs, d_loss.item(), g_loss.item()))

            # # append discriminator loss and generator loss
            # losses.append((d_loss.item(), g_loss.item()))
            target = directions[:, 1:]
            inp = directions[:, :-1]
            
            g_optimizer.zero_grad()

            output, _ = generator(inp)
            loss = criterion(output.reshape(-1, baseline_vocab_size), target.reshape(-1))

            loss.backward()
            g_optimizer.step()

            avg_loss += loss
            iter += 1
            losses.append(float(loss))
            samples.append(iter)

            if iter % 200 == 0:
                  print("Iteration # %d: Loss %f" % (iter+1, float(avg_loss/200)))
                  print("Generated Recipe: \n 1. " + sample_sequence(generator, 1000, 1.5))
                  avg_loss = 0


    return losses

### CURRENT VERSION

In [241]:
def train_GAN(discriminator, generator, data, lr, batch_size, num_epochs, max_recipe_len):
  d_optimizer = optim.Adam(discriminator.parameters(), lr)
  g_optimizer = optim.Adam(generator.parameters(), lr)

  criterion = nn.BCELoss()
  
  train_data = data
  #train_loader = train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
  data_iter = torchtext.legacy.data.BucketIterator(data, batch_size=batch_size, sort_key=lambda x: len(x.directions), sort_within_batch=True)

  samples = []
  losses = []

  iter = 0
  print(len(data_iter))

  for epoch in range(num_epochs):
    avg_loss = 0
    min_loss = float('inf')

    for (directions, lengths), ingredients in data_iter:
        target = directions[:, 1:]
        inp = directions[:, :-1]

        # Zero the gradients
        g_optimizer.zero_grad()
        d_optimizer.zero_grad()

        # -------- Training the Generator ---------#
        # Generate a fake recipe
        # The real directions are being used as the input for the generator
        # This will need to be changed to the list of ingredients but for
        # now it acts as "random noise"
        word = torch.Tensor([baseline_vocab_stoi["<BOS>"]]).long()
        hidden = None
        fake_recipe = torch.empty(max_recipe_len)
        #fake_recipe[0] = baseline_vocab_stoi["<BOS>"] <- forces the first word to be BOS

        for c in range(max_recipe_len):
            output, hidden = generator(word.unsqueeze(0), hidden)
            output_dist = output.data.view(-1).div(1.5).exp()
            top = int(torch.multinomial(output_dist, 1)[0])
            fake_recipe[c] = top
            word = torch.Tensor([top]).long()
        fake_recipe = fake_recipe.unsqueeze(1)
        fake_recipe = fake_recipe.long()

        # Debugging to see how format of fake_recipe (not accepted by discriminator) differs from inp (accepted by discriminator)
        # print("-------------")
        # print("fake_recipe")
        # print(fake_recipe)
        # print("-------------")
        # print("inp")
        # print(inp)

        # Get the discriminator to make a prediction on whether the generated
        # recipe is real or fake
        D_fake = discriminator(fake_recipe, None)

        # Calculate the loss for the generator
        labels = torch.ones_like(D_fake)
        g_loss = criterion(D_fake,labels)
        g_loss.backward()
        g_optimizer.step()

        # -------- Training the Discriminator  ---------#
        # Discriminator losses on real recipes
        D_real = discriminator(inp, None)
        labels = torch.ones_like(D_real)
        d_real_loss = criterion(D_real, labels)

        # Discriminator losses on fake recipes
        D_fake = discriminator(fake_recipe, None)
        labels = torch.zeros_like(D_fake)
        d_fake_loss = criterion(D_fake, labels)

        # Add up the losses and update parameters
        # (Some sources say to average the losses but
        # in the tutorial they just add them together)
        d_loss = (d_fake_loss + d_real_loss)
        d_loss.backward()
        d_optimizer.step()



        #loss = criterion(fake_recipes.reshape(-1, baseline_vocab_size), target.reshape(-1))
        
        losses.append(float(d_loss))
        samples.append(iter)

        iter += 1
        if iter % 10 == 0:
            print("Iteration # %d:" % (iter+1))
            print("Generated Recipe: \n 1. " + sample_sequence(generator, 1000, 1.5))

        # === Train the Discriminator ===

        

            
            # # discriminator losses on fake recipe
            # # z = np.random.uniform(-1, 1, size=(batch_size, rand_size))
            # # z = torch.from_numpy(z).float()
            
            # ingredients = torch.Tensor(ingredients) 
            # ingredients = ingredients.to(torch.long)

            # fake_recipes = generator(ingredients)

            # fake_recipes = torch.Tensor(fake_recipes) 
            # fake_recipes = fake_recipes.to(torch.long)
            # D_fake = descriminator(fake_recipes)
            
            # #labels = torch.zeros(batch_size) # fake labels = 0
            # labelsD = torch.zeros(1)
            # labelsD = torch.diag(labelsD,0)
            # d_fake_loss = criterion(D_fake, labelsD)
            
            # # add up losses and update parameters
            # d_loss = d_real_loss + d_fake_loss
            # d_loss.backward()
            # d_optimizer.step()


            #  # === Train the Generator ===
            # g_optimizer.zero_grad()
            
            # # generator losses on fake images
            # # z = np.random.uniform(-1, 1, size=(batch_size, rand_size))
            # # z = torch.from_numpy(z).float()

            # ingredients = torch.Tensor(ingredients) 
            # ingredients = ingredients.to(torch.long)

            # fake_recipes = generator(ingredients)

            # fake_recipes = torch.Tensor(fake_recipes) 
            # fake_recipes = fake_recipes.to(torch.long)

            # D_fake = descriminator(fake_recipes)
            # #labels = torch.ones(batch_size) #flipped labels


            # labels = torch.ones(batch_size)
            # # compute loss and update parameters
            # g_loss = criterion(D_fake,labels)
            # g_loss.backward()
            # g_optimizer.step()


            # print('Epoch [%d/%d], d_loss: %.4f, g_loss: %.4f, ' 
            #     % (epoch + 1, num_epochs, d_loss.item(), g_loss.item()))

            # # append discriminator loss and generator loss
            # losses.append((d_loss.item(), g_loss.item()))
            
            
        


    return losses

In [242]:
disc = Discriminator(baseline_vocab_size, 64, n_layers=1)
gen = Generator(baseline_vocab_size, 64, n_layers=1)
losses = train_GAN(disc, gen, baseline_data, lr=lr, batch_size=2, num_epochs=num_epochs, max_recipe_len=max_recipe_len)

2500
Iteration # 11:
Generated Recipe: 
 1. stays 12 decoration fried gloves pat individual crispix possible skor stars intervals 40 jar diabetic cornflake spam pare nut 112 seasonings potato farms lumps grudges candied prepared alone formerly bananas chilled shape recipe depression ravioli brook elegant kisses lifted stop reaches cornstarch absorbent surround optionals crank herbal scramblers delicate pie stiffen smash reheated punch removing 01 spreading millet bands miracle head tomatoes n facing floured how shredded turmeric evening yields experiment taking enchiladas pralines collards 350 breakfast blender drying depending pies sugar raise bricks triscuits mat cognac anise morning shell dumplings appetizer brownish unheated lumping strands pounds sifted alternate bleu gelatine alone creamier sweat whipped procedure 10 lower sausage flounder crepes cooks alcohol container stage outdoor leftover sprinkled showers flake liquify unused transfer 425 fold unevenly rusty brush starter pl

KeyboardInterrupt: ignored