<a href="https://colab.research.google.com/github/hogch/masterproject_gan/blob/master/SeqGAN_headlines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text Generation using GAN

This notebook generates news headlines with a Generative Adversarial Network (GAN): a generator network proposes headlines while a discriminator network learns to tell them apart from real ones.

## Import dependencies



In [1]:
# Mount Google Drive into the Colab runtime so the notebook can reach files stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Switch to the project folder on the mounted Drive and list its contents;
# relative paths used later (e.g. 'headlines.csv') are resolved against this folder.
%cd /content/drive/My\ Drive/Colab\ Notebooks/Masterproject
!ls

/content/drive/My Drive/Colab Notebooks/Masterproject
abcnews-date-text.csv	 screenshots
headlines.csv		 SeqGAN
headlines-short.csv	 SeqGAN_headlines_dataloading_experiments.ipynb
news-headlines.db	 SeqGAN_headlines.ipynb
news-headlines-short.db


**Install required dependencies manually**

In [3]:
# tflearn supplies pad_sequences / to_categorical, which are imported in the next code cell.
# NOTE(review): none of these installs pins a version, so a fresh runtime may resolve
# different releases than the ones this notebook was developed with.
!pip install tflearn
!pip install tqdm
# Force-install tqdm from the "colab" archive of the chengs/tqdm repository, replacing the
# copy installed above (presumably for notebook progress bars that render in Colab - TODO confirm).
!pip install --force https://github.com/chengs/tqdm/archive/colab.zip

Collecting https://github.com/chengs/tqdm/archive/colab.zip
  Downloading https://github.com/chengs/tqdm/archive/colab.zip
[K     | 481kB 1.3MB/s
Building wheels for collected packages: tqdm
  Running setup.py bdist_wheel for tqdm ... [?25l- \ done
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-6zb6xju2/wheels/41/18/ee/d5dd158441b27965855b1bbae03fa2d8a91fe645c01b419896
Successfully built tqdm
[31mspacy 2.0.18 has requirement numpy>=1.15.0, but you'll have numpy 1.14.6 which is incompatible.[0m
Installing collected packages: tqdm
  Found existing installation: tqdm 4.28.1
    Uninstalling tqdm-4.28.1:
      Successfully uninstalled tqdm-4.28.1
Successfully installed tqdm-4.28.1


**Import required modules**

In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras

import datetime

from tflearn.datasets import imdb
from tflearn.data_utils import pad_sequences, to_categorical

from tensorflow.python.ops import tensor_array_ops, control_flow_ops
from tensorflow.contrib import slim

from keras.preprocessing.text import Tokenizer
from tqdm import tqdm, tnrange

Using TensorFlow backend.


In [5]:
# Fail fast when the runtime has no GPU at the expected device name.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


## Load Dataset

In [6]:
# Number of headlines kept for the experiments. Timings observed by the author:
#   200000 samples need 17-23 minutes per epoch
#   100000 samples need 5 minutes per epoch
N_SAMPLES = 50000
# Fixed seed so that "Restart & Run All" always selects the same subsample.
SAMPLE_SEED = 42

# Read all headlines, shuffle the rows reproducibly and keep the first N_SAMPLES
# (fewer if the file holds fewer rows).
df = (
    pd.read_csv('headlines.csv', sep=',', index_col='id')
    .sample(frac=1, random_state=SAMPLE_SEED)
    [:N_SAMPLES]
)
print(df.shape)
df.head()

  mask |= (ar1 == a)


(50000, 3)


Unnamed: 0_level_0,publish_date,text,fake
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
291321,20070313,academic warns of bowen basin mining book nega...,0
373339,20080329,united rivals skating on thin ice,0
1077232,20170519,senior bureaucrats knew of oakden problems,0
302973,20070508,govt acknowledges threat of climate change with,0
1088895,20170825,burnie museum advocate collection,0


## Define Training Set, Test Set and Hyperparameters

In [0]:
# General hyper-parameters
# Number of sequences per batch; passed to the Generator as batch_size.
BATCH_SIZE = 64
# Passed as vocab_size to both Discriminator and Generator.
VOCAB_SIZE = 5000 # 20
# Length every token sequence is padded to; also the seq_length of both models.
SEQ_LENGTH = 100
# get_datasets() multiplies the sample count by this value and uses that many
# rows as the training set; the remaining rows become the test set.
TRAINING_SPLIT = 0.2
# NOTE(review): not referenced by any cell visible in this notebook.
TOTAL_BATCH = 200

# Discriminator hyper-parameters
# Width of each embedding vector in the discriminator (passed as emb_size).
D_EMB_SIZE = 100
# NOTE(review): not referenced by any cell visible in this notebook.
D_EMB_DIM = 64 # embedding dimension
# Convolution window heights (in tokens); one conv/max-pool branch is built per entry.
D_FILTER_SIZES = [2,3]
# Number of output classes of the discriminator.
D_NUM_CLASSES = 2
# Number of convolution filters per filter size.
D_NUM_FILTERS = 50

# Generator hyper-parameters
# Token id fed to the generator LSTM at the first time step.
G_START_TOKEN = 0
# Passed as emb_size; the Generator uses it as the number of rows of its embedding
# matrix and as the width of its output logits.
G_EMB_SIZE = 100
# Width of each embedding vector in the generator.
G_EMB_DIM = 32
G_HIDDEN_DIM = 32 # hidden state dimension of lstm cell

In [0]:
def get_datasets(texts, labels, tokenizer=None):
  """Tokenize, pad, shuffle and split the texts into a training and a test set.

  Args:
    texts: list of strings to convert into token-id sequences.
    labels: one integer class label per entry of `texts`.
    tokenizer: optional Keras Tokenizer that has already been fitted. When
      omitted, a new tokenizer limited to VOCAB_SIZE ids is fitted on `texts`.

  Returns:
    Tuple (tokenizer, word_index, X_train, y_train, X_test, y_test).
    `y_train` is one-hot encoded with 2 classes, `y_test` holds the raw labels.
    `word_index` is the tokenizer's word -> id mapping; Keras keeps every seen
    word in it even when the tokenizer caps the ids it emits.
  """
  if tokenizer is None:
    # Cap the ids at VOCAB_SIZE: without num_words the tokenizer assigns an id to
    # every distinct word, which can exceed the vocab_size the models' embedding
    # tables are built with.
    tokenizer = Tokenizer(num_words=VOCAB_SIZE)
    tokenizer.fit_on_texts(texts)

  sequences = tokenizer.texts_to_sequences(texts)
  word_index = tokenizer.word_index
  # Bring every sequence to exactly SEQ_LENGTH token ids.
  text_data = pad_sequences(sequences, maxlen=SEQ_LENGTH)

  labels = np.asarray(labels)

  # Apply one random permutation to texts and labels so they stay aligned.
  indices = np.arange(text_data.shape[0])
  np.random.shuffle(indices)
  text_data = text_data[indices]
  labels = labels[indices]

  # NOTE(review): the first TRAINING_SPLIT share (0.2 by default) is used for
  # training and the larger remainder for testing - confirm this ratio is intended.
  train_size = int(TRAINING_SPLIT * text_data.shape[0])

  X_train = text_data[:train_size]
  y_train = to_categorical(labels[:train_size], 2)
  X_test = text_data[train_size:]
  # NOTE(review): unlike y_train, y_test is returned without one-hot encoding.
  y_test = labels[train_size:]

  return tokenizer, word_index, X_train, y_train, X_test, y_test

# Gather the whitespace-stripped headlines and their labels from the dataframe,
# then turn them into padded token sequences split into train and test sets.
texts = [headline.strip() for headline in df['text']]
labels = list(df['fake'])

tokenizer, word_index, X_train, y_train, X_test, y_test = get_datasets(texts, labels)

## Discriminator
The discriminator is a model that classifies sequences (here: headlines) as real or fake.
In this implementation the discriminative model uses the following layers:
1.   embedding layer
2.   convolution layer
3.   max-pooling layer
4.   softmax layer

In [0]:
# Clear the default graph so that re-running the model cells starts from an empty graph.
tf.reset_default_graph()

class Discriminator:
  """CNN classifier that scores token sequences (TensorFlow 1.x graph).

  Graph layout: embedding lookup -> one convolution + max-pooling branch per
  filter size -> highway layer -> dropout -> linear layer with `num_classes`
  logits. Call `build_model()` once after construction to create the graph.
  """

  def __init__(self, vocab_size, seq_length, emb_size, filter_sizes, num_classes, num_filters):
    """Store the hyper-parameters and create the input placeholders.

    Args:
      vocab_size: number of rows of the embedding matrix (valid token ids are
        0 .. vocab_size - 1).
      seq_length: number of tokens in every input sequence.
      emb_size: width of each embedding vector.
      filter_sizes: iterable of convolution window heights (in tokens).
      num_classes: number of output classes.
      num_filters: number of convolution filters per filter size.
    """
    self.vocab_size = vocab_size
    self.emb_size = emb_size
    self.seq_length = seq_length
    self.filter_sizes = filter_sizes
    self.num_classes = num_classes
    self.num_filters = num_filters

    self.X_input = tf.placeholder(tf.int32, shape=[None, self.seq_length], name='X_input')
    self.y_input = tf.placeholder(tf.float32, shape=[None, self.num_classes], name='y_input')
    self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

    # Keeping track of l2 regularization loss (optional); a lambda of 0.0 disables it.
    self.l2_reg_lambda = 0.0
    self.l2_loss = tf.constant(0.0)

  def build_model(self):
    """Assemble the complete graph: layers, loss, accuracy and cost."""
    self.embedding_layer = self.build_embedding_layer()
    self.convolution_maxpool_layer = self.build_convolution_maxpool_layer()
    self.scores, self.predictions = self.build_softmax_layer()

    self.calc_mean_cross_entropy_loss()
    self.calc_accuracy_and_cost()

  def build_embedding_layer(self):
    """Look up the embeddings of `X_input` and add a trailing channel axis.

    Returns:
      The expanded embedding tensor (also stored as `self.emb_chars_expand`).
    """
    with tf.device('gpu:0'), tf.name_scope('embedding_layer'):
      W_emb = tf.Variable(
          initial_value=tf.random_uniform([self.vocab_size, self.emb_size], -1.0, 1.0),
          name='W'
      )
      emb_chars = tf.nn.embedding_lookup(W_emb, self.X_input)
      # conv2d expects a 4-D input, so append a channel dimension of size 1.
      self.emb_chars_expand = tf.expand_dims(emb_chars, -1)
    # Returned so that build_model() stores the tensor instead of None.
    return self.emb_chars_expand

  def build_convolution_maxpool_layer(self):
    """Build one conv + max-pool branch per filter size and concatenate them.

    Returns:
      Tensor of shape [batch, num_filters * len(filter_sizes)].
    """
    pooled_outputs = []
    for filter_size in self.filter_sizes:
      with tf.name_scope('conv-maxpool-%s' % filter_size):
        # Convolution layer: each filter spans the full embedding width.
        filter_shape = [filter_size, self.emb_size, 1, self.num_filters]
        W_filters = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='W')
        b = tf.Variable(tf.constant(0.1, shape=[self.num_filters]), name='b')
        conv = tf.nn.conv2d(
            input=self.emb_chars_expand,
            filter=W_filters,
            strides=[1,1,1,1],
            padding='VALID',
            name='conv'
        )
        # Apply non-linearity - activation function
        activation = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
        # Max-pool over all positions the filter was applied to.
        max_pooling = tf.nn.max_pool(
            value=activation,
            ksize=[1, self.seq_length-filter_size+1, 1, 1],
            strides=[1,1,1,1],
            padding='VALID',
            name='max_pooling'
        )
        pooled_outputs.append(max_pooling)

    self.num_filter_total = self.num_filters * len(self.filter_sizes)
    h_pool = tf.concat(pooled_outputs, axis=3)
    return tf.reshape(h_pool, [-1, self.num_filter_total])

  def build_softmax_layer(self):
    """Apply highway + dropout to the pooled features and compute class logits.

    Returns:
      Tuple (scores, predictions): the logits and the arg-max class index.
    """
    with tf.name_scope('highway'):
      self.h_highway = self.highway(
          self.convolution_maxpool_layer, self.convolution_maxpool_layer.get_shape()[1], 1, 0
      )

    with tf.name_scope('dropout'):
      self.h_drop = tf.nn.dropout(self.h_highway, self.dropout_keep_prob)

    with tf.name_scope('softmax_output'):
      W_softmax = tf.Variable(
          tf.truncated_normal(
              [self.num_filter_total, self.num_classes],
              stddev=0.1
          ), name='W_softmax'
      )
      b_softmax = tf.Variable(tf.constant(0.1, shape=[self.num_classes]), name='b_softmax')

      self.l2_loss += tf.nn.l2_loss(W_softmax)
      self.l2_loss += tf.nn.l2_loss(b_softmax)

      # Feed the dropout output into the final layer. Previously the logits were
      # computed from the raw pooled features, which left the highway layer and
      # dropout_keep_prob without any effect on the result.
      self.scores = tf.nn.xw_plus_b(self.h_drop, W_softmax, b_softmax, name='scores')
      self.ypred_for_auc = tf.nn.softmax(self.scores)
      predictions = tf.argmax(self.scores, 1, name="predictions")

    return self.scores, predictions

  def calc_mean_cross_entropy_loss(self):
    """Define `self.loss`: mean cross-entropy plus the weighted L2 term."""
    with tf.name_scope('cross_entropy_loss'):
      losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.y_input)
      self.loss = tf.reduce_mean(losses) + self.l2_reg_lambda * self.l2_loss

  def calc_accuracy_and_cost(self):
    """Define `self.accuracy` and `self.cost` (mean cross-entropy without L2)."""
    with tf.name_scope('accuracy'):
      correct_predictions = tf.equal(self.predictions, tf.argmax(self.y_input, 1))
      self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')
      self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.y_input))

  def highway(self, input_, size, num_layers=1, bias=-2.0, f=tf.nn.relu):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).
    t = sigmoid(Wy + b)
    z = t * g(Wy + b) + (1 - t) * y
    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.

    Args:
      input_: 2-D input tensor.
      size: width of the input and of every highway layer.
      num_layers: number of stacked highway layers; 0 returns `input_` unchanged.
      bias: constant added to the transform-gate pre-activation.
      f: non-linearity g applied to the candidate output.

    Returns:
      Output tensor of the last highway layer.
    """
    # Start from the input so that num_layers == 0 is well defined.
    output = input_
    with tf.variable_scope('highway'):
      size = int(size)
      for idx in range(num_layers):
        g = f(slim.fully_connected(output, size, scope='highway_lin_%d' % idx, activation_fn=None))
        t = tf.sigmoid(slim.fully_connected(output, size, scope='highway_gate_%d' % idx, activation_fn=None) + bias)

        output = t * g + (1. - t) * output

    return output

  def train(self, X, y, num_epochs, batch_size, learning_rate):
    """Train with Adam on mini-batches and print mean loss/accuracy per epoch.

    Args:
      X: array of token-id sequences with shape [num_samples, seq_length].
      y: one-hot labels with shape [num_samples, num_classes].
      num_epochs: number of passes over the data.
      batch_size: number of samples per optimisation step.
      learning_rate: Adam learning rate.

    NOTE(review): training runs in a session that is closed when this method
    returns, so the learned weights are not available to other sessions.
    """
    with tf.name_scope('loss'):
      optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)

    num_samples = X.shape[0]
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())

      for epoch_i in range(num_epochs):
        batch_losses = []
        batch_accs = []

        # Step through disjoint mini-batches. The slice start used to stay at 0,
        # so every step trained on an ever-growing prefix X[0:i] of the data.
        for start in tnrange(0, num_samples, batch_size):
          end = start + batch_size
          feed_dict = {
              self.X_input: X[start:end],
              self.y_input: y[start:end],
              self.dropout_keep_prob: 0.75
          }
          # One run performs the update and reports the loss/accuracy of the
          # same forward pass (i.e. measured before the weight update).
          _, loss, accuracy = sess.run([optimizer, self.cost, self.accuracy], feed_dict)

          batch_accs.append(accuracy)
          batch_losses.append(loss)

        time_str = datetime.datetime.now().isoformat()
        print("{}: epoch: {}, loss: {}, acc: {}".format(time_str, epoch_i, np.mean(batch_losses), np.mean(batch_accs)))

## Build Discriminator model and train model

In [18]:
# Create the discriminator from the hyper-parameters above and build its graph.
discriminator = Discriminator(
    vocab_size=VOCAB_SIZE,
    seq_length=SEQ_LENGTH,
    emb_size=D_EMB_SIZE,
    filter_sizes=D_FILTER_SIZES,
    num_classes=D_NUM_CLASSES,
    num_filters=D_NUM_FILTERS,
)
discriminator.build_model()

# Supervised training of the discriminator is currently disabled:
#discriminator.train(X_train, y_train, 5, BATCH_SIZE, .001)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



## Generator

A simple LSTM network for sequence generation.

In [0]:
class Generator:
  """LSTM sequence generator built as a TensorFlow 1.x graph.

  The constructor creates the whole graph: the LSTM weights, a sampling loop
  (`gen_x`), a teacher-forced prediction loop (`g_predictions`), the supervised
  pre-training loss/update and the reward-weighted loss/update.
  """
  def __init__(self, batch_size, seq_length, vocab_size, emb_size, emb_dim, 
               hidden_dim, start_token, learning_rate, reward_gamma):
    """Build the generator graph.

    Args:
      batch_size: fixed number of sequences per batch (baked into placeholder shapes).
      seq_length: number of tokens generated per sequence.
      vocab_size: stored on the instance; not read anywhere else in this class.
      emb_size: number of rows of the embedding matrix and width of the output
        logits, i.e. the number of distinct token ids this generator can emit.
      emb_dim: width of each embedding vector.
      hidden_dim: width of the LSTM hidden and cell state.
      start_token: token id fed at the first time step for every sequence.
      learning_rate: learning rate handed to both Adam optimizers.
      reward_gamma: stored on the instance; not read anywhere else in this class.
    """
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.vocab_size = vocab_size
    self.emb_size = emb_size
    self.emb_dim = emb_dim
    self.hidden_dim = hidden_dim
    # One start token per sequence in the batch.
    self.start_token = tf.constant([start_token] * self.batch_size, dtype=tf.int32)
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.reward_gamma = reward_gamma
    # Trainable generator variables, in creation order; gradients are zipped against this list.
    self.g_params = []
    # NOTE(review): d_params is never filled or read in this class.
    self.d_params = []
    #self.temperature = 1.0
    # Global-norm threshold used when clipping gradients.
    self.grad_clip = 5.0
    
    # NOTE(review): expected_reward is created but not used by any op in this class.
    self.expected_reward = tf.Variable(tf.zeros([self.seq_length]))
    
    # Initial LSTM memory: zero hidden state and zero cell state stacked into one tensor.
    self.h0 = tf.zeros([self.batch_size, self.hidden_dim])
    self.h0 = tf.stack([self.h0, self.h0])
    
    with tf.variable_scope('generator'):
      # NOTE(review): the embedding matrix has emb_size rows, not vocab_size rows -
      # confirm that emb_size is meant to be the vocabulary size here.
      self.g_embeddings = tf.Variable(tf.random_normal([self.emb_size, self.emb_dim], stddev=0.1)) # init embeddings matrix
      self.g_params.append(self.g_embeddings)
      self.g_recurrent_unit = self.create_recurrent_unit(self.g_params) # maps (x_t, h_tm1) to h_t for generator
      self.g_output_unit = self.create_output_unit(self.g_params) # maps h_t to o_t (output token logits)
      
    # sequence of tokens generated by the generator
    self.X_input = tf.placeholder(tf.int32, shape=[self.batch_size, self.seq_length])
    # get from rollout policy and discriminator
    self.rewards = tf.placeholder(tf.float32, shape=[self.batch_size, self.seq_length])
    
    with tf.device('gpu:0'):
      # Embedded input transposed to time-major layout: seq_length x batch_size x emb_dim
      self.processed_x = tf.transpose(tf.nn.embedding_lookup(self.g_embeddings, self.X_input), perm=[1,0,2])
      
    # gen_o collects the probability of each sampled token, gen_x the sampled token ids.
    self.gen_o = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.seq_length, dynamic_size=False, infer_shape=True)
    self.gen_x = tensor_array_ops.TensorArray(dtype=tf.int32, size=self.seq_length, dynamic_size=False, infer_shape=True)
    
    def g_recursion(i, x_t, h_tm1, gen_o, gen_x):
      """One free-running step: sample the next token and feed its embedding back in."""
      h_t = self.g_recurrent_unit(x_t, h_tm1)
      o_t = self.g_output_unit(h_t)
      log_prob = tf.log(tf.nn.softmax(o_t))
      # Draw one token id per sequence from the predicted distribution.
      next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
      x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)
      # Record the probability the model assigned to the token it sampled.
      gen_o = gen_o.write(i, tf.reduce_sum(tf.multiply(
          tf.one_hot(next_token, self.emb_size, 1.0, 0.0), tf.nn.softmax(o_t)), 1))
      gen_x = gen_x.write(i, next_token)
      
      return i+1, x_tp1, h_t, gen_o, gen_x
    
    # Run the sampling step seq_length times, starting from the start token and zero memory.
    _, _, _, self.gen_o, self.gen_x = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3, _4: i < self.seq_length, 
        body=g_recursion, 
        loop_vars=(
            tf.constant(0, dtype=tf.int32), 
            tf.nn.embedding_lookup(self.g_embeddings, self.start_token), 
            self.h0, self.gen_o, self.gen_x
        )
    )
    
    self.gen_x = self.gen_x.stack() # seq_length * batch_size
    self.gen_x = tf.transpose(self.gen_x, perm=[1,0]) # batch_size * seq_length
    
    # Supervised pretraining for generator
    #----------------------------------------
    g_predictions = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=self.seq_length, dynamic_size=False, infer_shape=True
    )
    
    # Time-major embedded inputs, readable one time step at a time inside the loop.
    ta_emb_x = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.seq_length)
    ta_emb_x = ta_emb_x.unstack(self.processed_x)
    
    def pretrain_recursion(i, x_t, h_tm1, g_predictions):
      """One teacher-forced step: store the softmax output, then feed the given token i."""
      h_t = self.g_recurrent_unit(x_t, h_tm1)
      o_t = self.g_output_unit(h_t)
      g_predictions = g_predictions.write(i, tf.nn.softmax(o_t))
      # The next input is the embedding of the provided token, not of a sampled one.
      x_tp1 = ta_emb_x.read(i)
      
      return i+1, x_tp1, h_t, g_predictions
    
    _, _, _, self.g_predictions = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3: i < self.seq_length,
        body=pretrain_recursion,
        loop_vars=(
            tf.constant(0, dtype=tf.int32),
            tf.nn.embedding_lookup(self.g_embeddings, self.start_token),
            self.h0, g_predictions
        )
    )

    # Back to batch-major layout: batch_size x seq_length x emb_size
    self.g_predictions = tf.transpose(self.g_predictions.stack(), perm=[1,0,2])
    
    # Pre-training loss: negative log-probability of the tokens in X_input,
    # averaged over all batch_size * seq_length positions. Probabilities are
    # clipped to [1e-20, 1.0] before the log is taken.
    self.pretrain_loss = -tf.reduce_sum(
        tf.one_hot(tf.to_int32(tf.reshape(self.X_input, [-1])), self.emb_size, 1.0, 0.0) * tf.log(
            tf.clip_by_value(tf.reshape(self.g_predictions, [-1, self.emb_size]), 1e-20, 1.0)
        )
    ) / (self.seq_length * self.batch_size)
    
    # updates from training
    pretrain_opt = self.g_optimizer(self.learning_rate)
    
    # Clip the gradients by global norm before applying them.
    self.pretrain_grad, _ = tf.clip_by_global_norm(tf.gradients(self.pretrain_loss, self.g_params), self.grad_clip)
    self.pretrain_updates = pretrain_opt.apply_gradients(zip(self.pretrain_grad, self.g_params))
    
    # Reinforcement / Unsupervised training
    #-----------------------------------------
    # Same per-position log-probabilities as in pretrain_loss, but each position
    # is weighted by its reward and the sum is not averaged.
    self.g_loss = -tf.reduce_sum(
        tf.reduce_sum(
            tf.one_hot(tf.to_int32(tf.reshape(self.X_input, [-1])), self.emb_size, 1.0, 0.0) * tf.log(
                tf.clip_by_value(tf.reshape(self.g_predictions, [-1, self.emb_size]), 1e-20, 1.0)
            ), 1
        ) * tf.reshape(self.rewards, [-1])
    )
    
    # A second, separate optimizer instance is used for the reward-weighted updates.
    g_opt = self.g_optimizer(self.learning_rate)
    
    self.g_grad, _ = tf.clip_by_global_norm(tf.gradients(self.g_loss, self.g_params), self.grad_clip)
    self.g_updates = g_opt.apply_gradients(zip(self.g_grad, self.g_params))
    
  def g_optimizer(self, *args, **kwargs):
    """Return a new Adam optimizer; all arguments are forwarded to tf.train.AdamOptimizer."""
    return tf.train.AdamOptimizer(*args, **kwargs)
    
  def generate(self, sess):
    """Sample one batch of token sequences (batch_size x seq_length) using `sess`."""
    outputs = sess.run(self.gen_x)
    return outputs
  
  def pretrain_step(self, sess, x):
    """Run one supervised update on batch `x`; returns [update result, pretrain loss]."""
    outputs = sess.run([self.pretrain_updates, self.pretrain_loss], feed_dict={self.X_input: x})
    return outputs
  
  def create_recurrent_unit(self, params):
    """Create the LSTM weights, append them to `params` and return the step function.

    The returned `unit(x, hidden_memory)` takes the current input embedding and
    the stacked (hidden state, cell state) tensor and returns the new stacked memory.
    """
    # weights and bias for input and hidden tensor
    self.Wi = tf.Variable(tf.random_normal([self.emb_dim, self.hidden_dim]))
    self.Ui = tf.Variable(tf.random_normal([self.hidden_dim, self.hidden_dim]))
    self.bi = tf.Variable(tf.random_normal([self.hidden_dim]))

    self.Wf = tf.Variable(tf.random_normal([self.emb_dim, self.hidden_dim]))
    self.Uf = tf.Variable(tf.random_normal([self.hidden_dim, self.hidden_dim]))
    self.bf = tf.Variable(tf.random_normal([self.hidden_dim]))

    self.Wog = tf.Variable(tf.random_normal([self.emb_dim, self.hidden_dim]))
    self.Uog = tf.Variable(tf.random_normal([self.hidden_dim, self.hidden_dim]))
    self.bog = tf.Variable(tf.random_normal([self.hidden_dim]))

    self.Wc = tf.Variable(tf.random_normal([self.emb_dim, self.hidden_dim]))
    self.Uc = tf.Variable(tf.random_normal([self.hidden_dim, self.hidden_dim]))
    self.bc = tf.Variable(tf.random_normal([self.hidden_dim]))

    params.extend([self.Wi, self.Ui, self.bi, self.Wf, self.Uf, self.bf, 
                   self.Wog, self.Uog, self.bog, self.Wc, self.Uc, self.bc])

    def unit(x, hidden_memory):
      # Split the stacked memory into hidden state and cell state.
      prev_hidden_state, c_prev = tf.unstack(hidden_memory)

      # Input Gate
      i = tf.sigmoid(tf.matmul(x, self.Wi) + tf.matmul(prev_hidden_state, self.Ui) + self.bi)
      # Forget Gate
      f = tf.sigmoid(tf.matmul(x, self.Wf) + tf.matmul(prev_hidden_state, self.Uf) + self.bf)
      # Output Gate
      o = tf.sigmoid(tf.matmul(x, self.Wog) + tf.matmul(prev_hidden_state, self.Uog) + self.bog)

      # New Memory Cell
      c_ = tf.nn.tanh(tf.matmul(x, self.Wc) + tf.matmul(prev_hidden_state, self.Uc) + self.bc)
      # Final Memory Cell
      c = f * c_prev + i * c_

      # Current Hidden State
      curr_hidden_state = o * tf.nn.tanh(c)

      return tf.stack([curr_hidden_state, c])

    return unit

  def create_output_unit(self, params):
    """Create the output projection, append it to `params` and return the logits function."""
    self.Wo = tf.Variable(tf.random_normal([self.hidden_dim, self.emb_size]))
    self.bo = tf.Variable(tf.random_normal([self.emb_size]))
    params.extend([self.Wo, self.bo])

    def unit(hidden_memory_tuple):
      # Only the hidden state is projected; the cell state is ignored here.
      hidden_state, c_prev = tf.unstack(hidden_memory_tuple)
      logits = tf.matmul(hidden_state, self.Wo) + self.bo

      return logits

    return unit


In [0]:
class Rollout:
  """Monte-Carlo rollout policy that scores partial sequences of a Generator.

  The constructor builds a sampling graph that copies the first `given_num`
  tokens of `X_input` and samples the remaining positions with the LSTM.
  `get_reward` completes every prefix this way and asks a discriminator to
  score the completed sequences.

  NOTE(review): every weight held here is a `tf.identity` of the generator's
  variable, so the sampling graph built in `__init__` reads the generator's
  current weights on each run. `update_params()` creates new blended tensors
  but does not rebuild `self.gen_x`, so it presumably does not change what
  `get_reward` samples from - confirm the intended behaviour.
  """

  # LSTM weight attributes shared with the generator, in creation order.
  _RECURRENT_WEIGHTS = ('Wi', 'Ui', 'bi', 'Wf', 'Uf', 'bf',
                        'Wog', 'Uog', 'bog', 'Wc', 'Uc', 'bc')
  # Output projection attributes shared with the generator.
  _OUTPUT_WEIGHTS = ('Wo', 'bo')

  def __init__(self, lstm, update_rate):
    """Build the rollout sampling graph.

    Args:
      lstm: Generator instance whose weights and dimensions are reused.
      update_rate: weight given to the rollout's own tensors when
        `update_params` blends them with the generator's.
    """
    self.lstm = lstm
    self.update_rate = update_rate

    self.batch_size = self.lstm.batch_size
    self.seq_length = self.lstm.seq_length
    self.emb_size = self.lstm.emb_size
    self.emb_dim = self.lstm.emb_dim
    self.hidden_dim = self.lstm.hidden_dim
    self.start_token = tf.identity(self.lstm.start_token)
    self.learning_rate = self.lstm.learning_rate

    self.g_embeddings = tf.identity(self.lstm.g_embeddings)
    self.g_recurrent_unit = self.create_recurrent_unit()
    self.g_output_unit = self.create_output_unit()

    # sequence of tokens generated by the generator
    self.X_input = tf.placeholder(tf.int32, shape=[self.batch_size, self.seq_length])
    # number of leading tokens that are copied from X_input instead of sampled
    self.given_num = tf.placeholder(tf.int32)

    with tf.device('gpu:0'):
      # time-major embedded input: seq_length x batch_size x emb_dim
      self.processed_x = tf.transpose(tf.nn.embedding_lookup(self.g_embeddings, self.X_input), perm=[1,0,2])

    ta_emb_x = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.seq_length)
    ta_emb_x = ta_emb_x.unstack(self.processed_x)

    ta_x = tensor_array_ops.TensorArray(dtype=tf.int32, size=self.seq_length)
    ta_x = ta_x.unstack(tf.transpose(self.X_input, perm=[1,0]))

    # Initial LSTM memory: zero hidden state and zero cell state, stacked.
    self.h0 = tf.zeros([self.batch_size, self.hidden_dim])
    self.h0 = tf.stack([self.h0, self.h0])

    gen_x = tensor_array_ops.TensorArray(dtype=tf.int32, size=self.seq_length, dynamic_size=False, infer_shape=True)

    # When current index i < given_num, use the provided tokens as the input at each time step
    def g_recursion_1(i, x_t, h_tm1, given_num, gen_x):
      h_t = self.g_recurrent_unit(x_t, h_tm1)
      x_tp1 = ta_emb_x.read(i)
      gen_x = gen_x.write(i, ta_x.read(i))

      return i+1, x_tp1, h_t, given_num, gen_x

    # When current index i >= given_num, start roll-out: the token sampled at
    # time step t becomes the input at time step t+1
    def g_recursion_2(i, x_t, h_tm1, given_num, gen_x):
      h_t = self.g_recurrent_unit(x_t, h_tm1)  # stacked (hidden state, cell state)
      o_t = self.g_output_unit(h_t)  # logits, not probabilities
      log_prob = tf.log(tf.nn.softmax(o_t))
      next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
      x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
      gen_x = gen_x.write(i, next_token)  # token ids, one per sequence
      return i + 1, x_tp1, h_t, given_num, gen_x

    # Phase 1: replay the given prefix.
    i, x_t, h_tm1, given_num, self.gen_x = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, given_num, _4: i < given_num,
        body=g_recursion_1,
        loop_vars=(
            tf.constant(0, dtype=tf.int32),
            tf.nn.embedding_lookup(self.g_embeddings, self.start_token),
            self.h0, self.given_num, gen_x
        )
    )

    # Phase 2: sample the remaining positions, continuing from phase 1's state.
    _, _, _, _, self.gen_x = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3, _4: i < self.seq_length,
        body=g_recursion_2,
        loop_vars=(i, x_t, h_tm1, given_num, self.gen_x)
    )

    self.gen_x = self.gen_x.stack() # seq_length * batch_size
    self.gen_x = tf.transpose(self.gen_x, perm=[1,0]) # batch_size * seq_length

  def get_reward(self, sess, X_input, rollout_num, dis):
    """Estimate a reward for every token position of `X_input`.

    For each prefix length 1 .. seq_length - 1 the rest of the sequence is
    sampled `rollout_num` times and scored by `dis`; the last position is
    scored on the complete, unmodified `X_input`.

    Args:
      sess: TensorFlow session holding the generator and discriminator weights.
      X_input: token ids with shape [batch_size, seq_length].
      rollout_num: number of Monte-Carlo rollouts averaged per position.
      dis: discriminator exposing `X_input`, `dropout_keep_prob` and
        `ypred_for_auc`.

    Returns:
      Array of shape [batch_size, seq_length] with the averaged scores.
    """
    rewards = []
    for i in tnrange(rollout_num):
      # given_num between 1 and seq_length - 1: score partially given sentences
      for given_num in tnrange(1, self.seq_length):
        feed = {self.X_input: X_input, self.given_num: given_num}
        samples = sess.run(self.gen_x, feed)
        ypred = self._class1_probability(sess, dis, samples)
        if i == 0:
          rewards.append(ypred)
        else:
          rewards[given_num-1] += ypred

      # last token reward: score of the complete sentence
      ypred = self._class1_probability(sess, dis, X_input)
      if i == 0:
        rewards.append(ypred)
      else:
        rewards[self.seq_length-1] += ypred

    # Average over the rollouts and switch to batch-major layout.
    rewards = np.transpose(np.array(rewards)) / (1.0 * rollout_num)
    return rewards

  def _class1_probability(self, sess, dis, sequences):
    """Return the discriminator's softmax output for class index 1, per sequence.

    NOTE(review): which label class index 1 stands for depends on how the
    discriminator's training labels were encoded - confirm that it is the
    class the generator should be rewarded for.
    """
    feed = {dis.X_input: sequences, dis.dropout_keep_prob: 1.0}
    class_probs = sess.run(dis.ypred_for_auc, feed)
    return np.array(class_probs)[:, 1]

  def _build_recurrent_unit(self):
    """Return the LSTM step function operating on the current weight tensors."""
    def unit(x, hidden_memory):
      prev_hidden_state, c_prev = tf.unstack(hidden_memory)

      # Input Gate
      i = tf.sigmoid(tf.matmul(x, self.Wi) + tf.matmul(prev_hidden_state, self.Ui) + self.bi)
      # Forget Gate
      f = tf.sigmoid(tf.matmul(x, self.Wf) + tf.matmul(prev_hidden_state, self.Uf) + self.bf)
      # Output Gate
      o = tf.sigmoid(tf.matmul(x, self.Wog) + tf.matmul(prev_hidden_state, self.Uog) + self.bog)

      # New Memory Cell
      c_ = tf.nn.tanh(tf.matmul(x, self.Wc) + tf.matmul(prev_hidden_state, self.Uc) + self.bc)
      # Final Memory Cell
      c = f * c_prev + i * c_

      # Current Hidden State
      curr_hidden_state = o * tf.nn.tanh(c)

      return tf.stack([curr_hidden_state, c])

    return unit

  def _build_output_unit(self):
    """Return the function mapping the stacked LSTM memory to output logits."""
    def unit(hidden_memory_tuple):
      # Only the hidden state is projected; the cell state is ignored.
      hidden_state, c_prev = tf.unstack(hidden_memory_tuple)
      logits = tf.matmul(hidden_state, self.Wo) + self.bo

      return logits

    return unit

  def create_recurrent_unit(self):
    """Mirror the generator's LSTM weights and return the LSTM step function."""
    for name in self._RECURRENT_WEIGHTS:
      setattr(self, name, tf.identity(getattr(self.lstm, name)))
    return self._build_recurrent_unit()

  def update_recurrent_unit(self):
    """Blend own LSTM weights with the generator's and return a new step function.

    Each weight becomes update_rate * own + (1 - update_rate) * generator's.
    This method used to be nested inside `create_recurrent_unit` after its
    `return`, so it was unreachable and `update_params` raised AttributeError.
    """
    for name in self._RECURRENT_WEIGHTS:
      blended = (self.update_rate * getattr(self, name)
                 + (1 - self.update_rate) * tf.identity(getattr(self.lstm, name)))
      setattr(self, name, blended)
    return self._build_recurrent_unit()

  def create_output_unit(self):
    """Mirror the generator's output projection and return the logits function."""
    for name in self._OUTPUT_WEIGHTS:
      setattr(self, name, tf.identity(getattr(self.lstm, name)))
    return self._build_output_unit()

  def update_output_unit(self):
    """Blend own output projection with the generator's and return a new logits function."""
    for name in self._OUTPUT_WEIGHTS:
      blended = (self.update_rate * getattr(self, name)
                 + (1 - self.update_rate) * tf.identity(getattr(self.lstm, name)))
      setattr(self, name, blended)
    return self._build_output_unit()

  def update_params(self):
    """Refresh the embeddings and blend all weights with the generator's current ones."""
    self.g_embeddings = tf.identity(self.lstm.g_embeddings)
    self.g_recurrent_unit = self.update_recurrent_unit()
    self.g_output_unit = self.update_output_unit()

In [30]:
# Build the generator and the rollout helper that estimates per-token rewards.
generator = Generator(
    batch_size=BATCH_SIZE,
    seq_length=SEQ_LENGTH,
    vocab_size=VOCAB_SIZE,
    emb_size=G_EMB_SIZE,
    emb_dim=G_EMB_DIM,
    hidden_dim=G_HIDDEN_DIM,
    start_token=G_START_TOKEN,
    learning_rate=0.01,
    reward_gamma=0.95,
)

rollout = Rollout(lstm=generator, update_rate=0.8)

# Let TensorFlow grow its GPU memory on demand, then initialise all variables.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())

for total_batch in tnrange(1, desc='batch_loop'):
  # train generator for one step
  for it in range(1):
    samples = generator.generate(sess)
    rewards = rollout.get_reward(
        sess=sess, X_input=samples, rollout_num=16, dis=discriminator
    )
    feed = {generator.X_input: samples, generator.rewards: rewards}
    _ = sess.run(generator.g_updates, feed_dict=feed)


[[88 88 88 ... 88 84 97]
 [51 88 60 ... 98 96 14]
 [97 33 88 ... 88 14 33]
 ...
 [14 60 97 ... 33 88 67]
 [88 90 83 ... 88 63 63]
 [97 97 69 ... 94 88 88]]
[[0.9799072  0.97886205 0.9790463  ... 0.92636746 0.927194   0.9263591 ]
 [0.97759557 0.9814943  0.98014665 ... 0.95218873 0.95359933 0.9539719 ]
 [0.97904336 0.9833619  0.98589325 ... 0.98127097 0.98152435 0.98153496]
 ...
 [0.9645319  0.9817629  0.97586524 ... 0.985156   0.98506635 0.9852057 ]
 [0.9824933  0.97933173 0.9802882  ... 0.99113446 0.9915313  0.9915325 ]
 [0.9764486  0.97462296 0.976124   ... 0.9808761  0.9803121  0.9798367 ]]
None
