<a href="https://colab.research.google.com/github/hogch/masterproject_gan/blob/master/SeqGAN_headlines_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text Generation using SeqGAN

This notebook generates news headlines with a GAN (Generative Adversarial Network), a machine-learning technique in which a generator and a discriminator are trained against each other.

## Import dependencies



In [0]:
# Mount Google Drive at /content/drive so later cells can read and write files there.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Switch to the project folder on the mounted drive and list its contents.
%cd /content/drive/My\ Drive/Colab\ Notebooks/Masterproject
!ls

**Install required dependencies manually**

In [0]:
# Install the third-party packages imported in the next cell (no versions are pinned).
!pip install tflearn
!pip install tqdm
# Force-install tqdm again, this time from the given GitHub archive URL.
!pip install --force https://github.com/chengs/tqdm/archive/colab.zip
!pip install tensorboardcolab

**Import required modules**

In [0]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import datetime

from keras.preprocessing.text import Tokenizer
from tflearn.data_utils import pad_sequences, to_categorical
from tensorflow.contrib import slim
from tqdm import tqdm, tnrange

from tensorboardcolab import *

In [0]:
# Abort early unless TensorFlow reports the first GPU as available.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

## Load Dataset

In [0]:
"""df_real = pd.read_csv('headlines.csv', sep=',', usecols=['text', 'fake'])
df_real = df_real.sample(frac=1)
df_real = df_real[:500000]
print(df_real.shape)
df_real.head()"""

# Read the 'text' and 'fake' columns of the short headline file and shuffle the rows once.
df_real = pd.read_csv(
    'headlines_short.csv', sep=',', usecols=['text', 'fake']
).sample(frac=1)
print(df_real.shape)
df_real.head()

In [0]:
# Empty frames with the same columns as df_real; the training loop fills both with generated headlines.
df_fake = pd.DataFrame(columns=['text', 'fake'])  # generated samples that get_mixed_data() mixes with real ones
df_evaluation = pd.DataFrame(columns=['text', 'fake'])  # generated samples read by get_evaluation_data()

## Define test set, training set, and hyperparameters

In [0]:
# General Hyper-Parameter
BATCH_SIZE = 64 # fixed batch dimension of every model placeholder
SEQ_LENGTH = 5 # average sequence length of the given sentences in the dataset
TRAINING_SPLIT = 1 # fraction of rows get_datasets() puts into the training part (1 leaves the test part empty)
TOTAL_EPOCHS = 200 # iterations of the adversarial training loop

# Discriminator Hyper-Parameter
D_PRETRAIN_EPOCHS = 50 # iterations of the discriminator pretraining loop
D_EPOCHS = 1 # discriminator passes per inner generator iteration (the earlier note here read '3')
D_EMB_SIZE = 300 # embedding dimension
D_NUM_CLASSES = 2 # width of the softmax output
D_FILTER_SIZES = [1,2,3,4,5] # convolution window heights; one conv/max-pool branch per entry
D_NUM_FILTERS = 128 # filters per convolution branch
D_DROPOUT = 0.5 # fed into the discriminator's dropout_keep_prob placeholder, i.e. a keep probability
D_LEARNING_RATE = 0.001 # Adam learning rate

# Generator Hyper-Parameter
G_PRETRAIN_EPOCHS = 100 # iterations of the generator pretraining loop
G_EPOCHS = 10 # inner iterations per adversarial epoch (the earlier note here read '5')
G_EMB_SIZE = 300 # embedding dimension
G_HIDDEN_LAYER_SIZES = [64] # hidden state dimension of lstm cell; one LSTM layer per entry
G_TEMPERATURE = 0.5 # logits are divided by this value before the softmax
G_DROPOUT = 0.5 # fed into the generator's dropout_keep_prob placeholder, i.e. a keep probability
G_LEARNING_RATE = 0.01 # Adam learning rate

In [0]:
def load_data(df):
  """Split a frame into parallel lists of stripped texts and labels.

  Args:
    df: object whose 'text' and 'fake' entries can be iterated in step.

  Returns:
    (texts, labels): the whitespace-stripped 'text' values and the matching
    'fake' values, in row order.
  """
  pairs = list(zip(df['text'], df['fake']))
  texts = [text.strip() for text, _ in pairs]
  labels = [label for _, label in pairs]

  return texts, labels

In [0]:
# Fit the tokenizer on the real headlines and derive the model vocabulary from it.
tokenizer = Tokenizer(lower=False)
texts, labels = load_data(df_real)
tokenizer.fit_on_texts(texts)

# Keep only words that occur more than 5 times, ordered by their tokenizer id.
# The tokenizer numbers words from 1 in order of decreasing frequency, so the
# kept words are expected to own the ids 1..N.
frequent_words = [
    word
    for word, _ in sorted(tokenizer.word_index.items(), key=lambda item: item[1])
    if tokenizer.word_counts[word] > 5
]
# The id -> word list below is only valid if that expectation holds.
assert all(
    tokenizer.word_index[word] == idx
    for idx, word in enumerate(frequent_words, start=1)
), 'frequent words do not occupy the tokenizer ids 1..N'

# WORD_INDEX[i] is the word with tokenizer id i. Id 0 is never assigned to a
# word (presumably it is the value pad_sequences pads with -- confirm), so it
# maps to an empty string.
WORD_INDEX = [''] + frequent_words
VOCAB_SIZE = len(WORD_INDEX)

# Make texts_to_sequences() drop every word whose id is >= VOCAB_SIZE, so no
# id can fall outside the embedding matrices or outside WORD_INDEX.
tokenizer.num_words = VOCAB_SIZE

print(VOCAB_SIZE)
print(WORD_INDEX[:50])

In [0]:
import random

def get_datasets(texts, labels):
  """Tokenize, pad, shuffle and split texts; return at most one batch per part.

  Args:
    texts: list of sentences.
    labels: list of class labels, parallel to texts.

  Returns:
    (X_train, y_train, X_test, y_test), each cut to the first BATCH_SIZE rows.
    The y arrays are the labels passed through to_categorical(..., 2).
  """
  padded = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=SEQ_LENGTH)
  label_array = np.asarray(labels)

  # One shared permutation keeps every row aligned with its label.
  order = np.arange(padded.shape[0])
  np.random.shuffle(order)
  padded, label_array = padded[order], label_array[order]

  # Rows before split_at form the training part, the remainder the test part.
  split_at = int(TRAINING_SPLIT * padded.shape[0])
  X_train, X_test = padded[:split_at], padded[split_at:]
  y_train = to_categorical(label_array[:split_at], 2)
  y_test = to_categorical(label_array[split_at:], 2)

  return X_train[:BATCH_SIZE], y_train[:BATCH_SIZE], X_test[:BATCH_SIZE], y_test[:BATCH_SIZE]
  
def get_evaluation_data():
  """Reshuffle the global df_evaluation in place and return one batch built from it."""
  global df_evaluation
  df_evaluation = df_evaluation.sample(frac=1)

  return get_datasets(*load_data(df_evaluation))
  
def get_fake_data():
  """Reshuffle the global df_fake in place and return one batch built from it."""
  global df_fake
  df_fake = df_fake.sample(frac=1)

  return get_datasets(*load_data(df_fake))

def get_real_data():
  """Reshuffle the global df_real in place and return one batch built from it."""
  global df_real
  df_real = df_real.sample(frac=1)

  return get_datasets(*load_data(df_real))

def get_mixed_data():
  """Return one batch drawn from a shuffled mix of real and generated headlines.

  Up to BATCH_SIZE*6 rows are taken from each of df_real and df_fake.
  """
  # No `global` statement is needed: both frames are only read, never rebound.
  shuffled_real = df_real.sample(frac=1)
  shuffled_fake = df_fake.sample(frac=1)
  per_source = BATCH_SIZE*6
  mixed = pd.concat([shuffled_real[:per_source], shuffled_fake[:per_source]]).sample(frac=1)

  return get_datasets(*load_data(mixed))


## Discriminator
The discriminator is a model that classifies sequences (here: headlines) as real or fake.
In this implementation, the discriminative model uses the following layers:
1.   embedding layer
2.   convolution layer with max-pooling operation
3.   highway layer with dropout
4.   softmax layer

In [0]:
class Discriminator:
  """Convolutional text classifier used as the GAN discriminator.

  Graph: embedding lookup -> one convolution/max-pool branch per filter size
  -> highway layer -> dropout -> linear layer with softmax.
  Call build_model() once after construction to create the graph.
  """

  def __init__(self, batch_size, vocab_size, seq_length, emb_size, num_classes, 
               filter_sizes, num_filters, learning_rate):
    """Store the hyperparameters and create the input placeholders.

    Args:
      batch_size: fixed number of rows in every fed batch.
      vocab_size: number of rows of the embedding matrix.
      seq_length: number of token ids per row.
      emb_size: embedding dimension.
      num_classes: width of the output layer.
      filter_sizes: convolution window heights, one branch per entry.
      num_filters: number of filters per branch.
      learning_rate: learning rate of the Adam optimizers.
    """
    self.batch_size = batch_size
    self.vocab_size = vocab_size
    self.seq_length = seq_length
    self.emb_size = emb_size
    self.num_classes = num_classes
    self.filter_sizes = filter_sizes
    self.num_filters = num_filters
    self.learning_rate = learning_rate

    self.X_input = tf.placeholder(tf.int32, shape=[self.batch_size, self.seq_length], name='d_X_input')
    self.y_input = tf.placeholder(tf.int32, shape=[self.batch_size, self.num_classes], name='d_y_input')
    self.dropout_keep_prob = tf.placeholder(tf.float32, name='d_dropout_keep_prob')
    
    # Keeping track of l2 regularization loss (optional)
    self.l2_reg_lambda = 0.2
    self.l2_loss = tf.constant(0.0)

  def build_model(self):
    """Create the layers, the losses, the optimizers and the summaries.

    The pretraining and the training objective are built by the same method
    but get separate Adam optimizers.
    """
    with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):
      self.embedding_layer = self.build_embedding_layer()
      self.convolution_maxpool_layer = self.build_convolution_maxpool_layer()
      self.scores, self.predictions = self.build_softmax_layer()
      
      # PRETRAINING
      self.pretrain_loss = self.calc_mean_cross_entropy_loss()
      self.pretrain_accuracy = self.calc_accuracy()
      self.pretrain_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.pretrain_loss)

      # TRAINING
      self.loss = self.calc_mean_cross_entropy_loss()
      self.accuracy = self.calc_accuracy()
      self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
      
      # SUMMARY
      self.d_pretrain_summary = tf.summary.merge([
          tf.summary.scalar('d_pretrain_loss', self.pretrain_loss),
          tf.summary.scalar('d_pretrain_accuracy', self.pretrain_accuracy)
      ])
      self.d_summary = tf.summary.merge([
          tf.summary.scalar('d_loss', self.loss),
          tf.summary.scalar('d_accuracy', self.accuracy)
      ])
        
  def build_embedding_layer(self):
    """Look up the embeddings of X_input and append a trailing axis of size 1 for conv2d."""
    with tf.device('gpu:0'), tf.name_scope('d_embedding_layer'):
      emb_matrix = tf.Variable(tf.random_uniform([self.vocab_size, self.emb_size], -1.0, 1.0))
      emb_lookup = tf.nn.embedding_lookup(emb_matrix, self.X_input)
      emb_lookup_expand = tf.expand_dims(emb_lookup, -1)
      
      return emb_lookup_expand
    
  def build_convolution_maxpool_layer(self):
    """Run one convolution + max-pool branch per filter size and concatenate the results.

    Returns:
      Tensor reshaped to [-1, num_filters * len(filter_sizes)].
    """
    with tf.name_scope('d_convolution_maxpool_layer'):
      pooled_outputs = []
      for filter_size in self.filter_sizes:
        with tf.name_scope('d_conv-maxpool-%s' % filter_size):
          # Convolution Layer
          filter_shape = [filter_size, self.emb_size, 1, self.num_filters]
          W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='d_W')
          b = tf.Variable(tf.constant(0.1, shape=[self.num_filters]), name='d_b')
          conv = tf.nn.conv2d(
              input=self.embedding_layer,
              filter=W,
              strides=[1,1,1,1], # the filter is applied to the input in one-pixel intervals in each dimension, corresponding to a “full” convolution
              padding='VALID',
              name='d_conv'
          )
          # Apply non-linearity - activation function
          activation = tf.nn.relu(tf.nn.bias_add(conv, b), name='d_relu')
          # Maxpooling over outputs; the window spans every position the VALID convolution produced
          max_pooling = tf.nn.max_pool(
              value=activation,
              ksize=[1, self.seq_length-filter_size+1, 1, 1],
              strides=[1,1,1,1],
              padding='VALID',
              name='max_pooling'
          )
          pooled_outputs.append(max_pooling)

      # combine all the pooled features
      self.num_filter_total = self.num_filters * len(self.filter_sizes)
      h_pool = tf.concat(pooled_outputs, axis=3)
      
      return tf.reshape(h_pool, [-1, self.num_filter_total])
        
  def build_softmax_layer(self): 
    """Apply highway, dropout and the final linear layer.

    Also sets self.h_highway, self.h_drop, self.scores and self.truth_prob and
    adds the L2 norm of the output weights and bias to self.l2_loss.

    Returns:
      (scores, predictions): the logits and their argmax over the class axis.
    """
    with tf.name_scope('highway'):
      self.h_highway = self.highway(
          self.convolution_maxpool_layer, self.convolution_maxpool_layer.get_shape()[1], 1, 0
      )

    with tf.name_scope('dropout'):
      self.h_drop = tf.nn.dropout(self.h_highway, self.dropout_keep_prob)
      
    with tf.name_scope('softmax_output'):
      W_softmax = tf.Variable(
          tf.truncated_normal(
              [self.num_filter_total, self.num_classes], 
              stddev=0.1
          ), name='d_W_softmax'
      )
      b_softmax = tf.Variable(tf.constant(0.1, shape=[self.num_classes]), name='d_b_softmax')
            
      self.l2_loss += tf.nn.l2_loss(W_softmax)
      self.l2_loss += tf.nn.l2_loss(b_softmax)
      
      self.scores = tf.nn.xw_plus_b(self.h_drop, W_softmax, b_softmax, name='d_scores')
      # NOTE(review): column 1 is the class of label value 1. add_samples()
      # stores generated headlines with fake=1, so this may be the probability
      # of "generated" rather than of "real" -- confirm against the labels in
      # the real dataset.
      self.truth_prob = tf.nn.softmax(self.scores, -1)[:, 1]
      predictions = tf.argmax(self.scores, 1, name='d_predictions')
      
    return self.scores, predictions
  
  def calc_mean_cross_entropy_loss(self):
    """Return the mean softmax cross-entropy plus the weighted L2 term."""
    with tf.name_scope('d_loss'):
      losses = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.scores, labels=self.y_input)
      loss = tf.reduce_mean(losses) + self.l2_reg_lambda * self.l2_loss
      
      return loss
      
  def calc_accuracy(self):
    """Return the fraction of rows whose predicted class equals the argmax of y_input."""
    with tf.name_scope('d_accuracy'):
      correct_predictions = tf.equal(self.predictions, tf.argmax(self.y_input, 1))
      accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')
      
      return accuracy
      
  def highway(self, input_, size, num_layers=1, bias=-2.0, f=tf.nn.relu):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).
    t = sigmoid(Wy + b)
    z = t * g(Wy + b) + (1 - t) * y
    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.

    Args:
      input_: 2-D input tensor.
      size: width of the input and of every highway layer.
      num_layers: number of stacked highway layers; 0 returns input_ unchanged.
      bias: constant added to the gate pre-activation.
      f: nonlinearity g.

    Returns:
      Output tensor of the last highway layer.
    """
    with tf.variable_scope('highway'):
      size = int(size)
      # Start from the input so the result is defined even with zero layers.
      output = input_
      for idx in range(num_layers):
        g = f(slim.fully_connected(input_, size, scope='highway_lin_%d' % idx, activation_fn=None))
        t = tf.sigmoid(slim.fully_connected(input_, size, scope='highway_gate_%d' % idx, activation_fn=None) + bias)

        output = t * g + (1. - t) * input_
        input_ = output
        
    return output
  
  def get_truth_prob(self, sess, X):
    """Evaluate self.truth_prob for the batch X with dropout disabled (keep probability 1.0)."""
    feed_dict = { self.X_input: X, self.dropout_keep_prob: 1.0 }
        
    return sess.run(self.truth_prob, feed_dict=feed_dict)
  
  def pretrain(self, sess, X, y, dropout):
    """Run one pretraining step.

    Args:
      sess: TensorFlow session.
      X: batch of token ids.
      y: batch of one-hot labels.
      dropout: value fed into dropout_keep_prob, i.e. the keep probability.

    Returns:
      (summary, loss, accuracy) of this step.
    """
    feed_dict = {
        self.X_input: X,
        self.y_input: y,
        self.dropout_keep_prob: dropout
    }
    _, summary, loss, acc = sess.run([
        self.pretrain_optimizer, 
        self.d_pretrain_summary,
        self.pretrain_loss,
        self.pretrain_accuracy
    ], feed_dict=feed_dict)
    
    return summary, loss, acc
        
  def train(self, sess, X, y, dropout):
    """Run one adversarial-phase training step; arguments and result as in pretrain()."""
    feed_dict = {
        self.X_input: X,
        self.y_input: y,
        self.dropout_keep_prob: dropout
    }
    _, summary, loss, acc = sess.run([
        self.optimizer, 
        self.d_summary,
        self.loss,
        self.accuracy
    ], feed_dict=feed_dict)
    
    return summary, loss, acc

## Build Discriminator model and train model

In [0]:
# Start from an empty default graph, then create the discriminator and build its graph.
tf.reset_default_graph()

discriminator = Discriminator(
    batch_size=BATCH_SIZE,
    vocab_size=VOCAB_SIZE,
    seq_length=SEQ_LENGTH,
    emb_size=D_EMB_SIZE,
    num_classes=D_NUM_CLASSES,
    filter_sizes=D_FILTER_SIZES,
    num_filters=D_NUM_FILTERS,
    learning_rate=D_LEARNING_RATE,
)
discriminator.build_model()

## Generator

LSTM with Reinforcement Learning for sequence generation.

In [0]:
from tensorflow.contrib import rnn, layers, seq2seq, slim
import random

# inspired by https://www.oreilly.com/ideas/introduction-to-lstms-with-tensorflow

class Generator:
  """LSTM model over token ids used as the GAN generator.

  Graph: embedding lookup -> stacked LSTM cells with output dropout -> linear
  projection to the vocabulary -> temperature softmax.
  Call build_model() once after construction to create the graph.
  """

  def __init__(self, batch_size, seq_length, vocab_size, emb_size, temperature,
               hidden_layer_sizes, word_index, learning_rate):
    """Store the hyperparameters and create the input placeholders.

    Args:
      batch_size: fixed number of rows in every fed batch.
      seq_length: number of token ids per row.
      vocab_size: size of the embedding matrix and of the output distribution.
      emb_size: embedding dimension.
      temperature: divisor applied to the logits before the softmax.
      hidden_layer_sizes: LSTM state sizes, one layer per entry.
      word_index: sequence mapping a token id to its word.
      learning_rate: learning rate of the Adam optimizers.
    """
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.vocab_size = vocab_size
    self.emb_size = emb_size
    self.hidden_layer_sizes = hidden_layer_sizes
    self.word_index = word_index
    self.learning_rate = learning_rate
    self.temperature = temperature
    
    self.grad_clip = 5.0

    # Output projection variables; created on the first call of
    # get_prediction_and_loss() and reused by every later call.
    self.output_W = None
    self.output_b = None
    
    self.X_input = tf.placeholder(tf.int32, shape=[self.batch_size, self.seq_length], name='g_X_input')
    self.dropout_keep_prob = tf.placeholder(tf.float32, name='g_dropout_keep_prob')
    self.rewards = tf.placeholder(tf.float32, shape=[self.batch_size, self.seq_length], name='rewards')
    
  def build_model(self):
    """Create the layers, both train operations and the summaries."""
    with tf.variable_scope('generator', reuse=tf.AUTO_REUSE):
      self.embedding_layer = self.build_embedding_layer(self.X_input)
      self.outputs, final_state = self.build_lstm_layers()
      
      # PRETRAINING
      pretrain_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
      pretrain_predictions, self.pretrain_loss = self.get_prediction_and_loss()
      self.pretrain_operation = slim.learning.create_train_op(
          self.pretrain_loss, pretrain_optimizer, clip_gradient_norm=self.grad_clip
      )
      
      # TRAINING
      # NOTE(review): self.rewards is fed by train() and logged below, but it
      # does not enter self.loss, so this objective is the same likelihood
      # loss as in pretraining.
      self.predictions, self.loss = self.get_prediction_and_loss()
      optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
      self.train_operation = slim.learning.create_train_op(
          self.loss, optimizer, clip_gradient_norm=self.grad_clip
      )
                  
      # SUMMARY
      # Build the mean-reward op once so train() does not add a new op to the
      # graph on every call.
      self.mean_reward = tf.reduce_mean(self.rewards)
      self.g_pretrain_summary = tf.summary.scalar('g_pretrain_loss', self.pretrain_loss)
      self.g_train_summary = tf.summary.merge([
          tf.summary.scalar('g_loss', self.loss),
          tf.summary.scalar('g_reward', self.mean_reward)
      ])
        
  def build_embedding_layer(self, X_input):
    """Return the embeddings of X_input from a new uniformly initialized matrix."""
    with tf.name_scope('g_embedding_layer'):
      emb_matrix = tf.Variable(tf.random_uniform([self.vocab_size, self.emb_size], -1.0, 1.0))
      emb_lookup = tf.nn.embedding_lookup(emb_matrix, X_input)
      
    return emb_lookup
      
  def build_lstm_layers(self):
    """Run the embedded input through the stacked LSTM cells.

    Returns:
      (outputs, final_state) as returned by tf.nn.dynamic_rnn.
    """
    with tf.name_scope('g_lstm_layers'):
      cells = [rnn.LSTMCell(layer_size) for layer_size in self.hidden_layer_sizes]
      dropouts = [rnn.DropoutWrapper(cell, output_keep_prob=self.dropout_keep_prob) for cell in cells]
      stacked_cell = rnn.MultiRNNCell(dropouts)
      
      initial_state = stacked_cell.zero_state(self.batch_size, tf.float32)
      outputs, final_state = tf.nn.dynamic_rnn(stacked_cell, self.embedding_layer, initial_state=initial_state)
      
    return outputs, final_state
  
  def get_prediction_and_loss(self):
    """Project the LSTM outputs to the vocabulary and compute the likelihood loss.

    The projection variables are created only once per instance. tf.Variable
    always makes a new variable, so creating them on every call would give
    pretraining and training two unrelated output layers, and the pretrained
    one would never be used for generation.

    Returns:
      (predictions, loss): predictions is a one-element list holding the
      softmax over the vocabulary for every position; loss is the negative
      log-probability assigned to the fed token ids, divided by
      seq_length * batch_size.
    """
    if self.output_W is None:
      self.output_W = tf.Variable(
          tf.random_normal([self.hidden_layer_sizes[-1], self.vocab_size]), dtype=tf.float32
      )
      self.output_b = tf.Variable(tf.zeros([1, self.vocab_size]), dtype=tf.float32)

    predictions = []
    output = tf.reshape(self.outputs, [-1, self.hidden_layer_sizes[-1]])
    logits = tf.matmul(output, self.output_W) + self.output_b # broadcasted addition
    predictions.append(tf.nn.softmax(tf.divide(logits, self.temperature)))
      
    # Probabilities are clipped away from 0 before the log to avoid -inf.
    loss = -tf.reduce_sum(
      tf.one_hot(tf.to_int32(tf.reshape(self.X_input, [-1])), self.vocab_size, 1.0, 0.0) * 
      tf.log(tf.clip_by_value(tf.reshape(predictions[-1], [-1, self.vocab_size]), 1e-20, 1.0))
    ) / (self.seq_length * self.batch_size)
    
    return predictions, loss
      
  def generate(self, sess, given_tokens, dropout):
    """Run the model on given_tokens and decode the most likely token per position.

    Args:
      sess: TensorFlow session.
      given_tokens: batch of token ids fed as input.
      dropout: value fed into dropout_keep_prob, i.e. the keep probability.

    Returns:
      (sentences, sequences) as produced by translate_samples().
    """
    feed_dict = { self.X_input: given_tokens, self.dropout_keep_prob: dropout }
    samples = sess.run(self.predictions, feed_dict=feed_dict)
    sentences, sequences = self.translate_samples(samples)
    
    return sentences, sequences
    
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  # REINFORCEMENT LEARNING  
  def get_reward(self, sess, given_tokens, rollout_num, dis, dropout):
    """Average the discriminator score of regenerated sequences over several runs.

    Args:
      sess: TensorFlow session.
      given_tokens: batch of token ids fed as input.
      rollout_num: number of repetitions to average over.
      dis: object with get_truth_prob(sess, X).
      dropout: value fed into dropout_keep_prob, i.e. the keep probability.

    Returns:
      Array of shape (batch_size, seq_length) with the averaged scores.
    """
    rewards = np.zeros((self.batch_size, self.seq_length))
        
    # NOTE(review): given_num only selects the reward column. Every iteration
    # feeds the complete given_tokens, and column 0 is never written, so it
    # stays 0.
    for i in range(rollout_num):
      for given_num in range(1, self.seq_length):
        feed_dict = { self.X_input: given_tokens, self.dropout_keep_prob: dropout }
        samples = sess.run(self.predictions, feed_dict=feed_dict)
        sentences, sequence = self.translate_samples(samples)
        rewards[:, given_num] += dis.get_truth_prob(sess, sequence)
            
    rewards /= rollout_num
    return rewards
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  
  def pretrain(self, sess, given_tokens, dropout):
    """Run one pretraining step and return (summary, pretrain_loss)."""
    feed_dict = { self.X_input: given_tokens, self.dropout_keep_prob: dropout }
    _, summary, pretrain_loss = sess.run([
        self.pretrain_operation, 
        self.g_pretrain_summary,
        self.pretrain_loss
    ], feed_dict=feed_dict)
    
    return summary, pretrain_loss
  
  def train(self, sess, given_tokens, rewards, dropout):
    """Run one adversarial-phase training step.

    Args:
      sess: TensorFlow session.
      given_tokens: batch of token ids fed as input.
      rewards: array of shape (batch_size, seq_length).
      dropout: value fed into dropout_keep_prob, i.e. the keep probability.

    Returns:
      (summary, loss, mean_reward) of this step.
    """
    feed_dict = { self.X_input: given_tokens, self.rewards: rewards, self.dropout_keep_prob: dropout }
    _, summary, loss, mean_reward = sess.run([
        self.train_operation, 
        self.g_train_summary, 
        self.loss, 
        self.mean_reward
    ], feed_dict=feed_dict)
    
    return summary, loss, mean_reward
  
  def translate_samples(self, sequence):
    """Decode softmax outputs into sentences and token-id arrays.

    Args:
      sequence: values that reshape to (batch_size, seq_length, vocab_size).

    Returns:
      (sentences, vectors): sentences holds one string per row in which every
      word is followed by a single space; vectors is the integer array of the
      most likely token id per position.
    """
    batch_softmax = np.reshape(sequence, [self.batch_size, self.seq_length, self.vocab_size])

    # Most likely token id for every position of every row.
    vectors = np.argmax(batch_softmax, axis=-1)
    sentences = [
        ''.join(self.word_index[token_id] + ' ' for token_id in row)
        for row in vectors
    ]

    return sentences, vectors

## Build Generator model

In [0]:
# Create the generator and a second, identically configured instance (target_lstm),
# then build both graphs in that order.
# NOTE(review): both instances build inside the same 'generator' variable scope
# with AUTO_REUSE, so variables created through tf.get_variable may end up
# shared between them -- confirm whether that is intended.
generator = Generator(
    batch_size=BATCH_SIZE,
    seq_length=SEQ_LENGTH,
    vocab_size=VOCAB_SIZE,
    emb_size=G_EMB_SIZE,
    temperature=G_TEMPERATURE,
    hidden_layer_sizes=G_HIDDEN_LAYER_SIZES,
    word_index=WORD_INDEX,
    learning_rate=G_LEARNING_RATE,
)

target_lstm = Generator(
    batch_size=BATCH_SIZE,
    seq_length=SEQ_LENGTH,
    vocab_size=VOCAB_SIZE,
    emb_size=G_EMB_SIZE,
    temperature=G_TEMPERATURE,
    hidden_layer_sizes=G_HIDDEN_LAYER_SIZES,
    word_index=WORD_INDEX,
    learning_rate=G_LEARNING_RATE,
)

generator.build_model()
target_lstm.build_model()

## Start Adversarial Training

In [0]:
# Create the TensorBoardColab helper; the training cell takes its summary writer from it.
tbc=TensorBoardColab()

# Additionally start a TensorBoard process in the background that reads LOG_DIR
# and listens on port 6006.
LOG_DIR = '/tmp/log'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

In [0]:
def target_loss(sess, target_lstm, X_train):
  """Return the mean pretraining loss of target_lstm over BATCH_SIZE steps on X_train.

  target_loss means the oracle negative log-likelihood tested with the oracle
  model "target_lstm". For more details, please see the Section 4 in
  https://arxiv.org/abs/1609.05473

  NOTE(review): every step calls target_lstm.pretrain(), which also runs the
  optimizer, so this evaluation updates the model it measures.
  """
  def run_step():
    # The generated samples are not used; only the loss of the step is kept.
    target_lstm.generate(sess, X_train, G_DROPOUT)
    _, step_loss = target_lstm.pretrain(sess, X_train, G_DROPOUT)
    return step_loss

  return np.mean([run_step() for _ in range(BATCH_SIZE)])

def add_samples(headlines, df):
  """Return df extended by one row per headline, each labelled fake=1.

  All rows are appended with a single concat instead of one DataFrame.append
  call per headline, which copied the whole frame every time and no longer
  exists in pandas 2.

  Args:
    headlines: iterable of generated sentences.
    df: frame with the columns 'text' and 'fake'; it is not modified.

  Returns:
    A new frame with a fresh 0..n-1 index, or df itself when headlines is empty.
  """
  headlines = list(headlines)
  if not headlines:
    return df

  new_rows = pd.DataFrame({'text': headlines, 'fake': 1})
  return pd.concat([df, new_rows], ignore_index=True)

In [0]:
# Run generator pretraining, discriminator pretraining and the adversarial loop in one session.
with tf.Session() as sess:
  # Seeds Python's and NumPy's random generators; no TensorFlow seed is set here.
  random.seed(88)
  np.random.seed(88)
        
  print('start GAN training at', datetime.datetime.now())
  writer = tbc.get_writer()
  sess.run(tf.global_variables_initializer())
    
  # PRETRAINING GENERATOR
  for epoch in tnrange(G_PRETRAIN_EPOCHS, desc='gen_pretrain_loop'):
    X_train, y_train, X_test, y_test = get_real_data()
    summary, g_pretrain_loss = generator.pretrain(sess, X_train, G_DROPOUT)
    # Every 5th epoch: store generated samples in df_evaluation and report target_loss on them.
    if epoch % 5 == 0:
      fake_sentences, fake_sequences = generator.generate(sess, X_train, G_DROPOUT)
      df_evaluation = add_samples(fake_sentences, df_evaluation)
      X_train, y_train, X_test, y_test = get_evaluation_data()
      test_loss = target_loss(sess, target_lstm, X_train)
      print('target_lstm_pretrain_summary: nll={0:.3f}'.format(test_loss))
      
    writer.add_summary(summary, epoch)
    print('g_pretrain_summary: pretrain_loss={0:.3f}'.format(g_pretrain_loss))
    
  # PRETRAIN DISCRIMINATOR
  for epoch in tnrange(D_PRETRAIN_EPOCHS, desc='dis_pretrain_loop'):
    # Add a fresh batch of generated headlines to df_fake before training on mixed data.
    X_train, y_train, X_test, y_test = get_real_data()
    fake_sentences, fake_sequences = generator.generate(sess, X_train, G_DROPOUT)
    df_fake = add_samples(fake_sentences, df_fake)
    
    # 3 * BATCH_SIZE discriminator steps, each on a newly drawn mixed batch.
    for _ in range(3):
      for _ in range(BATCH_SIZE):
        X_train, y_train, X_test, y_test = get_mixed_data() #maybe out of this loop?
        summary, d_pretrain_loss, d_pretrain_acc = discriminator.pretrain(sess, X_train, y_train, D_DROPOUT)
      
    # Only the summary and metrics of the last step of this epoch are logged.
    writer.add_summary(summary, epoch)
    print('d_pretrain_summary: d_pretrain_loss={0:.3f}, d_pretrain_accuracy={1:.3f}'.format(d_pretrain_loss, d_pretrain_acc))
    
  # The generator itself serves as the rollout model for the rewards.
  rollout = generator

  # ADVERSARIAL TRAINING
  for epoch in tnrange(TOTAL_EPOCHS, desc='gan_epoch_loop'):
    X_train, y_train, X_test, y_test = get_real_data()
    fake_sentences, fake_sequences = generator.generate(sess, X_train, G_DROPOUT)
    df_fake = add_samples(fake_sentences, df_fake)
    
    # One generator step on its own samples, with rewards averaged over 16 repetitions.
    rewards = rollout.get_reward(sess, fake_sequences, 16, discriminator, G_DROPOUT)
    summary, g_loss, reward = generator.train(sess, fake_sequences, rewards, G_DROPOUT)
    writer.add_summary(summary, epoch)
    
    print('g_summary: loss={0:.3f}, reward={1:.3f}'.format(g_loss, reward))
    
    # Every 5th epoch: store generated samples in df_evaluation and report target_loss on them.
    if epoch % 5 == 0:
      fake_sentences, fake_sequences = generator.generate(sess, X_train, G_DROPOUT)
      df_evaluation = add_samples(fake_sentences, df_evaluation)
      X_train, y_train, X_test, y_test = get_evaluation_data()
      test_loss = target_loss(sess, target_lstm, X_train)
      print('target_lstm_summary: nll={0:.3f}'.format(test_loss))
    
    # Each iteration adds new generated samples to df_fake and then trains the discriminator.
    for _ in tnrange(G_EPOCHS, desc='gen_train_loop'):
      X_train, y_train, X_test, y_test = get_real_data()
      fake_sentences, fake_sequences = generator.generate(sess, X_train, G_DROPOUT)
      df_fake = add_samples(fake_sentences, df_fake)
      
      for _ in tnrange(D_EPOCHS, desc='dis_train_loop'):
        for _ in range(BATCH_SIZE):
          X_train, y_train, X_test, y_test = get_mixed_data()
          summary, d_loss, d_acc = discriminator.train(sess, X_train, y_train, D_DROPOUT)
        
      print('d_summary: loss={0:.3f}, accuracy={1:.3f}'.format(d_loss, d_acc))
        
    # Logs the summary of the last discriminator step of this epoch.
    writer.add_summary(summary, epoch)
    print('df_fake: ', df_fake.tail(2))
    print('df_evaluation: ', df_evaluation.tail(2))
    
  print('finish GAN training at', datetime.datetime.now())

In [0]:
from pathlib import Path

print(df_fake.tail())

# Make sure the output folder exists; to_csv() does not create missing directories.
results_dir = Path('./results')
results_dir.mkdir(parents=True, exist_ok=True)

# Use the first generated_headlines_<n>.csv name that is not taken yet, so
# results of earlier runs are never overwritten.
count = 1
saved = False
while not saved:
  filename = 'generated_headlines_%d.csv' % count
  filepath = results_dir / filename

  if filepath.exists():
    count += 1
  else:
    df_fake.to_csv(filepath, sep='\t', encoding='utf-8')
    saved = True