<a href="https://colab.research.google.com/github/hogch/masterproject_gan/blob/master/SeqGAN_headlines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text Generation using SeqGAN

This notebook generates news headlines using the Machine Learning technology GAN (Generative Adversarial Networks).

## Import dependencies



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd /content/drive/My\ Drive/Colab\ Notebooks/Masterproject
!ls

/content/drive/My Drive/Colab Notebooks/Masterproject
abcnews-date-text.csv	    headlines_short_3_5.csv  screenshots
DEV_SeqGAN_headlines.ipynb  headlines_short.csv      SeqGAN
Graph			    news-headlines.db	     SeqGAN_headlines.ipynb
headlines.csv		    results


**Install required dependencies manually**

In [3]:
!pip install tflearn
!pip install tqdm
!pip install --force https://github.com/chengs/tqdm/archive/colab.zip
!pip install tensorboardcolab

Collecting https://github.com/chengs/tqdm/archive/colab.zip
  Downloading https://github.com/chengs/tqdm/archive/colab.zip
[K     | 481kB 570kB/s
Building wheels for collected packages: tqdm
  Running setup.py bdist_wheel for tqdm ... [?25l- done
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-a69so2se/wheels/41/18/ee/d5dd158441b27965855b1bbae03fa2d8a91fe645c01b419896
Successfully built tqdm
[31mthinc 6.12.1 has requirement wrapt<1.11.0,>=1.10.0, but you'll have wrapt 1.11.1 which is incompatible.[0m
[31mspacy 2.0.18 has requirement numpy>=1.15.0, but you'll have numpy 1.14.6 which is incompatible.[0m
[31mpymc3 3.6 has requirement joblib<0.13.0, but you'll have joblib 0.13.1 which is incompatible.[0m
[31mfeaturetools 0.4.1 has requirement pandas>=0.23.0, but you'll have pandas 0.22.0 which is incompatible.[0m
Installing collected packages: tqdm
  Found existing installation: tqdm 4.28.1
    Uninstalling tqdm-4.28.1:
      Successfully uninstalled tqdm-4.28.1
Success

**Import required modules**

In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import datetime

from keras.preprocessing.text import Tokenizer
from tflearn.data_utils import pad_sequences, to_categorical
from tensorflow.contrib import slim
from tqdm import tqdm, tnrange

from tensorboardcolab import *

Using TensorFlow backend.


In [5]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


## Load Dataset

In [6]:
"""df_real = pd.read_csv('headlines.csv', sep=',', usecols=['text', 'fake'])
df_real = df_real.sample(frac=1)
df_real = df_real[:500000]
print(df_real.shape)
df_real.head()"""

df_real = pd.read_csv('headlines_short.csv', sep=',', usecols=['text', 'fake'])
df_real = df_real.sample(frac=1)
print(df_real.shape)
df_real.head()

(324518, 2)


Unnamed: 0,text,fake
214275,rea quickest in superbike practise,0
313988,new bunbury digital screen scrapped,0
26634,adelaide police investigate suspicious death,0
286010,walsh opens upgraded kerang levee,0
17846,dollar steadies volatile stock market,0


In [7]:
df_fake = pd.DataFrame(columns=['text', 'fake'])
print(df_fake.shape)
df_fake.head()

(0, 2)


Unnamed: 0,text,fake


## Define Test-, Trainingset and Hyper-Parameter

In [0]:
# General Hyper-Parameter
BATCH_SIZE = 64
SEQ_LENGTH = 5 # average sequence length of the given sentences in the dataset
TRAINING_SPLIT = 1
TOTAL_EPOCHS = 200

# Discriminator Hyper-Parameter
D_PRETRAIN_EPOCHS = 50
D_EPOCHS = 3
D_EMB_SIZE = 300
D_NUM_CLASSES = 2
D_FILTER_SIZES = [1,2,3,4,5]
D_NUM_FILTERS = 128
D_DROPOUT = 0.4
D_LEARNING_RATE = 0.01

# Generator Hyper-Parameter
G_PRETRAIN_EPOCHS = 1000
G_EPOCHS = 5
G_EMB_SIZE = 300
G_HIDDEN_LAYER_SIZES = [32] # hidden state dimension of lstm cell
G_TEMPERATURE = 0.5
G_DROPOUT = 0.4
G_LEARNING_RATE = 0.001

In [0]:
def load_data(df):
  texts = []
  labels = []
  
  for row in zip(df['text'], df['fake']):
    texts.append(row[0].strip())
    labels.append(row[1])
  
  return texts, labels

In [10]:
tokenizer = Tokenizer()
texts, labels = load_data(df_real)
tokenizer.fit_on_texts(texts)

#sorted_word_count = sorted(tokenizer.word_counts.items(), key=lambda x: x[1], reverse=True)
#WORD_INDEX_BIG = [word for idx, word in enumerate(tokenizer.word_index)]
WORD_INDEX = [w for w, c in tokenizer.word_counts.items() if c > 5]

VOCAB_SIZE = len(WORD_INDEX)

print(VOCAB_SIZE)

17751


In [0]:
import random

def get_datasets(texts, labels):
  sequences = tokenizer.texts_to_sequences(texts)
  text_seq = pad_sequences(sequences, maxlen=SEQ_LENGTH)
    
  labels = np.asarray(labels)
  indices = np.arange(text_seq.shape[0])
  np.random.shuffle(indices)
  text_seq = text_seq[indices]
  labels = labels[indices]
  
  test_size = int(TRAINING_SPLIT * text_seq.shape[0])
  
  X_train = text_seq[:test_size]
  y_train = to_categorical(labels[:test_size], 2)
  X_test = text_seq[test_size:]
  y_test = to_categorical(labels[test_size:], 2)
  
  return X_train[:BATCH_SIZE], y_train[:BATCH_SIZE], X_test[:BATCH_SIZE], y_test[:BATCH_SIZE]
  
def get_fake_data():
  global df_fake
  df_fake = df_fake.sample(frac=1)
  
  texts, labels = load_data(df_fake)
  return get_datasets(texts, labels)

def get_real_data():
  global df_real
  df_real = df_real.sample(frac=1)

  texts, labels = load_data(df_real)
  return get_datasets(texts, labels)

def get_mixed_data():
  # global notation ???
  real = df_real.sample(frac=1)
  fake = df_fake.sample(frac=1)
  df = pd.concat([real[:BATCH_SIZE*6], fake[:BATCH_SIZE*6]])
  df = df.sample(frac=1)
  
  texts, labels = load_data(df)
  return get_datasets(texts, labels)


## Discriminator
model for classifying sequences (here headlines) as real or fake.
In this implementation the discriminative model uses following layers: 
1.   embedding layer
2.   convolution layer with max-pooling operation
4.   softmax layer

In [0]:
class Discriminator:
  def __init__(self, batch_size, vocab_size, seq_length, emb_size, num_classes, 
               filter_sizes, num_filters, learning_rate):
    self.batch_size = batch_size
    self.vocab_size = vocab_size
    self.seq_length = seq_length
    self.emb_size = emb_size
    self.num_classes = num_classes
    self.filter_sizes = filter_sizes
    self.num_filters = num_filters
    self.learning_rate = learning_rate

    self.X_input = tf.placeholder(tf.int32, shape=[self.batch_size, self.seq_length], name='d_X_input')
    self.y_input = tf.placeholder(tf.int32, shape=[self.batch_size, self.num_classes], name='d_y_input')
    self.dropout_keep_prob = tf.placeholder(tf.float32, name='d_dropout_keep_prob')
    
    # Keeping track of l2 regularization loss (optional)
    self.l2_reg_lambda = 0.2
    self.l2_loss = tf.constant(0.0)

  def build_model(self):
    with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):
      self.embedding_layer = self.build_embedding_layer()
      self.convolution_maxpool_layer = self.build_convolution_maxpool_layer()
      self.scores, self.predictions = self.build_softmax_layer()

      self.calc_mean_cross_entropy_loss()
      self.calc_accuracy()

      self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
      
      self.d_summary = tf.summary.merge([
          tf.summary.scalar('d_loss', self.loss),
          tf.summary.scalar('d_accuracy', self.accuracy)
      ])
        
  def build_embedding_layer(self):
    with tf.device('gpu:0'), tf.name_scope('d_embedding_layer'):
      emb_matrix = tf.Variable(tf.random_uniform([self.vocab_size, self.emb_size], -1.0, 1.0))
      emb_lookup = tf.nn.embedding_lookup(emb_matrix, self.X_input)
      emb_lookup_expand = tf.expand_dims(emb_lookup, -1)
      
      return emb_lookup_expand
    
  def build_convolution_maxpool_layer(self):
    with tf.name_scope('d_convolution_maxpool_layer'):
      pooled_outputs = []
      for filter_size in self.filter_sizes:
        with tf.name_scope('d_conv-maxpool-%s' % filter_size):
          # Convolution Layer
          filter_shape = [filter_size, self.emb_size, 1, self.num_filters]
          W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='d_W')
          b = tf.Variable(tf.constant(0.1, shape=[self.num_filters]), name='d_b')
          conv = tf.nn.conv2d(
              input=self.embedding_layer,
              filter=W,
              strides=[1,1,1,1], # the filter is applied to the input in one-pixel intervals in each dimension, corresponding to a “full” convolution
              padding='VALID',
              name='d_conv'
          )
          # Apply non-linearity - activation function
          activation = tf.nn.relu(tf.nn.bias_add(conv, b), name='d_relu')
          # Maxpooling over outputs
          max_pooling = tf.nn.max_pool(
              value=activation,
              ksize=[1, self.seq_length-filter_size+1, 1, 1],
              strides=[1,1,1,1],
              padding='VALID',
              name='max_pooling'
          )
          pooled_outputs.append(max_pooling)

      # combine all the pooled features
      self.num_filter_total = self.num_filters * len(self.filter_sizes)
      h_pool = tf.concat(pooled_outputs, axis=3)
      
      return tf.reshape(h_pool, [-1, self.num_filter_total])
        
  def build_softmax_layer(self): 
    with tf.name_scope('highway'):
      self.h_highway = self.highway(
          self.convolution_maxpool_layer, self.convolution_maxpool_layer.get_shape()[1], 1, 0
      )

    with tf.name_scope('dropout'):
      self.h_drop = tf.nn.dropout(self.h_highway, self.dropout_keep_prob)
      
    with tf.name_scope('softmax_output'):
      W_softmax = tf.Variable(
          tf.truncated_normal(
              [self.num_filter_total, self.num_classes], 
              stddev=0.1
          ), name='d_W_softmax'
      )
      b_softmax = tf.Variable(tf.constant(0.1, shape=[self.num_classes]), name='d_b_softmax')
            
      self.l2_loss += tf.nn.l2_loss(W_softmax)
      self.l2_loss += tf.nn.l2_loss(b_softmax)
      
      self.scores = tf.nn.xw_plus_b(self.h_drop, W_softmax, b_softmax, name='d_scores')
      #self.scores = tf.matmul(self.convolution_maxpool_layer, W_softmax) + b_softmax
      self.truth_prob = tf.nn.softmax(self.scores, -1)[:, 1]
      predictions = tf.argmax(self.scores, 1, name='d_predictions')
      
    return self.scores, predictions
  
  def calc_mean_cross_entropy_loss(self):
    with tf.name_scope('d_loss'):
      losses = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.scores, labels=self.y_input)
      self.loss = tf.reduce_mean(losses) + self.l2_reg_lambda * self.l2_loss
      
  def calc_accuracy(self):
    with tf.name_scope('d_accuracy'):
      correct_predictions = tf.equal(self.predictions, tf.argmax(self.y_input, 1))
      self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')
      
  def highway(self, input_, size, num_layers=1, bias=-2.0, f=tf.nn.relu):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).
    t = sigmoid(Wy + b)
    z = t * g(Wy + b) + (1 - t) * y
    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.
    """
    with tf.variable_scope('highway'):
      size = int(size)
      for idx in range(num_layers):
        g = f(slim.fully_connected(input_, size, scope='highway_lin_%d' % idx, activation_fn=None))
        t = tf.sigmoid(slim.fully_connected(input_, size, scope='highway_gate_%d' % idx, activation_fn=None) + bias)

        output = t * g + (1. - t) * input_
        input_ = output
        
    return output
  
  def get_truth_prob(self, sess, X):
    feed_dict = { self.X_input: X, self.dropout_keep_prob: 1.0 }
        
    return sess.run(self.truth_prob, feed_dict=feed_dict)
        
  def train(self, sess, X, y, dropout):
    feed_dict = {
        self.X_input: X,
        self.y_input: y,
        self.dropout_keep_prob: dropout
    }
    _, summary, loss, acc = sess.run([
        self.optimizer, 
        self.d_summary,
        self.loss,
        self.accuracy
    ], feed_dict=feed_dict)
    
    return summary, loss, acc

## Build Discriminator model and train model

In [0]:
tf.reset_default_graph()

discriminator = Discriminator(BATCH_SIZE, VOCAB_SIZE, SEQ_LENGTH, D_EMB_SIZE, 
                              D_NUM_CLASSES, D_FILTER_SIZES, D_NUM_FILTERS, D_LEARNING_RATE)
discriminator.build_model()

## Generator

LSTM with Reinforcement Learning for sequence generation.

In [0]:
from tensorflow.contrib import rnn, layers, seq2seq, slim
import random

# inspired by https://www.oreilly.com/ideas/introduction-to-lstms-with-tensorflow

class Generator:
  def __init__(self, batch_size, seq_length, vocab_size, emb_size, temperature,
               hidden_layer_sizes, word_index, learning_rate):
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.vocab_size = vocab_size
    self.emb_size = emb_size
    self.hidden_layer_sizes = hidden_layer_sizes
    self.word_index = word_index
    self.learning_rate = learning_rate
    self.temperature = temperature
    
    self.grad_clip = 5.0
    
    self.X_input = tf.placeholder(tf.int32, shape=[self.batch_size, self.seq_length], name='g_X_input')
    self.dropout_keep_prob = tf.placeholder(tf.float32, name='g_dropout_keep_prob')
    self.rewards = tf.placeholder(tf.float32, shape=[self.batch_size, self.seq_length], name='rewards')
    
  def build_model(self):
    with tf.variable_scope('generator', reuse=tf.AUTO_REUSE):
      self.embedding_layer = self.build_embedding_layer(self.X_input)
      self.outputs, final_state = self.build_lstm_layers()
      
      # PRETRAINING
      """pretrain_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
      pretrain_loss = self.get_pretrain_loss()
      self.pretrain_operation = slim.learning.create_train_op(
          pretrain_loss, pretrain_optimizer, clip_gradient_norm=self.grad_clip
      )"""
      
      # TRAINING
      self.predictions, self.loss = self.get_prediction_and_loss()
      optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
      self.train_operation = slim.learning.create_train_op(
          self.loss, optimizer, clip_gradient_norm=self.grad_clip
      )
                  
      # SUMMARY
      #self.g_pretrain_summary = tf.summary.scalar('g_pretrain_loss', pretrain_loss)
      self.g_train_summary = tf.summary.merge([
          tf.summary.scalar('g_loss', self.loss),
          tf.summary.scalar('g_reward', tf.reduce_mean(self.rewards))
      ])
        
  def build_embedding_layer(self, X_input):
    with tf.name_scope('g_embedding_layer'):
      emb_matrix = tf.Variable(tf.random_uniform([self.vocab_size, self.emb_size], -1.0, 1.0))
      emb_lookup = tf.nn.embedding_lookup(emb_matrix, X_input)
      
    return emb_lookup
      
  def build_lstm_layers(self):
    with tf.name_scope('g_lstm_layers'):
      layers = [rnn.LSTMCell(layer_size) for layer_size in self.hidden_layer_sizes]
      dropouts = [rnn.DropoutWrapper(layer, output_keep_prob=self.dropout_keep_prob) for layer in layers]
      cell = rnn.MultiRNNCell(dropouts) # , state_is_tuple=True)?
      
      initial_state = cell.zero_state(self.batch_size, tf.float32)
      outputs, final_state = tf.nn.dynamic_rnn(cell, self.embedding_layer, initial_state=initial_state)
      
    return outputs, final_state
  
  """def get_pretrain_loss(self):
    W2 = tf.Variable(tf.random_normal([self.hidden_layer_sizes[-1], self.vocab_size]), dtype=tf.float32)
    b2 = tf.Variable(tf.zeros([1, self.vocab_size]), dtype=tf.float32)
      
    output = tf.reshape(self.outputs, [-1, self.hidden_layer_sizes[-1]])
    logits = tf.matmul(output, W2) + b2 # broadcasted addition
    
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=self.X_input, logits=logits[-1]
    ))
    
    return loss"""
  
  def get_prediction_and_loss(self):
    predictions = []
    W2 = tf.Variable(tf.random_normal([self.hidden_layer_sizes[-1], self.vocab_size]), dtype=tf.float32)
    b2 = tf.Variable(tf.zeros([1, self.vocab_size]), dtype=tf.float32)
      
    output = tf.reshape(self.outputs, [-1, self.hidden_layer_sizes[-1]])
    logits = tf.matmul(output, W2) + b2 # broadcasted addition
    predictions.append(tf.nn.softmax(tf.div(logits, self.temperature)))
      
    loss = -tf.reduce_sum(
      tf.one_hot(tf.to_int32(tf.reshape(self.X_input, [-1])), self.vocab_size, 1.0, 0.0) * 
      tf.log(tf.clip_by_value(tf.reshape(predictions[-1], [-1, self.vocab_size]), 1e-20, 1.0))
    ) / (self.seq_length * self.batch_size)
    
    return predictions, loss
      
  def generate(self, sess, given_tokens, dropout):
    feed_dict = { self.X_input: given_tokens, self.dropout_keep_prob: dropout }
    samples = sess.run([self.predictions], feed_dict=feed_dict)
    sentences, sequences = self.translate_samples(samples)
    
    return sentences, sequences
    
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  # REINFORCEMENT LEARNING  
  def get_reward(self, sess, given_tokens, rollout_num, dis, dropout):
    rewards = np.zeros((self.batch_size, self.seq_length))
        
    for i in range(rollout_num):
      for given_num in range(1, self.seq_length):
        feed_dict = { self.X_input: given_tokens, self.dropout_keep_prob: dropout }
        samples = sess.run(self.predictions, feed_dict=feed_dict)
        sentences, sequence = self.translate_samples(samples)
        rewards[:, given_num] += dis.get_truth_prob(sess, sequence)
            
    rewards /= rollout_num
    return rewards
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  
  def pretrain(self, sess, given_tokens, dropout):
    feed_dict = { self.X_input: given_tokens, self.dropout_keep_prob: dropout }
    sess.run(self.pretrain_operation, feed_dict=feed_dict)
    _, summary = sess.run([self.pretrain_operation, self.g_pretrain_summary], feed_dict=feed_dict)
    
    return summary
  
  def train(self, sess, given_tokens, rewards, dropout):
    feed_dict = { self.X_input: given_tokens, self.rewards: rewards, self.dropout_keep_prob: dropout }
    _, summary, loss, rewards = sess.run([
        self.train_operation, 
        self.g_train_summary, 
        self.loss, 
        tf.reduce_mean(self.rewards)
    ], feed_dict=feed_dict)
    
    return summary, loss, rewards
  
  def translate_samples(self, sequence):
    batch_softmax = np.reshape(sequence, [self.batch_size, self.seq_length, self.vocab_size])

    sentences = []
    vectors = []
    for sequence in batch_softmax:
      sentence = ''
      vector = []
      for pos in sequence:
        vector_position = np.argmax(pos)
        vector.append(vector_position)
        word = self.word_index[vector_position]
        sentence += word
        sentence += ' '

      sentences.append(sentence)
      vectors.append(vector)

    vectors = np.asarray(vectors)
    return sentences, vectors

## Build Generator model

In [0]:
generator = Generator(BATCH_SIZE, SEQ_LENGTH, VOCAB_SIZE, G_EMB_SIZE, G_TEMPERATURE,
                      G_HIDDEN_LAYER_SIZES, WORD_INDEX, G_LEARNING_RATE)

generator.build_model()

## Start Adversarial Training

In [16]:
tbc=TensorBoardColab()

LOG_DIR = '/tmp/log'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

Wait for 8 seconds...
TensorBoard link:
https://e66cd4c9.ngrok.io


In [0]:
def add_fake_samples(headlines):
  global df_fake
  
  for headline in headlines:
    df_fake = df_fake.append({'text': headline , 'fake': 1} , ignore_index=True)

In [18]:
with tf.Session() as sess:
  print('start GAN training at', datetime.datetime.now())
  writer = tbc.get_writer()
  sess.run(tf.global_variables_initializer())
    
  # PRETRAINING GENERATOR
  """for epoch in range(G_PRETRAIN_EPOCHS):
    # seq_length -1 und von 1 anfangen
    X_train, y_train, X_test, y_test = get_real_data()
    summary = generator.pretrain(sess, X_train, G_DROPOUT)
    writer.add_summary(summary, epoch)
    
  # PRETRAIN DISCRIMINATOR
  for eoch in range(D_PRETRAIN_EPOCHS):
    X_train, y_train, X_test, y_test = get_real_data()
    fake_sentences, fake_sequences = generator.generate(sess, X_train, G_DROPOUT)
    #translate_samples(tokenizer, fake_samples, df_fake)
    
    for _ in range(3):
      X_train, y_train, X_test, y_test = get_mixed_data()
      discriminator.train(sess, X_train, y_train, D_DROPOUT)"""

  # ADVERSARIAL TRAINING
  for epoch in tnrange(TOTAL_EPOCHS, desc='gan_epoch_loop'):
    X_train, y_train, X_test, y_test = get_real_data()
    fake_sentences, fake_sequences = generator.generate(sess, X_train, G_DROPOUT)
    add_fake_samples(fake_sentences)
    
    rewards = generator.get_reward(sess, fake_sequences, 16, discriminator, G_DROPOUT)
    summary, g_loss, reward = generator.train(sess, fake_sequences, rewards, G_DROPOUT)
    writer.add_summary(summary, epoch)
    
    print('g_summary: loss={0:.3f}, reward={1:.3f}'.format(g_loss, reward))
    
    for _ in tnrange(G_EPOCHS, desc='gen_train_loop'):
      X_train, y_train, X_test, y_test = get_real_data()
      fake_sentences, fake_sequences = generator.generate(sess, X_train, G_DROPOUT)
      add_fake_samples(fake_sentences)
      
      for _ in tnrange(D_EPOCHS, desc='dis_train_loop'):
        X_train, y_train, X_test, y_test = get_mixed_data()
        summary, d_loss, d_acc = discriminator.train(sess, X_train, y_train, D_DROPOUT)
        
      print('d_summary: loss={0:.3f}, accuracy={1:.3f}'.format(d_loss, d_acc))
        
    writer.add_summary(summary, epoch)
    print(df_fake.tail(2))
    
    """tokenizer, word_index, X_real_train, y_real_train, X_real_test, y_real_test = get_real_data()
    summary = sess.run(generator.image_summary, feed_dict={ generator.given_tokens: X_real_train[:BATCH_SIZE] })
    writer.add_summary(summary, epoch)"""
    
  print('finish GAN training at', datetime.datetime.now())

start GAN training at 2019-01-30 00:08:54.364118




g_summary: loss=14.115, reward=0.299


d_summary: loss=4.651, accuracy=0.562


d_summary: loss=1.925, accuracy=0.766


d_summary: loss=1.180, accuracy=0.875


d_summary: loss=2.666, accuracy=0.734


d_summary: loss=1.303, accuracy=0.797
                                    text fake
382  piece cw genius parkinsonia norris     1
383  id slow thin presentation harcourt     1
g_summary: loss=14.108, reward=0.377


d_summary: loss=1.398, accuracy=0.828


d_summary: loss=1.106, accuracy=0.859


d_summary: loss=1.030, accuracy=0.891


d_summary: loss=1.481, accuracy=0.781


d_summary: loss=0.985, accuracy=0.891
                                                 text fake
766              lapkin farrell stricker feels since     1
767  provider writing properties locusts differences     1
g_summary: loss=13.787, reward=0.695


d_summary: loss=1.411, accuracy=0.875


d_summary: loss=0.645, accuracy=0.922


d_summary: loss=0.924, accuracy=0.891


d_summary: loss=0.860, accuracy=0.891


d_summary: loss=1.003, accuracy=0.859
                                        text fake
1150    skull mediation adfa guineas flower     1
1151  leg racket quit southport impractical     1
g_summary: loss=13.798, reward=0.542


d_summary: loss=0.352, accuracy=0.953


d_summary: loss=0.426, accuracy=0.953


d_summary: loss=0.440, accuracy=0.922


d_summary: loss=0.423, accuracy=0.938


d_summary: loss=0.412, accuracy=0.938
                                          text fake
1534             lebanon pulp guineas doll fc     1
1535  november steketee trample pluto unhappy     1
g_summary: loss=13.548, reward=0.473


d_summary: loss=0.321, accuracy=0.969


d_summary: loss=0.206, accuracy=1.000


d_summary: loss=0.586, accuracy=0.891


d_summary: loss=0.467, accuracy=0.891


d_summary: loss=0.314, accuracy=0.938
                                               text fake
1918                bitter trample afls visitor id     1
1919  atmosphere misleading emily emily pineapples     1
g_summary: loss=13.598, reward=0.341


d_summary: loss=0.264, accuracy=0.938


d_summary: loss=0.336, accuracy=0.953


d_summary: loss=0.239, accuracy=0.969


d_summary: loss=0.384, accuracy=0.938


d_summary: loss=0.375, accuracy=0.953
                                                 text fake
2302       barely williamsburg mushroom soap wearing     1
2303  recruits ridicules remanded submission guineas     1
g_summary: loss=13.731, reward=0.464


d_summary: loss=0.477, accuracy=0.922


d_summary: loss=0.292, accuracy=0.938


d_summary: loss=0.186, accuracy=0.953


d_summary: loss=0.383, accuracy=0.875


d_summary: loss=0.325, accuracy=0.938
                                                   text fake
2686  winehouse editors wonthaggi commemorations fed...    1
2687              severed emily caused bath nimmitabel     1
g_summary: loss=13.316, reward=0.394


d_summary: loss=0.130, accuracy=0.984


d_summary: loss=0.226, accuracy=0.969


d_summary: loss=0.172, accuracy=0.984


d_summary: loss=0.113, accuracy=1.000


d_summary: loss=0.119, accuracy=0.984
                                                text fake
3070  sharemarkets alberici buyers undermanned 55pc     1
3071       animation ramping disrupted lives poster     1
g_summary: loss=13.427, reward=0.221


d_summary: loss=0.348, accuracy=0.922


d_summary: loss=0.119, accuracy=0.984


d_summary: loss=0.118, accuracy=0.984


d_summary: loss=0.311, accuracy=0.906


d_summary: loss=0.163, accuracy=0.969
                                        text fake
3454   nq racism kickstart several overseas     1
3455  carry sentinel raises clinics surplus     1
g_summary: loss=13.030, reward=0.274


d_summary: loss=0.227, accuracy=0.953


d_summary: loss=0.106, accuracy=0.984


d_summary: loss=0.147, accuracy=0.953


d_summary: loss=0.162, accuracy=0.969


d_summary: loss=0.162, accuracy=0.953
                                       text fake
3838    round palmview booker newell slack     1
3839  bangalore pineapples adm 0904 vettel     1
g_summary: loss=13.494, reward=0.179


d_summary: loss=0.075, accuracy=1.000


d_summary: loss=0.111, accuracy=0.984


d_summary: loss=0.329, accuracy=0.922


d_summary: loss=0.117, accuracy=0.984


d_summary: loss=0.110, accuracy=0.984
                                                   text fake
4222             mend seven offence policing mediation     1
4223  evacuation enjoying cooperation disabled charlie     1
g_summary: loss=13.501, reward=0.338


d_summary: loss=0.238, accuracy=0.938


d_summary: loss=0.139, accuracy=0.969


d_summary: loss=0.107, accuracy=0.969


d_summary: loss=0.200, accuracy=0.938


d_summary: loss=0.130, accuracy=0.969
                                       text fake
4606       relay gums invests counsel path     1
4607  classroom nascar strategy ti standen     1
g_summary: loss=13.322, reward=0.434


d_summary: loss=0.061, accuracy=1.000


d_summary: loss=0.093, accuracy=0.984


d_summary: loss=0.210, accuracy=0.969


d_summary: loss=0.077, accuracy=0.984


d_summary: loss=0.082, accuracy=1.000
                                                   text fake
4990  alcoholic independence changes federalism elki...    1
4991             volkers iconic placements walters gdp     1
g_summary: loss=13.182, reward=0.369


d_summary: loss=0.247, accuracy=0.953


d_summary: loss=0.120, accuracy=0.984


d_summary: loss=0.084, accuracy=0.984


d_summary: loss=0.069, accuracy=0.984


d_summary: loss=0.075, accuracy=0.984
                                      text fake
5374  promina surjan javelin wattle cowal     1
5375            mua mike le hobarts mulls     1
g_summary: loss=12.763, reward=0.498


d_summary: loss=0.123, accuracy=0.984


d_summary: loss=0.152, accuracy=0.922


d_summary: loss=0.204, accuracy=0.969


d_summary: loss=0.044, accuracy=1.000


d_summary: loss=0.059, accuracy=1.000
                                        text fake
5758  princes smallest conversion pic levee     1
5759          bald maher swimmer rib kohler     1
g_summary: loss=13.193, reward=0.406


d_summary: loss=0.060, accuracy=1.000


d_summary: loss=0.072, accuracy=0.984


d_summary: loss=0.156, accuracy=0.953


d_summary: loss=0.242, accuracy=0.953


d_summary: loss=0.058, accuracy=0.984
                                            text fake
6142  heyfield genocide shortages actu petition     1
6143      update dv starcraft celebrate confirm     1
g_summary: loss=12.653, reward=0.325


d_summary: loss=0.062, accuracy=0.984


d_summary: loss=0.152, accuracy=0.969


d_summary: loss=0.059, accuracy=0.984


d_summary: loss=0.179, accuracy=0.969


d_summary: loss=0.100, accuracy=0.969
                                                text fake
6526  weddings pathologist petty overwhelms unhappy     1
6527         flanagan mysterious rr finnish heppell     1
g_summary: loss=12.685, reward=0.391


d_summary: loss=0.049, accuracy=1.000


d_summary: loss=0.185, accuracy=0.969


d_summary: loss=0.068, accuracy=1.000


d_summary: loss=0.228, accuracy=0.953


d_summary: loss=0.244, accuracy=0.984
                                           text fake
6910  simulator euro chifley dalby pathologist     1
6911   longford misleading webcke sols angling     1
g_summary: loss=13.205, reward=0.373


d_summary: loss=0.058, accuracy=1.000


d_summary: loss=0.223, accuracy=0.953


d_summary: loss=0.083, accuracy=0.984


d_summary: loss=0.111, accuracy=0.984


d_summary: loss=0.075, accuracy=0.984
                                      text fake
7294     angling sarkozy makybe cas klaus     1
7295  glenside islam zoe collect ventures     1
g_summary: loss=12.791, reward=0.206


d_summary: loss=0.045, accuracy=1.000


d_summary: loss=0.127, accuracy=0.984


d_summary: loss=0.100, accuracy=0.984


d_summary: loss=0.038, accuracy=1.000


d_summary: loss=0.142, accuracy=0.969
                                             text fake
7678     bentley evasion duster levels greenwood     1
7679  mar constellation corps armstrong escapees     1
g_summary: loss=12.575, reward=0.211


d_summary: loss=0.078, accuracy=0.984


d_summary: loss=0.039, accuracy=1.000


d_summary: loss=0.084, accuracy=0.969


d_summary: loss=0.221, accuracy=0.953


d_summary: loss=0.125, accuracy=0.969
                                        text fake
8062      j tarcutta tarcutta angling eddie     1
8063  cloke adams bresciano hartley charged     1
g_summary: loss=12.738, reward=0.198


d_summary: loss=0.083, accuracy=0.984


d_summary: loss=0.063, accuracy=0.984


d_summary: loss=0.110, accuracy=0.984


d_summary: loss=0.183, accuracy=0.984


d_summary: loss=0.125, accuracy=0.969
                                             text fake
8446  dunlop courtenay humbles watmough cochrane     1
8447    talking amendments fruits reflect church     1
g_summary: loss=12.776, reward=0.265


d_summary: loss=0.046, accuracy=1.000


d_summary: loss=0.044, accuracy=1.000


d_summary: loss=0.067, accuracy=0.984


d_summary: loss=0.090, accuracy=0.984


d_summary: loss=0.072, accuracy=0.984
                                              text fake
8830  stateline shot depth skoko decentralisation     1
8831          basic lyme scope tributes inundated     1
g_summary: loss=12.422, reward=0.245


d_summary: loss=0.042, accuracy=1.000


d_summary: loss=0.110, accuracy=0.984


d_summary: loss=0.169, accuracy=0.984


d_summary: loss=0.043, accuracy=1.000


d_summary: loss=0.040, accuracy=1.000
                                        text fake
9214         de moranbah iii grows theodore     1
9215  sun wellard wendouree paceman guineas     1
g_summary: loss=12.515, reward=0.153


d_summary: loss=0.067, accuracy=0.984


d_summary: loss=0.131, accuracy=0.984


d_summary: loss=0.077, accuracy=0.984


d_summary: loss=0.067, accuracy=0.984


d_summary: loss=0.086, accuracy=0.969
                                                   text fake
9598  pathologist investigates detentions dashes emily     1
9599      apologies suspects ceremony emily federalism     1
g_summary: loss=11.927, reward=0.363


d_summary: loss=0.360, accuracy=0.969


d_summary: loss=0.045, accuracy=1.000


d_summary: loss=0.047, accuracy=1.000


d_summary: loss=0.079, accuracy=0.969


d_summary: loss=0.043, accuracy=1.000
                                                 text fake
9982          pineapples 0511 muchea hivaids lebanon     1
9983  schoolgirl celebrations maximum agenda unhappy     1
g_summary: loss=12.093, reward=0.346


d_summary: loss=0.053, accuracy=1.000


d_summary: loss=0.115, accuracy=0.984


d_summary: loss=0.162, accuracy=0.984


d_summary: loss=0.049, accuracy=1.000


d_summary: loss=0.034, accuracy=1.000
                                           text fake
10366        silverton see ones jockeys ousted     1
10367  eradicating wagga paddle proof snowtown     1
g_summary: loss=12.019, reward=0.341


d_summary: loss=0.075, accuracy=0.969


d_summary: loss=0.053, accuracy=1.000


d_summary: loss=0.057, accuracy=0.984


d_summary: loss=0.042, accuracy=1.000


d_summary: loss=0.037, accuracy=1.000
                                                    text fake
10750         focarelli bales jumping murderers hivaids     1
10751  reunification vehicle calare disappoint snowfa...    1
g_summary: loss=12.561, reward=0.374


d_summary: loss=0.045, accuracy=1.000


d_summary: loss=0.054, accuracy=1.000


d_summary: loss=0.137, accuracy=0.969


d_summary: loss=0.127, accuracy=0.969


d_summary: loss=0.041, accuracy=1.000
                                           text fake
11134    charging stern foodbank guineas walks     1
11135  hivaids chapman segeyaro junee gridlock     1
g_summary: loss=12.519, reward=0.408


d_summary: loss=0.040, accuracy=1.000


d_summary: loss=0.279, accuracy=0.953


d_summary: loss=0.131, accuracy=0.984


d_summary: loss=0.046, accuracy=1.000


d_summary: loss=0.048, accuracy=1.000
                                       text fake
11518  approves thailands do alcopop gavin     1
11519     chestnut angela chaplin great dv     1
g_summary: loss=11.874, reward=0.234


d_summary: loss=0.050, accuracy=1.000


d_summary: loss=0.237, accuracy=0.969


d_summary: loss=0.056, accuracy=1.000


d_summary: loss=0.087, accuracy=0.969


d_summary: loss=0.059, accuracy=1.000
                                         text fake
11902  iraqs remove menaces piggery immelman     1
11903      isa tredrea lapkin trigger modest     1
g_summary: loss=12.136, reward=0.231


d_summary: loss=0.225, accuracy=0.938


d_summary: loss=0.060, accuracy=1.000


d_summary: loss=0.055, accuracy=1.000


d_summary: loss=0.120, accuracy=0.969


d_summary: loss=0.051, accuracy=1.000
                                              text fake
12286                bet twitter header bowen nts     1
12287  slower fed tarcutta humiliated attractions     1
g_summary: loss=11.852, reward=0.564


d_summary: loss=0.074, accuracy=0.984


d_summary: loss=0.073, accuracy=0.984


d_summary: loss=0.191, accuracy=0.969


d_summary: loss=0.073, accuracy=0.984


d_summary: loss=0.063, accuracy=1.000
                                                 text fake
12670             merger nabbed mua strategy kirsten     1
12671  spectator vukovic overcrowded wholesale share     1
g_summary: loss=12.104, reward=0.338


d_summary: loss=0.068, accuracy=1.000


d_summary: loss=0.123, accuracy=0.984


d_summary: loss=0.068, accuracy=0.984


d_summary: loss=0.054, accuracy=1.000


d_summary: loss=0.176, accuracy=0.984
                                                 text fake
13054           surf stricker stricker thought emily     1
13055  carpenter portugal sustainable chaplin remove     1
g_summary: loss=11.703, reward=0.201


KeyboardInterrupt: ignored

In [19]:
from pathlib import Path

print(df_fake.tail())

count = 1
saved = False
while saved != True:
  filename = 'generated_headlines_%d.csv' % count
  filepath = Path('./results/%s' % filename)
  
  if filepath.exists():
    count = count+1
  else:
    df_fake.to_csv(filepath, sep='\t', encoding='utf-8')
    saved = True

                                                text fake
13115      apn spruiks explained clubhouse rollover     1
13116                  bitter bravo lied aaron lure     1
13117       thunderstorms research cctv near mozzie     1
13118  austerity coal mortlock federalism cambodias     1
13119              queen tourist wednesday saia bic     1
