In [2]:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
tf.enable_eager_execution()
import pandas as pd
import re


import numpy as np
import os
import time

event_data = pd.read_csv('event_data4.csv')

# Build the training corpus: one line per event description, containing only
# lowercase ASCII letters separated by single spaces.  Pieces are collected in
# a list and joined once instead of growing a string with `+=` in a nested
# loop, and the loop no longer reuses the name `text` for individual words.
description_lines = []
for raw_description in event_data[' description']:
    # Replace everything that is not a letter with a space, then lowercase
    # and split on whitespace to obtain the individual words.
    words = re.sub("[^a-zA-Z]", " ", raw_description).lower().split()
    # Every word is followed by one space (including the last one) and the
    # description is terminated by a newline, exactly as before.
    description_lines.append(''.join(word + ' ' for word in words) + '\n')

descriptions = ''.join(description_lines)
text = descriptions

# Sorted list of the distinct characters that occur in the corpus.
vocab = sorted(set(text))


In [3]:
#Credit for Code from TensorFlow Tutorials
# Lookup tables between characters and their integer ids.
char2idx = {char: index for index, char in enumerate(vocab)}
idx2char = np.array(vocab)

# The whole corpus encoded as one integer id per character.
text_as_int = np.array([char2idx[c] for c in text])

# Number of input characters per training example.
seq_length = 50
# Every example is cut from a chunk of seq_length + 1 characters (the input
# plus the one-step-shifted target, see the batch() call below), so the
# number of examples is the corpus length divided by the chunk size.
examples_per_epoch = len(text) // (seq_length + 1)

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Show the first five characters of the corpus.
for i in char_dataset.take(5):
    print(idx2char[i.numpy()])

# Group the character stream into chunks of seq_length + 1 ids, dropping the
# incomplete chunk at the end.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first five chunks decoded back to text.
for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

def split_input_target(chunk):
    """Split a chunk into an (input, target) pair shifted by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element.
    """
    return chunk[:-1], chunk[1:]

# Number of sequences per batch and size of the shuffle buffer.
BATCH_SIZE = 64
BUFFER_SIZE = 10000

steps_per_epoch = examples_per_epoch//BATCH_SIZE

# Turn every chunk into an (input, target) pair, shuffle the pairs and group
# them into fixed-size batches, dropping the incomplete final batch.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)


Instructions for updating:
Colocations handled automatically by placer.
f
o
r
 
b
'for bottle service please email us at booking add d'
'etails saturday night rooftop party at cantina roof'
'top bar lounge w th st new york ny new york city ni'
'ghtclub music by special guest dj doors open at pm '
'ladies free till am gents table reservation for bot'


In [4]:
# Model hyper-parameters: vocabulary length in characters, embedding
# dimension and number of RNN units.
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024

# Choose the recurrent layer constructor: a GRU with a sigmoid recurrent
# activation when no GPU is available, the CuDNN GRU otherwise.
if not tf.test.is_gpu_available():
    import functools
    rnn = functools.partial(tf.keras.layers.GRU, recurrent_activation='sigmoid')
else:
    rnn = tf.keras.layers.CuDNNGRU
    
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the embedding -> recurrent -> dense character model.

    Args:
        vocab_size: size of the embedding input and of the dense output.
        embedding_dim: output dimension of the embedding layer.
        rnn_units: number of units of the recurrent layer.
        batch_size: fixed batch dimension of the model input.

    Returns:
        A `tf.keras.Sequential` model made of the three layers.
    """
    # Layers are created in stacking order.
    embedding_layer = tf.keras.layers.Embedding(
        vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
    # `rnn` is the module-level layer constructor selected according to GPU
    # availability; the layer is stateful and returns the full sequence.
    recurrent_layer = rnn(
        rnn_units,
        return_sequences=True,
        recurrent_initializer='glorot_uniform',
        stateful=True)
    output_layer = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding_layer, recurrent_layer, output_layer])

# Instantiate the network with a batch size of 1.
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=1,
)

def loss(labels, logits):
    """Sparse categorical cross-entropy between `labels` and raw `logits`."""
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)

# Attach the Adam optimizer and the cross-entropy loss defined above.
model.compile(optimizer=tf.train.AdamOptimizer(), loss=loss)

In [5]:
# Load previously saved weights from the './bigTFRNN' path into the
# batch-size-1 model created above.
# NOTE(review): presumably these weights come from a training run with the
# same architecture and vocabulary; confirm before reusing with other data.
model.load_weights('./bigTFRNN', by_name=False)
# Build the model for inputs of shape [1, None].
model.build(tf.TensorShape([1, None]))

In [6]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (1, None, 256)            7168      
_________________________________________________________________
gru (GRU)                    (1, None, 1024)           3935232   
_________________________________________________________________
dense (Dense)                (1, None, 28)             28700     
Total params: 3,971,100
Trainable params: 3,971,100
Non-trainable params: 0
_________________________________________________________________


In [30]:
def generate_text(model, start_string, num_generate=1000, temperature=0.9):
    """Generate text character by character with the trained model.

    Args:
        model: model called on a batch of one sequence of character ids; its
            recurrent state is reset before generation starts.
        start_string: seed text. Every character must be a key of the
            module-level `char2idx` table.
        num_generate: number of characters to generate after the seed.
        temperature: divisor applied to the logits before sampling. Low
            temperatures result in more predictable text, higher
            temperatures in more surprising text.

    Returns:
        `start_string` followed by the `num_generate` sampled characters.

    Raises:
        ValueError: if `start_string` is empty or `temperature` is not
            positive.
        KeyError: if `start_string` contains a character that is not in the
            vocabulary.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")
    unknown_chars = sorted(set(start_string) - set(char2idx))
    if unknown_chars:
        raise KeyError(
            "start_string contains characters outside the vocabulary: %r"
            % (unknown_chars,))

    # Convert the start string to ids and add the batch dimension.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    # Characters sampled so far.
    text_generated = []

    # Here batch size == 1
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Remove the batch dimension and apply the temperature.
        predictions = tf.squeeze(predictions, 0) / temperature

        # Sample from the distribution given by the logits and keep the
        # sample drawn for the last time step.
        predicted_id = tf.multinomial(predictions, num_samples=1)[-1, 0].numpy()

        # The sampled character becomes the next input; the previous context
        # is carried by the hidden state of the stateful recurrent layer.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [31]:
print(generate_text(model, start_string=u"welcome"))  # the seed must be lowercase: the corpus is lowercased before the vocabulary is built

welcome post in carpoolic garage when it s each of the fresh workshop attractions and suntay your ticket information in duic best kloster and workshops kid browbrink s top dance where caribbean valued ditions billywood enjoy our avenue please chmagranizing car park avaion rooftop tables a vendor can bring your life poly fast friday ship and jain company s l oacket holder a ticket to the event are on site www during immagri madist pour latie vous expered lightners who were do i have to bring minh book you will receive an evening of the festival health reuse of the venue all ticket s designer by business trainings how can i contingate a d car party free tickets have been asked has been career all ticket now check with your ticketing field for your chance to be free will be dept and march at the gate will be available at bottle service and the humot age located by months anthonyu complete limited to one of the america s customer services weather the free same show virta glaritz for child 