In [1]:
import os
from data_preparation import *
from model import *


In [2]:
# Batching settings handed to the data loader below; BATCH_SIZE is reused when
# the training model is built.
BATCH_SIZE = 32
BUFFER_SIZE = 10000
# word_text2tf_data is not defined in this notebook (presumably it comes from the
# star import of data_preparation). It returns four values: the training dataset,
# the two token<->id lookups and the vocabulary.
# NOTE(review): despite the char2idx/idx2char names, the "word_" prefix and the
# generated output suggest these lookups are keyed by whole words -- confirm.
dataset, char2idx, idx2char, vocab = word_text2tf_data(filename='donald_tweets.txt',
                                                       seq_length=20,
                                                       batch_size=BATCH_SIZE,
                                                       buffer_size=BUFFER_SIZE)

In [3]:
# Model hyper-parameters. VOCAB_SIZE is derived from the vocabulary returned by
# the data loader, so it always matches the loaded corpus.
VOCAB_SIZE = len(vocab)
EMBEDDING_DIM = 256
RNN_UNIT = 64
# build_attention_model is not defined in this notebook (presumably it comes from
# the star import of model). The batch size is fixed at build time: the summary
# printed below shows a leading dimension of 32 on every layer.
model = build_attention_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNIT,
    batch_size=BATCH_SIZE)
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(32, None)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (32, None, 256)      5555712     input_1[0][0]                    
                                                                 input_1[0][0]                    
__________________________________________________________________________________________________
gru (GRU)                       (32, None, 64)       61824       embedding[0][0]                  
                                                                 embedding[1][0]                  
_______________________________________________________________________________________

In [4]:
# from_logits=True: the loss is told that the model emits raw, un-normalised
# scores rather than probabilities.
# NOTE(review): `tf` is never imported explicitly in the visible cells; it
# presumably leaks in through one of the star imports -- an explicit
# `import tensorflow as tf` in the first cell would be more robust.
LOSS = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
OPTIMIZER = 'adam'
model.compile(optimizer=OPTIMIZER, loss=LOSS)

In [5]:
# Checkpoints are written under this directory, one file prefix per epoch:
# "{epoch}" is a placeholder filled in by the callback (the recorded output
# below ends with "ckpt_10").
checkpoint_dir = './attention_word_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
# save_weights_only=True: only the weights are stored, not the full model, so
# the architecture has to be rebuilt in code before the weights can be restored.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

EPOCHS = 10
# `history` keeps the object returned by fit(); it is not used again in the
# visible cells.
history = model.fit(dataset,
                    epochs=EPOCHS,
                    callbacks=[checkpoint_callback])
# Last expression of the cell: displays the path of the newest checkpoint.
tf.train.latest_checkpoint(checkpoint_dir)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


'./attention_word_checkpoints\\ckpt_10'

In [6]:
# Rebuild the same architecture with batch_size=1 for text generation, which
# feeds the model one sequence at a time, then restore the trained weights from
# the most recent checkpoint.
# NOTE(review): this rebinds `model`, so the training model (batch size 32) is
# no longer reachable under that name after this cell runs.
model = build_attention_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNIT, batch_size=1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
# Declare the input shape: a batch of one sequence of unspecified length.
model.build(tf.TensorShape([1, None]))


In [7]:
def generate_text(model, start_string, num_generate, split_string, temperature=1.0):
    """Sample text from ``model``, seeded with ``start_string``.

    The seed tokens are fed to the model once; afterwards each sampled token is
    fed back as the next input. Token/id conversion uses the notebook-level
    ``char2idx`` and ``idx2char`` lookups.

    Args:
        model: Model called as ``model(ids)`` with a ``(1, n)`` tensor of token
            ids. It must expose ``reset_states()``.
            NOTE(review): assumed to return logits of shape (1, n, vocab) --
            confirm against build_attention_model.
        start_string: Non-empty sequence of seed tokens (a list of words, or a
            plain string for a character-level vocabulary). Every token must be
            a key of ``char2idx``.
        num_generate: Number of tokens to sample after the seed.
        split_string: Separator placed between consecutive tokens of the output.
        temperature: Positive divisor applied to the logits before sampling.
            Values below 1.0 make sampling more conservative, values above 1.0
            make it more random. Defaults to 1.0 (logits used unchanged).

    Returns:
        str: the seed tokens followed by the sampled tokens, all joined with
        ``split_string``.

    Raises:
        ValueError: if ``start_string`` is empty or ``temperature`` is not > 0.
        KeyError: if a seed token is missing from ``char2idx``.
    """
    seed_tokens = list(start_string)
    if not seed_tokens:
        raise ValueError("start_string must contain at least one token")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Convert the seed tokens to ids and add a batch dimension.
    input_eval = [char2idx[token] for token in seed_tokens]
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []

    # Start from a clean recurrent state so earlier calls cannot leak into
    # this sample.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Remove the batch dimension.
        predictions = tf.squeeze(predictions, 0)

        # Sample the next token id from the categorical distribution defined by
        # the temperature-scaled logits. Only the last time step ([-1]) matters.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed the sampled token back in as the next input; the model keeps the
        # previous hidden state between calls.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    # Join seed and generated tokens in one pass so the separator also appears
    # at the seam. Joining the two halves separately glued the last seed token
    # to the first generated one.
    return split_string.join(seed_tokens + text_generated)


# Demo: seed the model with a single token and sample 1000 more, joined by spaces.
print(generate_text(model, ['@ABCPolitics:'], 1000, ' '))

@ABCPolitics:trump, ABSOLUTE OPTION" 
 "@jimmy_jkmorgan: @realDonaldTrump looks maintains biased. No as U.Ur a great shoker! You like African-Americans much stuff hit by the pack All now that the Donald." Nice! 
 CNN/ORC Poll can a great job for the African-American guys. Until he can۪t be a great detail, she will will do. obvious was amazing. Tremendous poll just lost even like up, lifted So one of the inferiority (Texas) Hi Rules @realDonaldTrump 
 danger is terrorist increasing Yes, they lost a business than I arrived in Iowa! Great new @realDonaldTrump COVERS MA, 10/6/15 In lobbyists &amp; Later, Sunday &amp; VOTE . Poll: @realDonaldTrump ... Monica, @atensnut: "@_MSFL_: @realDonaldTrump for President &amp; totally appreciated! 
 Very set off Texas stores! 
 WERE read Trumps Congress to #MAGA! &amp; release Carson just (Kasich) is not the fact you #DrainTheSwamp 
 Gods whose people are so biased so well in door is having any money speak more than Glenn is false thing against me! En

In [8]:
from metrics import *

# load_file and scores are not defined in this notebook (presumably they come
# from the star import of metrics above).
trump_tweets = load_file()
# Judging by the printed headers ("2 initial words from #10 sentences"), the
# first argument of scores is the number of seed words and the second the number
# of sentences. This sweep varies the number of seed words at 10 sentences.
for i in (2, 4, 6, 8, 10):
    scores(i, 10, generate_text, model, trump_tweets)
    
# Second sweep: keep 2 seed words and vary the second argument instead.
for i in (9, 10, 18, 175):
    scores(2, i, generate_text, model, trump_tweets)

Generated sentence: RT @DonaldJTrumpJr:WORLD Forum: @EricTrump @fox at MSNBC where Ted Cruz, asked me on @FoxNews. 
 VOTE *Houses, #AIPAC2016 in Colorado Night "@agentvf: New Hampshire! Watch:

Reference sentence: RT @DonaldJTrumpJr: FINAL PUSH! Eric and I doing dozens of radio interviews. We can win this thing! GET OUT AND VOTE! #MAGA #ElectionDay ht_

--------------------------------------------------------------------------

2 initial words from #10 sentences -- rouge scores:
rouge-1 {'f': 0.04166666166666727, 'p': 0.041666666666666664, 'r': 0.041666666666666664}
rouge-2 {'f': 0.0, 'p': 0.0, 'r': 0.0}
rouge-l {'f': 0.04166666166666727, 'p': 0.041666666666666664, 'r': 0.041666666666666664}

--------------------------------------------------------------------------

2 initial words from #10 sentences -- BLEU scores:
0.15876836311239628

##########################################################################

Generated sentence: RT @DonaldJTrumpJr: FINAL PUSH!Kudos to vote. 
 Has th