In [1]:
import os
from data_preparation import *
from model import *


In [2]:
# Input-pipeline settings; BATCH_SIZE is reused when the training model is built.
BUFFER_SIZE = 10000  # presumably the shuffle-buffer size -- confirm in data_preparation
BATCH_SIZE = 32

# Build the training dataset and the token <-> id lookup tables from the corpus.
# NOTE(review): seq_length=20 is assumed to be counted in words -- confirm.
dataset, char2idx, idx2char, vocab = word_text2tf_data(
    filename='donald_tweets.txt',
    seq_length=20,
    batch_size=BATCH_SIZE,
    buffer_size=BUFFER_SIZE,
)

In [3]:
# Model hyperparameters; the vocabulary size comes from the prepared corpus.
EMBEDDING_DIM = 256
RNN_UNIT = 64
VOCAB_SIZE = len(vocab)

# Training-time model: its batch dimension is fixed to BATCH_SIZE.
model = build_attention_model(vocab_size=VOCAB_SIZE,
                              embedding_dim=EMBEDDING_DIM,
                              rnn_units=RNN_UNIT,
                              batch_size=BATCH_SIZE)
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(32, None)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (32, None, 256)      5555712     input_1[0][0]                    
                                                                 input_1[0][0]                    
__________________________________________________________________________________________________
gru (GRU)                       (32, None, 64)       61824       embedding[0][0]                  
                                                                 embedding[1][0]                  
_______________________________________________________________________________________

In [4]:
# The loss is configured with from_logits=True, i.e. it expects the model to
# emit raw, un-normalised scores.
OPTIMIZER = 'adam'
LOSS = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=LOSS, optimizer=OPTIMIZER)

In [5]:
EPOCHS = 10

# Weights-only checkpoints, one file prefix per epoch ("ckpt_<epoch>").
checkpoint_dir = './attention_word_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)

history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

# Last expression of the cell: shows the path of the newest checkpoint.
tf.train.latest_checkpoint(checkpoint_dir)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


'./attention_word_checkpoints\\ckpt_10'

In [6]:
# Rebuild the network with a batch size of 1 for sampling and restore the
# most recently saved training weights into it.
model = build_attention_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNIT, batch_size=1)
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))


In [7]:
def generate_text(model, start_string, num_generate, split_string, temperature=1.0):
    """Sample ``num_generate`` tokens from ``model`` and return the full text.

    Args:
        model: Callable model exposing ``reset_states()``. It is called with a
            ``(1, n)`` tensor of token ids.
            NOTE(review): assumed to return logits shaped ``(1, n, vocab)``
            -- confirm against ``build_attention_model``.
        start_string: Sequence of seed tokens. Every token must be a key of
            the module-level ``char2idx`` mapping.
        num_generate: Number of tokens to sample after the seed.
        split_string: Separator inserted between all tokens of the result.
        temperature: Positive divisor applied to the logits before sampling.
            Values below 1 sharpen the distribution, values above 1 flatten
            it. Defaults to 1.0 (no scaling).

    Returns:
        The seed tokens followed by the sampled tokens, joined by
        ``split_string``.

    Raises:
        KeyError: If a seed token is not present in ``char2idx``.
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be > 0, got {temperature!r}")

    seed_tokens = list(start_string)
    unknown = [tok for tok in seed_tokens if tok not in char2idx]
    if unknown:
        # Same exception type as a plain dict lookup, but names the culprit.
        raise KeyError(f"seed token(s) not in vocabulary: {unknown!r}")

    # Encode the seed and add the batch dimension expected by the model.
    input_eval = tf.expand_dims([char2idx[tok] for tok in seed_tokens], 0)

    text_generated = []

    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits, then sample from the categorical distribution at
        # the last time step only ([-1, 0] picks that row's single sample).
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the sampled token back in as the next input.
        # NOTE(review): this relies on the model keeping its recurrent state
        # between calls -- confirm the model is built as stateful.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    # Join seed and generated tokens in ONE pass so that the separator also
    # appears between the last seed token and the first generated token.
    return split_string.join(seed_tokens + text_generated)


# Sample 1000 tokens from a single seed token, joined with spaces.
seed_tokens = ['@ABCPolitics:']
print(generate_text(model, seed_tokens, 1000, ' '))

@ABCPolitics:"@AJ_Nix: Highest IT TO RIGHT Come 4 D.C. Enjoy! 
 Do you can save our Hussein above !" #Trump (and deductibles are the other Veterans First! locator: 
 Many people have like Proud of "THE British SOCKS STRAW Coach County, South Shes of our terror election shot than Cruz down: I felt if they belong! 
 When people of you Pennsylvania! #MakeAmericaGreatAgain 
 I think the people was to funny if you want to look -- even the border after Kasich said he did not more years fans presidents" DO Rapids,IA Bill, "spontaneous" is the POTUS." 
 "@qbeacademy: #Trump2016 #Trump2016 
 Thank you St. primary To the blue Center. Just watched-nobody come always let doing, less at be? Leave NOT "@MJJustus1 Hopefully, it causing other Establishment will will must win writing moving to silence for the debate. Record enough, he will be stopped in the inaccurate Politico LIFETIME #Ohio 
 Thank you! Watch: 
 I hear the DNC in Cleveland" Marco Kasich gets super Koch. Watch here:_ 
 Lyin Hillary say

In [8]:
from metrics import *

trump_tweets = load_file()

# (first, second) argument pairs passed to scores(), in the original order.
# The printed report labels the first value as the number of initial words.
# NOTE(review): the second value appears as "#<n> sentences" in the report;
# whether it is a sentence index or a count should be confirmed in metrics.
score_runs = [(n_initial_words, 10) for n_initial_words in (2, 4, 6, 8, 10)]
score_runs += [(2, sentence_no) for sentence_no in (9, 10, 18, 175)]

for n_initial_words, sentence_no in score_runs:
    scores(n_initial_words, sentence_no, generate_text, model, trump_tweets)

Generated sentence: RT @DonaldJTrumpJr:JohnKasich The polls! Thanks 
 .@megynkelly is a very victims and very proud of the experts they made a past, press ! IS Billion.

Reference sentence: RT @DonaldJTrumpJr: FINAL PUSH! Eric and I doing dozens of radio interviews. We can win this thing! GET OUT AND VOTE! #MAGA #ElectionDay ht_

--------------------------------------------------------------------------

2 initial words from #10 sentences -- rouge scores:
rouge-1 {'f': 0.1249999950000002, 'p': 0.125, 'r': 0.125}
rouge-2 {'f': 0.0, 'p': 0.0, 'r': 0.0}
rouge-l {'f': 0.13043477761814765, 'p': 0.13636363636363635, 'r': 0.125}

--------------------------------------------------------------------------

rouge-l initial words from #10 sentences -- BLEU scores:
1.2224277732079155e-231

##########################################################################

Generated sentence: RT @DonaldJTrumpJr: FINAL PUSH!Bemoan Howie, not screws fixing TRUMP" Demands $35,000,000 SO #2016" 
 The lot of go