# I Trained an AI Model to Generate Donald Trump Tweets

In [1]:
# Imports
import json
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib
import ipywidgets as widgets
from tqdm.notebook import tqdm
from os.path import exists

# Data Parameters
# Tweet length limit in characters (the generator below also caps output at 280).
tweet_length = 280
# Share of the data used for training; the remainder is used for validation.
train_frac = 0.667
# Buffer size handed to Dataset.shuffle().
pre_shuffle = 1000
# Batch size handed to Dataset.batch().
batch = 100

# Training parameters
# True: build and fit a new model; False: load the previously saved model.
train = False
# Forwarded to model.fit(shuffle=...).
shuffle = True
# Forwarded to model.fit(epochs=...).
epochs = 20

# Model parameters
# Output dimension of the Embedding layer.
embedding_units = 256
# Number of units in the LSTM layer.
lstm_units = 1024

Before we train, let's check the devices available on our system. If we don't see any GPUs or other hardware accelerators, training will run on the CPU. That could be a problem on home machines that cannot limit the number of CPU cores available for training, since training will then exhaust the CPU's resources.

In [2]:
# Print one logical device (CPU, GPU, ...) per line as reported by TensorFlow.
print(*tf.config.list_logical_devices(), sep='\n')

LogicalDevice(name='/device:CPU:0', device_type='CPU')
LogicalDevice(name='/device:GPU:0', device_type='GPU')


## Preparing the Data

With AI, data processing is half the battle, so we'll spend a lot of time exploring and processing the data before we build our AI model. I want these steps to run mainly on the CPU, so we have access to main memory; frankly, it's also faster for this step (on my machine). We're only concerned with the text, since we're just trying to make funny tweets, so we're going to filter out the tweets that are just links. We'll also filter out retweets, since we need the raw chaotic energy from the man's gorgeous mouth itself.

In [3]:
# The raw tweets are only needed when we are (re)training or when the
# character vocabulary has not been saved to disk yet.
if train or not exists('models/_support/ai-donald-trump-vectorizer.json'):
    df = pd.read_csv('data/dtweets.csv', encoding='utf-8')

    # Rows without text would turn the boolean masks below into NaN-carrying
    # object Series (which `~` cannot negate) and cannot be encoded later.
    df = df.dropna(subset=['text'])

    # Drop retweets (plain and quoted form) - we only want original tweets.
    is_retweet = (df['text'].str.startswith('RT @', na=False)
                  | df['text'].str.startswith('"RT @', na=False))
    df = df.loc[~is_retweet]

    # Drop tweets that begin with a t.co short link. `str.match` anchors at
    # the start of the string only; the dot is escaped so it matches a
    # literal '.' rather than any character.
    starts_with_link = df['text'].str.match(r'https?://t\.co/[a-zA-Z0-9]+', na=False)
    df = df.loc[~starts_with_link]

    # Shuffle the rows once; no random_state is set, so the order (and hence
    # any later train/validation split) differs between runs.
    tweets = df['text'].sample(frac=1)

    # A bare expression inside an `if` block is not rendered by the notebook,
    # so show a small preview explicitly.
    display(tweets.head())

Next, we create and train the character encoder and decoder. The first thing we need to do is encode the tweets into ASCII, dropping any characters that can't be represented. This allows the `TextVectorization` layer to split the text into characters in a way we can decode without error. Then we create input and output sequences, where the input is everything but the last character and the output is everything but the first character. The sequences are also padded so that they're all the same length (easier to work with, maybe). Finally, we create the dataset and split it into training and testing data.

In [4]:
# Location of the saved character vocabulary (a JSON list of tokens).
vectorizer_path = 'models/_support/ai-donald-trump-vectorizer.json'

if train or not exists(vectorizer_path):
    with tf.device('/device:CPU:0'):
        # Encoder: drop non-ASCII characters, then learn a per-character vocabulary
        encoded_tweets = tweets.str.encode('ascii', errors='ignore')
        word2vec = tf.keras.layers.TextVectorization(split='character', standardize=None)
        word2vec.adapt(encoded_tweets)
        vocab_size = word2vec.vocabulary_size()
        print('Save vectorizer...')
        with open(vectorizer_path, 'w') as f:
            json.dump(word2vec.get_vocabulary(), f)

        # Encode tweets; the target sequence is the input shifted left by one character
        vectorized_tweets = word2vec(encoded_tweets)
        input_tweet_seqs = vectorized_tweets[:, :-1]
        output_tweet_seqs = vectorized_tweets[:, 1:]

        # Create dataset and split into training and testing.
        # The split is done BEFORE shuffling: Dataset.shuffle() reshuffles on
        # every iteration by default, so shuffling first and then using
        # take()/skip() lets examples drift between the training and the
        # validation set from one pass to the next. `tweets` has already been
        # shuffled once with pandas, so a positional split is still random.
        dataset = tf.data.Dataset.from_tensor_slices((
            input_tweet_seqs,
            output_tweet_seqs))
        train_num = int(len(dataset) * train_frac)
        train_dataset = dataset.take(train_num).shuffle(pre_shuffle).batch(batch)
        test_dataset = dataset.skip(train_num).batch(batch)
else:
    # Rebuild the vectorizer from the saved vocabulary; no tweets are needed
    with open(vectorizer_path, 'r') as f:
        word2vec = tf.keras.layers.TextVectorization(split='character',
                                                     standardize=None,
                                                     vocabulary=json.load(f))
        vocab_size = word2vec.vocabulary_size()


def decodeidx(sample):
    """
    Turn a sequence of vocabulary indices back into text

    :param sample: iterable of integer indices into the vectorizer vocabulary
    :return: the concatenated tokens as a single string
    """
    # Fetch the vocabulary once instead of once per character
    vocabulary = word2vec.get_vocabulary()
    return ''.join(vocabulary[idx] for idx in sample)


print('Vocab Size:', vocab_size)

Vocab Size: 95


## Training the Model

Now for the fun part: we create the model and train it using Keras.

In [5]:
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# TODO: Create model using keras object oriented
#       framework and be able to pass state to 
#       LSTM if needed

# Restore the saved network unless we were asked to train a new one
if not train:
    print('Loading model...')
    model = tf.keras.models.load_model('models/ai-donald-trump')
else:
    print('Creating model...')
    # Character ids -> embeddings -> LSTM over the whole sequence ->
    # one row of unnormalised scores (logits) per position
    network_layers = [
        Embedding(vocab_size, embedding_units),
        LSTM(lstm_units, return_sequences=True),
        Dense(vocab_size, activation='linear'),
    ]
    model = tf.keras.Sequential(network_layers)

    # The Dense layer has no softmax, so the loss must be told it receives logits
    logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(loss=logits_loss, optimizer='adam', metrics=['accuracy'])

# Show the architecture and parameter counts
model.summary()

Loading model...
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 256)         24320     
                                                                 
 lstm (LSTM)                 (None, None, 1024)        5246976   
                                                                 
 dense (Dense)               (None, None, 95)          97375     
                                                                 
Total params: 5,368,671
Trainable params: 5,368,671
Non-trainable params: 0
_________________________________________________________________


Train the model with the data. Fingers crossed this goes well...

In [6]:
# Fit and save the model only when training was requested in the config cell
if train:
    model.fit(train_dataset,
              validation_data=test_dataset,
              epochs=epochs,
              # NOTE: Keras ignores `shuffle` when the input is a
              # tf.data.Dataset; ordering is controlled by the dataset itself.
              shuffle=shuffle,
              callbacks=[
                  # Stop after 5 epochs without improvement and roll back to
                  # the best weights seen; without restore_best_weights the
                  # weights of the last (worse) epoch would be the ones saved.
                  tf.keras.callbacks.EarlyStopping(patience=5,
                                                   restore_best_weights=True)
              ])
    model.save('models/ai-donald-trump')

Generate a sentence using our model.

In [7]:
# TODO: Implement OneStepGenerator model

# Interactive widgets: a prompt box, a trigger button and an output area
prompt_widget = widgets.Textarea(
    value='Mitch McConnell',
    placeholder='Type a prompt',
    description='Prompt:',
)
generate_widget = widgets.Button(description='Generate', button_style='info')
output_widget = widgets.Output()

# Stack the three widgets vertically, in the order they are used
app_widget = widgets.VBox(children=[prompt_widget, generate_widget, output_widget])


# Prediction routine
@output_widget.capture(clear_output=True)
def run_prediction(event):
    """
    Generate a sentence with prompt 
    provided by widget
    
    Characters are sampled one at a time from the model's output for the
    last position and appended to the sequence until the tweet length
    limit is reached or the padding token is sampled.
    
    :param event: button click event
    :raises Exception: if the prompt is empty (after dropping non-ASCII characters)
    """
    # Get prompt value, normalised like the training data (non-ASCII dropped)
    prompt = prompt_widget.value.encode('ascii', errors='ignore').decode('ascii')
    if prompt == '':
        raise Exception('Please enter a prompt!')
    
    # Encode prompt; shape is (1, number of prompt characters)
    prediction_indeces = word2vec([prompt])
    
    # Generate tweet chars starting from prompt
    remaining = tweet_length - int(prediction_indeces.shape[1])
    for _ in tqdm(range(remaining), desc="Generating"):
        # Calling the model directly avoids the per-call overhead of
        # model.predict(), which is meant for large batched inputs
        logits = model(prediction_indeces, training=False)
        
        # Only the scores for the last position decide the next character
        next_index = tf.random.categorical(logits[:, -1, :],
                                           num_samples=1,
                                           dtype=prediction_indeces.dtype)
        
        # Index 0 is the padding token of TextVectorization; training
        # sequences are padded with it, so sampling it marks the end of a tweet
        if int(next_index[0, 0]) == 0:
            break
        prediction_indeces = tf.concat([prediction_indeces, next_index], axis=1)
    prediction_indeces = tf.squeeze(prediction_indeces, axis=0).numpy()
    
    # Decode and print prediction
    prediction = decodeidx(prediction_indeces)
    print('Prediction:', prediction)


# Hook up app and display
# Run run_prediction every time the Generate button is clicked.
generate_widget.on_click(run_prediction)
# Bare last expression so the notebook renders the widget container.
app_widget

VBox(children=(Textarea(value='Mitch McConnell', description='Prompt:', placeholder='Type a prompt'), Button(b…