# Introduction

This notebook contains the code for building the modified LSTM model and then using it to generate text for each celebrity.

The steps are as follows:

1. Load the data for the celebrity in question
2. Convert the text into pairs of (character sequence, next character), which serve as the model's input and target output respectively
3. Build the LSTM model.
4. Train the model with the processed data.
5. Use the model to generate text.
6. Export the generated text with pickle. 
7. Repeat steps 1-6 for all celebrities.

In [None]:
# Mount Google Drive when running on Colab; otherwise fall back to local files.
try:
  from google.colab import drive
  drive.mount('/content/drive')
except Exception:
  # `except Exception` (instead of a bare `except`) keeps the best-effort
  # fallback but lets KeyboardInterrupt / SystemExit propagate, so the
  # interactive mount prompt can still be cancelled.
  print('File not in drive')

In [None]:
# import libraries
import pickle
import random
import time
import os
import pandas as pd
import numpy as np
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, BatchNormalization
from keras.callbacks import ModelCheckpoint, LambdaCallback
import sys

## Import Data

In [None]:
# load dataset
# NOTE(review): pickle.load executes arbitrary code embedded in the file;
# only load 'df.pickle' if it comes from a trusted source.
# The `with` block guarantees the file handle is closed even if loading fails.
with open('df.pickle', 'rb') as pickle_in:
  df = pickle.load(pickle_in)
df.head()


Unnamed: 0,Username,User handle,Date of posting,Text,Retweet count,Like count,Occupation,Text (EPA),Text (Model),Word Count
0,Alicia Keys,@aliciakeys,Fri Feb 12 03:16:07 +0000 2021,The maestro! The musical magician! The one and...,170,1973,Artist,maestro musical magician believe knew played s...,the maestro! the musical magician! the one and...,40
1,Alicia Keys,@aliciakeys,Wed Feb 10 21:31:09 +0000 2021,"Your glow is about to be on 100,000!!! ✨As we ...",101,1171,Artist,"glow 100,000 lead reminder love wait luminous ...","your glow is about to be on 100,000!!! as we l...",54
3,Alicia Keys,@aliciakeys,Wed Feb 10 01:32:56 +0000 2021,Woke up in such a good vibe.⁣ Gen was funky &a...,267,3659,Artist,woke good vibe.⁣ funky fussy caught energy thi...,woke up in such a good vibe. gen was funky &am...,52
4,Alicia Keys,@aliciakeys,Mon Feb 08 01:41:28 +0000 2021,One of my favorite small businesses is @unionl...,144,816,Artist,favorite small businesses unionlosangeles repl...,one of my favorite small businesses is@unionlo...,33
5,Alicia Keys,@aliciakeys,Mon Feb 08 01:22:00 +0000 2021,Dreaming of performing live \nfor you!!! ✨✨✨✨\...,275,5028,Artist,dreaming performing live city come meet tonight,dreaming of performing live for you!!! what ci...,21


In [None]:
# Alphabetically sorted list of the distinct usernames present in the dataset.
username_counts = df['Username'].value_counts()
names = sorted(username_counts.index)
print(names)

['Alicia Keys', 'Anthony Joshua', 'Barack Obama', 'Bill Gates', "Conan O'Brien", 'Donald Trump', 'Dwayne Johnson', 'Elizabeth Warren', 'Ellen DeGeneres', 'Elon Musk', 'Emma Watson', 'Gordon Ramsay', 'Harry Styles.', 'Jeff Weiner', 'Joe Biden', 'John Cena', 'Kevin Durant', 'Kevin Hart', 'Kylie Jenner', 'Lady Gaga', 'LeBron James', 'Louis Tomlinson', 'Mariah Carey', 'Neil Patrick Harris', 'Oprah Winfrey', 'Pope Francis', 'Ronda Rousey', 'Tim Cook', 'Wiz Khalifa', 'daniel tosh', 'jimmy fallon']


In [None]:
# create a list of all characters
# NOTE(review): `tweets` is not defined in any cell visible in this notebook;
# confirm it is created before this cell so Restart & Run All works.
chars = sorted(set(''.join(tweets)))
print('Number of characters: ',len(chars))

# create a dictionary assigning each character to a number
char_number = {c: i for i, c in enumerate(chars)}

# create a dictionary assigning each number to a character
number_char = dict(enumerate(chars))

Number of characters:  58


In [None]:


# constants
LENGTH = 50  # number of characters in each input window
STEP = 1     # stride between the starts of consecutive windows

# model inputs (character windows) and targets (the character after each window)
sentences = []
next_char = []

# slide a LENGTH-wide window over every tweet; tweets no longer than LENGTH
# produce an empty range and therefore contribute nothing
for tweet in tweets:
  for start in range(0, len(tweet) - LENGTH, STEP):
    stop = start + LENGTH
    sentences.append(tweet[start:stop])
    next_char.append(tweet[stop])


print('Number of sequences: ',len(sentences))

Number of sequences:  46582


In [None]:
# create input and output arrays
# The `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin `bool` yields the same boolean dtype on every NumPy version.
x = np.zeros((len(sentences), LENGTH, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

# one-hot encode: x[i, j, k] is set when position j of sequence i holds the
# character with index k; y[i, k] is set for the character following sequence i
for i, sentence in enumerate(sentences):
  for j, character in enumerate(sentence):
    x[i, j, char_number[character]] = 1
  y[i, char_number[next_char[i]]] = 1

In [None]:
# report the shapes of the encoded input (x) and target (y) arrays
for _label, _array in (('Dimensions of x: ', x), ('Dimensions of y: ', y)):
  print(_label, _array.shape)

Dimensions of x:  (46582, 50, 58)
Dimensions of y:  (46582, 58)


## Modified LSTM Model

In [None]:
# build LSTM model: three stacked LSTM layers, each followed by dropout,
# ending in a softmax over the character vocabulary
modified_model = Sequential([
  # the first two LSTM layers return full sequences so the next LSTM can consume them
  LSTM(256, input_shape=(LENGTH, len(chars)), return_sequences=True),
  Dropout(0.25),
  LSTM(128, return_sequences=True),
  Dropout(0.25),
  # the last LSTM layer returns only its final output
  LSTM(128),
  Dropout(0.25),
  # one output unit per character in `chars`
  Dense(len(chars), activation='softmax'),
])

modified_model.compile(optimizer='adam', loss='categorical_crossentropy')

In [None]:
# sample the index of the next character from the model's predicted distribution
# temperature variable --> creativity, the higher, the more creative
def create_sample(prediction, temperature = 1):
  """Draw one index from `prediction` after rescaling it by `temperature`.

  Args:
    prediction: 1-D array-like of non-negative probabilities.
    temperature: divisor applied to the log-probabilities. Values below 1
      sharpen the distribution, values above 1 flatten it. A value <= 0
      selects the most probable index deterministically.

  Returns:
    The sampled index (as returned by `np.argmax`).
  """
  prediction = np.asarray(prediction).astype('float64')

  # Dividing by a non-positive temperature yields NaN probabilities, so treat
  # it as the limiting case of a fully greedy choice.
  if temperature <= 0:
    return np.argmax(prediction)

  # log(0) is a legitimate -inf here (zero probability stays zero); silence the warning.
  with np.errstate(divide='ignore'):
    logits = np.log(prediction) / temperature

  # Shift by the maximum before exponentiating so small temperatures cannot
  # underflow every term to 0 (which would give 0/0 = NaN).
  logits = logits - np.max(logits)
  exp_preds = np.exp(logits)
  prediction = exp_preds / np.sum(exp_preds)

  # One multinomial trial gives a one-hot row; argmax recovers the drawn index.
  probability = np.random.multinomial(1, prediction, 1)
  return np.argmax(probability)

In [None]:
# print text with generated sentences
def print_text(epoch, _):
  """Print a sample continuation produced by `modified_model`.

  Picks a random tweet from `tweets`, uses its first LENGTH characters as the
  seed, and writes 110 sampled characters to stdout for each sampling value.

  Args:
    epoch: epoch index, printed as received.
    _: unused second callback argument.
  """
  print()
  print('Generating Text after Epoch ',epoch)

  seed_tweet = np.random.choice(tweets)
  offset = 0
  vocab_size = len(chars)
  for val in [  0.75]:
    print('Value: ', val)

    # current input window; starts as the seed and slides forward one character per step
    window = seed_tweet[offset: offset+LENGTH]

    print('Generated sentence: "',window+'"')
    sys.stdout.write(window)

    for _step in range(110):
      # one-hot encode the window; positions past the end of a short window stay all-zero
      encoded = np.zeros((1, LENGTH, vocab_size))
      for pos, ch in enumerate(window):
        encoded[0, pos, char_number[ch]] = 1

      probs = modified_model.predict(encoded, verbose=0)[0]
      sampled = number_char[create_sample(probs, val)]

      # drop the oldest character and append the new one
      window = window[1:] + sampled

      sys.stdout.write(sampled)
      sys.stdout.flush()
    print()


In [None]:
# train model
EPOCHS = 20

# run print_text at the end of every epoch to show a sample of generated text
print_callback = LambdaCallback(on_epoch_end=print_text)

modified_model.fit(
  x,
  y,
  batch_size=128,
  epochs=EPOCHS,
  callbacks=[print_callback],
)

Epoch 1/20

Generating Text after Epoch  0
Value:  0.75
Generated sentence: " i would be honored. "
i would be honored. la r oi ahleo n w a hluogaisuiseeosoersu oga  oaaraatoisab!iootdoiein ath @s  !  u h  eioavo ut )  htnrte!iytd
Epoch 2/20

Generating Text after Epoch  1
Value:  0.75
Generated sentence: " boat buddies forever. haaaaaa. you guys killed it."
boat buddies forever. haaaaaa. you guys killed it. ss yh s hftl!e rews #ll xs we thont toc ghs ( se @ an pud doto ad le afe mhy yisag ponl am te sang aticidh to
Epoch 3/20

Generating Text after Epoch  2
Value:  0.75
Generated sentence: " its hashtags time! tell us a funny thing you heard"
its hashtags time! tell us a funny thing you heard and. sasgen eed #tale yol th nelr. anen the ad coe buw @felltond tolirs #fallenfane to los so thurs an thanp 
Epoch 4/20

Generating Text after Epoch  3
Value:  0.75
Generated sentence: " still not over this. she is just crushing right no"
still not over this. she is just crushing right not @fall

<tensorflow.python.keras.callbacks.History at 0x7fd965b2a950>

In [None]:
# create text with a given sentence and diversity  value
def generate_text(sentence, diversity, num_chars=120):
  """Generate text with `modified_model`, seeded by `sentence`.

  The seed is truncated to its first LENGTH characters. The seed and every
  sampled character are echoed to stdout as they are produced.

  Args:
    sentence: seed text; only the first LENGTH characters are used.
    diversity: temperature passed to `create_sample`.
    num_chars: number of characters to generate (defaults to 120, the
      previously hard-coded value).

  Returns:
    The generated characters as a single string (the seed is not included).
  """
  sentence = sentence[0: LENGTH]
  print('sentence: ',sentence)
  print('diversity: ',diversity)

  sys.stdout.write(sentence)

  pieces = []
  for _ in range(num_chars):
    # one-hot encode the current window; unused trailing positions stay all-zero
    x_pred = np.zeros((1, LENGTH, len(chars)))
    for j, character in enumerate(sentence):
      x_pred[0, j, char_number[character]] = 1

    prediction = modified_model.predict(x_pred, verbose=0)[0]
    # named `sampled_char` so it does not shadow the module-level `next_char` list
    sampled_char = number_char[create_sample(prediction, diversity)]

    pieces.append(sampled_char)
    # slide the window: drop the oldest character, append the new one
    sentence = sentence[1:] + sampled_char

    sys.stdout.write(sampled_char)
    sys.stdout.flush()
  print()
  return ''.join(pieces)


      


In [None]:
generated_texts = []
# create sentences and add them to the generated_texts list

NUM_SAMPLES = 30  # number of seed tweets to generate from
seed_pool = list(tweets)

# random.sample raises ValueError when asked for more items than exist,
# so cap the request at the number of available tweets
for sample in random.sample(seed_pool, min(NUM_SAMPLES, len(seed_pool))):
  for diversity in [0.75]:
    generated_texts.append(generate_text(sample, diversity))
    print()

sentence:  news &amp; jokes for friday 11/20/20. #fallontonig
diversity:  0.75
news &amp; jokes for friday 11/20/20. #fallontonight #fallonmono here on the show!!!!! that on becimetain he in wrisher scomen the the show this you hed in song: hith in 

sentence:  its hashtags time! use six words to describe the u
diversity:  0.75
its hashtags time! use six words to describe the ure is guit the same on the show!!! #to aldormeal in by and whass in youre by keringruxgiytume how @diythadvanthlots, con

sentence:  tonight: @chancetherapper is here, talk with @thed
diversity:  0.75
tonight: @chancetherapper is here, talk with @thedaallisi. on in inm wimt fun. hame! in a well in the show!!?! that it suw the show! #andemallowfor stall you. you have do

sentence:  me and paul rudd do a shot-for-shot remake of dead
diversity:  0.75
me and paul rudd do a shot-for-shot remake of deadone. and tug i sibe os the show! hour fyaw e pun me falpond chacken #gureachadsicenaten gimed and a prrome bround @dea

## Save Generated Text as Pickle File

In [None]:
# export generated text
# the `with` block closes (and flushes) the file even if pickle.dump raises
with open("fallon_text_generation_modified.pickle", 'wb') as text_pickle_out:
  pickle.dump(generated_texts, text_pickle_out)