# Tweet word cloud 

### Generating a word cloud in the shape of a Twitter logo based on a user's tweets

<hr style="height:1px; border:none; color:black; background-color:black;">

### Before running the code, you need to:

1. Have a Twitter account. If not, you can sign up at https://twitter.com


2. Apply for a Twitter developer account. You can apply at https://developer.twitter.com


3. Create an app at https://developer.twitter.com/en/apps. Then in the **Keys and tokens** tab, get your Twitter credentials (consumer key, consumer secret, access token, and access token secret).


4. Edit the file `keys.py` and add your credentials.

In [1]:
import tweepy
import keys
import re
import os
import numpy as np
from nltk.corpus import stopwords
from textblob import TextBlob
from PIL import Image
from wordcloud import WordCloud

### Authenticate with Twitter

In [2]:
# Build the OAuth handler from the app credentials stored in keys.py,
# then attach the user-level access token pair to it.
auth = tweepy.OAuthHandler(keys.consumer_key, keys.consumer_secret)
auth.set_access_token(keys.access_token, keys.access_token_secret)

# API client used by the cells below. Both rate-limit flags are switched on;
# presumably the client then waits (and reports it) when Twitter's rate limit
# is hit instead of raising -- confirm against the installed tweepy version.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)

### Retrieve tweets

In [3]:
%%time

# Account whose timeline is harvested
twitter_handle = 'um_dearborn'

# The time window is specified through tweet IDs rather than dates.
# A tweet's ID is the trailing number of its web URL, e.g.
# https://twitter.com/UM_Dearborn/status/1079954125616566272
#
# The pair below retrieves all the tweets posted by @um_dearborn in 2019:
#   since_id -> ID of its last tweet in 2018; only results with a greater
#               (more recent) ID are returned
#   max_id   -> ID of its last tweet in 2019; only statuses with an ID less
#               than (older than) or equal to it are returned
since_id = 1079954125616566272
max_id = 1212044012389044228

# Query parameters handed to api.user_timeline through the Cursor
timeline_params = dict(
    screen_name=twitter_handle,
    since_id=since_id,
    max_id=max_id,
    include_rts=False,
    tweet_mode='extended',
)

# Cursor object wrapping the user_timeline method
cursor = tweepy.Cursor(api.user_timeline, **timeline_params)

# Collect the full text of every retrieved tweet in a list
tweets = [status.full_text for status in cursor.items()]

Wall time: 13.7 s


### Clean up the tweets

In [4]:
# Stop words: start from NLTK's English list
stops = set(stopwords.words('english'))

# Additional custom stop words: curly quotes (both the opening and the closing
# single/double variants), contraction fragments such as "'s" and "n't", and
# Twitter residue such as "rt" and "amp" (the latter presumably left over from
# the HTML entity "&amp;").
# Keep every entry in lower case: words are lower-cased before the lookup below.
additional_stops = {"‘", "’", "“", "”", "'s", "n't", "'m", "'re", "'ll", "rt", "i.e", "amp"}
stops.update(additional_stops)

# Raw-string patterns (no invalid "\s" escape in a plain string literal),
# compiled once instead of being re-specified on every loop iteration.
mention_pattern = re.compile(r'@\S+')   # "@" followed by any run of non-whitespace
url_pattern = re.compile(r'http\S+')    # "http" followed by any run of non-whitespace

tweet_words = []

for tweet in tweets:
    # remove mentions, then URLs
    text = mention_pattern.sub('', tweet)
    text = url_pattern.sub('', text)
    # tokenize with TextBlob and keep only the words that are not stop words
    tweet_words.extend(word for word in TextBlob(text).words if word.lower() not in stops)

### Generate a word cloud

In [5]:
# Directory holding the mask image: the directory of this file when __file__
# is defined (run as a script), otherwise the current working directory
# (e.g. inside a notebook, where __file__ is not set).
d = os.path.dirname(__file__) if "__file__" in locals() else os.getcwd()

# Read the mask image into an array. The context manager closes the
# underlying file once the pixel data has been copied into the array.
with Image.open(os.path.join(d, "twitter_mask.png")) as mask_image:
    mask = np.array(mask_image)

# Configure the word cloud.
# No width/height are passed: when a mask is supplied, WordCloud takes the
# canvas size from the mask's shape and ignores those two arguments.
wordcloud = WordCloud(colormap='viridis',         # you can choose other matplotlib colormaps
                      background_color='white',
                      mask=mask)

# Generate the word cloud from the cleaned words, joined into one string.
# Note: this raises if tweet_words is empty (there is nothing to draw).
wordcloud.generate(' '.join(tweet_words))

# Store to file; the trailing semicolon keeps the notebook from echoing
# the returned object's repr as cell output.
wordcloud.to_file('tweet_wordcloud.png');