# Demo: tweepy

In [1]:
import os, sys, json 
import tweepy
import pandas as pd
import pprint
from collections import defaultdict

In [2]:
# read in enviroment variables for authentication
TWITTER_KEY = os.environ["TWITTER_KEY"]
TWITTER_SECRET = os.environ["TWITTER_SECRET"]
TWITTER_ACCESS_TOKEN = os.environ["TWITTER_ACCESS_TOKEN"]
TWITTER_ACCESS_SECRET = os.environ["TWITTER_ACCESS_SECRET"]

In [3]:
# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(TWITTER_KEY, TWITTER_SECRET)
auth.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [4]:
# read user timeline
user = '@POTUS'
response = api.user_timeline(user)

In [5]:
# looking at first response entry 
r = response[0]

In [6]:
# pretty print 
pprint.pprint(r)

{'contributors': None,
 'coordinates': None,
 'created_at': 'Thu Aug 30 16:58:27 +0000 2018',
 'entities': {'hashtags': [],
              'symbols': [],
              'urls': [],
              'user_mentions': [{'id': 822215673812119553,
                                 'id_str': '822215673812119553',
                                 'indices': [3, 14],
                                 'name': 'The White House',
                                 'screen_name': 'WhiteHouse'},
                                {'id': 25073877,
                                 'id_str': '25073877',
                                 'indices': [95, 111],
                                 'name': 'Donald J. Trump',
                                 'screen_name': 'realDonaldTrump'}]},
 'favorite_count': 0,
 'favorited': False,
 'geo': None,
 'id': 1035210149059874818,
 'id_str': '1035210149059874818',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_

In [7]:
# checking logic on first entry 
creation = r['created_at']
text = r['text']
print(creation, text)

Thu Aug 30 16:58:27 +0000 2018 RT @WhiteHouse: Economic gains exceeded expectations in the first full quarter since President @realDonaldTrump's Tax Cuts and Jobs Act too…


In [8]:
# create outer list to hold values 
df_stage = []
# iterate through json response 
for r in response:
    # grab values
    creation = r['created_at']
    text = r['text']
    # create a list of these values 
    lst = [creation, text]
    # append inner list to outer list 
    df_stage.append(lst)
    
# create column headers
columns = ['created_at','text']
# init dataframe
df = pd.DataFrame(df_stage, columns=columns)

In [9]:
# looking at first text entry
text = df.loc[0, 'text']
text

"RT @WhiteHouse: Economic gains exceeded expectations in the first full quarter since President @realDonaldTrump's Tax Cuts and Jobs Act too…"

In [10]:
# init counter object
d = defaultdict(int)

In [11]:
# define count function and check against one example 
def count_words(text):
    # break string into list of words
    words = text.split(' ')
    print(words)
    # iterate through words
    for word in words:
        # increment word counter
        d[word]+=1
        
# test run
count_words(text)

['RT', '@WhiteHouse:', 'Economic', 'gains', 'exceeded', 'expectations', 'in', 'the', 'first', 'full', 'quarter', 'since', 'President', "@realDonaldTrump's", 'Tax', 'Cuts', 'and', 'Jobs', 'Act', 'too…']


In [12]:
# run function against all text values
df.text.apply(count_words)

['RT', '@WhiteHouse:', 'Economic', 'gains', 'exceeded', 'expectations', 'in', 'the', 'first', 'full', 'quarter', 'since', 'President', "@realDonaldTrump's", 'Tax', 'Cuts', 'and', 'Jobs', 'Act', 'too…']
['RT', '@FLOTUS:', 'Congratulations', 'to', '@WhiteHouseHstry', 'on', 'a', 'successful', '#PresidentialSites', 'Summit.', 'It', 'was', 'great', 'hosting', 'you', 'at', 'the', '@WhiteHouse', 'http…']
['RT', '@realDonaldTrump:', 'STATEMENT', 'FROM', 'THE', 'WHITE', 'HOUSE\n\nPresident', 'Donald', 'J.', 'Trump', 'feels', 'strongly', 'that', 'North', 'Korea', 'is', 'under', 'tremendous', 'pressure…']
['RT', '@WhiteHouse:', 'Families', 'across', 'the', 'country', 'are', 'affected', 'by', 'substance', 'abuse,', 'and', 'President', "Trump's", 'was', 'no', 'different.', '\n\n"[My', 'brother', 'Fred]', 'h…']
['RT', '@realDonaldTrump:', '#StopTheBias', 'https://t.co/xqz599iQZw']
['RT', '@WhiteHouse:', 'New', 'report', 'just', 'in', 'from', '@CommerceGov:', 'Economic', 'growth', 'exceeded', 'origin

0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
13    None
14    None
15    None
16    None
17    None
18    None
19    None
Name: text, dtype: object

In [13]:
# check output 
d

defaultdict(int,
            {'RT': 21,
             '@WhiteHouse:': 10,
             'Economic': 3,
             'gains': 2,
             'exceeded': 3,
             'expectations': 2,
             'in': 6,
             'the': 19,
             'first': 3,
             'full': 3,
             'quarter': 2,
             'since': 3,
             'President': 4,
             "@realDonaldTrump's": 2,
             'Tax': 2,
             'Cuts': 2,
             'and': 6,
             'Jobs': 2,
             'Act': 2,
             'too…': 2,
             '@FLOTUS:': 4,
             'Congratulations': 1,
             'to': 12,
             '@WhiteHouseHstry': 1,
             'on': 1,
             'a': 7,
             'successful': 1,
             '#PresidentialSites': 1,
             'Summit.': 1,
             'It': 2,
             'was': 4,
             'great': 1,
             'hosting': 2,
             'you': 1,
             'at': 3,
             '@WhiteHouse': 5,
             'http…': 1,
 

In [14]:
# return sorted (max first) list using lambda syntax
    # lambda x: (for each value of x)
    # d[x] (look at the value of d[x])
    # then (use this as the key to return a sorted list of keys in reverse order)
srted = sorted(d, key = lambda x: d[x], reverse=True)
srted

['RT',
 'the',
 'to',
 '@WhiteHouse:',
 'a',
 'in',
 'and',
 '@realDonaldTrump:',
 'that',
 'of',
 '@WhiteHouse',
 'President',
 '@FLOTUS:',
 'was',
 '&amp;',
 'Economic',
 'exceeded',
 'first',
 'full',
 'since',
 'at',
 'will',
 'with',
 'all',
 'I',
 'gains',
 'expectations',
 'quarter',
 "@realDonaldTrump's",
 'Tax',
 'Cuts',
 'Jobs',
 'Act',
 'too…',
 'It',
 'hosting',
 'is',
 'are',
 'by',
 'just',
 'from',
 'for',
 'different',
 'be',
 '2026',
 'our',
 'Mexico',
 'new',
 'benefits',
 'deal',
 'strong',
 'Congratulations',
 '@WhiteHouseHstry',
 'on',
 'successful',
 '#PresidentialSites',
 'Summit.',
 'great',
 'you',
 'http…',
 'STATEMENT',
 'FROM',
 'THE',
 'WHITE',
 'HOUSE\n\nPresident',
 'Donald',
 'J.',
 'Trump',
 'feels',
 'strongly',
 'North',
 'Korea',
 'under',
 'tremendous',
 'pressure…',
 'Families',
 'across',
 'country',
 'affected',
 'substance',
 'abuse,',
 "Trump's",
 'no',
 'different.',
 '\n\n"[My',
 'brother',
 'Fred]',
 'h…',
 '#StopTheBias',
 'https://t.co/xqz

In [15]:
# reformat most common 10 items into dictionary
    # dictionary comprehension
        # k:d[k] (return the value of k, mapped to the value of k within the d dictionary)
        # for k (iterating through each value of k)
        # in srted[:10] (for the first 10 values of srted)
        #{} (return in a dictionary)
srted_dict = {k:d[k] for k in srted[:10]}
srted_dict

{'RT': 21,
 'the': 19,
 'to': 12,
 '@WhiteHouse:': 10,
 'a': 7,
 'in': 6,
 'and': 6,
 '@realDonaldTrump:': 6,
 'that': 6,
 'of': 6}