In [1]:
import json
with open('twitterData.txt','r') as f:
    twtr_auth = json.load(f)

###### my twitter keys are stored in twitterData.txt

In [2]:
from twitter import *

In [3]:
# Loading my authentication tokens
#with open('twitterData.txt','r') as f:
#    twtr_auth = json.load(f)

# To make it more readable, lets store
# the OAuth credentials in strings first.
CONSUMER_KEY = twtr_auth['API_keys']
CONSUMER_SECRET = twtr_auth['API_secret_key']
OAUTH_TOKEN = twtr_auth['Access_token']
OAUTH_TOKEN_SECRET = twtr_auth['Access_token_secret']
    
# Then, we store the OAuth object in "auth"
auth = OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)
# Notice that there are four tokens - you need to create these in the
# Twitter Apps dashboard after you have created your own "app".

# We now create the twitter search object.
t = Twitter(auth=auth)

###### loading authentication tokens to use twitter APIs

In [4]:
import sys
import time
from twitter.api import TwitterHTTPError
from urllib.error import URLError
from http.client import BadStatusLine

def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):
    # A nested helper function that handles common HTTPErrors. Return an updated
    # value for wait_period if the problem is a 500 level error. Block until the
    # rate limit is reset if it's a rate limiting issue (429 error). Returns None
    # for 401 and 404 errors, which requires special handling by the caller.
    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):
        if wait_period > 3600: # Seconds
            print('Too many retries. Quitting.', file=sys.stderr)
            raise e
        if e.e.code == 401:
            return None
        elif e.e.code == 404:
            print('Encountered 404 Error (Not Found)', file=sys.stderr)
            return None
        elif e.e.code == 429:
            print('Encountered 429 Error (Rate Limit Exceeded)', file=sys.stderr)
            if sleep_when_rate_limited:
                print("Retrying in 15 minutes...ZzZ...", file=sys.stderr)
                sys.stderr.flush()
                time.sleep(60*15 + 5)
                print('...ZzZ...Awake now and trying again.', file=sys.stderr)
                return 2
            else:
                raise e # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            print('Encountered %i Error. Retrying in %i seconds' % (e.e.code, wait_period), file=sys.stderr)
            time.sleep(wait_period)
            wait_period *= 1.5
            return wait_period
        else:
            raise e

    # End of nested helper function

    wait_period = 2
    error_count = 0
    while True:
        try:
            return twitter_api_func(*args, **kw)
        except TwitterHTTPError as e:
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return
        except URLError as e:
            error_count += 1
            print("URLError encountered. Continuing.", file=sys.stderr)
            if error_count > max_errors:
                print("Too many consecutive errors...bailing out.", file=sys.stderr)
                raise
        except BadStatusLine as e:
            error_count += 1
            print >> sys.stderr, "BadStatusLine encountered. Continuing."
            if error_count > max_errors:
                print("Too many consecutive errors...bailing out.", file=sys.stderr)
                raise

###### as Twitter as a very stringent api rate limit, need to use a fucntion that will rest and restart after a certain interval

In [34]:
df = pd.DataFrame(columns=['screen_name','tweets','retweet','retweetcount','favcount'])
df.to_csv('allnomineesTweets2.csv', index=False)

###### create a csv file to store the fetch data

In [5]:
import pandas as pd
def harvest_user_timeline(t, screen_name, user_id=None, max_results=1000):  
    assert (screen_name != None) != (user_id != None)    
    #"Must have screen_name or user_id, but not both"            
    kw = {  # Keyword args for the Twitter API call        
        'count': 200,        
        'trim_user': 'true',        
        'include_rts' : 'true',        
        'since_id' : 1
    } 
    if screen_name:        
        kw['screen_name'] = screen_name 
    else:        
        kw['user_id'] = user_id            
    max_pages = 16    
    results = []        
    tweets = make_twitter_request(t.statuses.user_timeline, **kw)        
    if tweets is None: # 401 (Not Authorized) - Need to bail out on loop entry        
        tweets = []            
    results += tweets        
    print(sys.stderr, 'Fetched %i tweets' % len(tweets))
    page_num = 1        
    # Many Twitter accounts have fewer than 200 tweets so you don't want to enter    
    # the loop and waste a precious request if max_results = 200.        
    # Note: Analogous optimizations could be applied inside the loop to try and     
    # save requests. e.g. Don't make a third request if you have 287 tweets out of     
    # a possible 400 tweets after your second request. Twitter does do some     
    # post-filtering on censored and deleted tweets out of batches of 'count', though,    
    # so you can't strictly check for the number of results being 200. You might get    
    # back 198, for example, and still have many more tweets to go. If you have the    
    # total number of tweets for an account (by GET /users/lookup/), then you could     
    # simply use this value as a guide.        
    if max_results == kw['count']:        
        page_num = max_pages # Prevent loop entry        
    while page_num < max_pages and len(tweets) > 0 and len(results) < max_results:            
            # Necessary for traversing the timeline in Twitter's v1.1 API:        
            # get the next query's max-id parameter to pass in.        
            # See https://dev.twitter.com/docs/working-with-timelines.        
        kw['max_id'] = min([ tweet['id'] for tweet in tweets]) - 1             
        tweets = make_twitter_request(t.statuses.user_timeline, **kw)        
        results += tweets
        print(sys.stderr, 'Fetched %i tweets' % (len(tweets)))            
        page_num += 1            
    print(sys.stderr, 'Done fetching tweets')
    return results[:max_results] 

###### fucntion that will fetch tweets using a screen name with other parameters

In [6]:
nominees = ['@MichaelBennet','@JoeBiden','@CoryBooker','@PeteButtigieg','@JulianCastro','@jddelaney','@TulsiGabbard','@SenGillibrand'
           '@MikeGravel','@KamalaHarris','@Hickenlooper','@GovInslee','@amyklobuchar','@WayneMessam','@sethmoulton','@BetoORourke','@TimRyan'
           '@BernieSanders','@RepSwalwell','@realDonaldTrump','@ewarren','@GovBillWeld','@AndrewYang']

###### list of republicans as well as democrats nominees

In [7]:
for i in nominees:
    tweets = harvest_user_timeline(t, screen_name=i)
    for tweet in tweets:
        df = pd.DataFrame([[i, tweet['text'],tweet['retweeted'],tweet['retweet_count'],tweet['favorite_count']]], columns=['screen_name','tweets','retweet','retweetcount','favcount'])
        with open('allnomineesTweets2.csv','a',encoding='utf-8') as f:
            df.to_csv(f,header=False, index=False)
    

<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 199 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 178 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Done fetching tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 199 tweets
<ipykernel.iostream.OutStream object at 0x0000022B

Encountered 404 Error (Not Found)


<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 0 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Done fetching tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Done fetching tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B

Encountered 404 Error (Not Found)


<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 0 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Done fetching tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Done fetching tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 197 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B0F3BD278> Fetched 200 tweets
<ipykernel.iostream.OutStream object at 0x0000022B

###### finally call the function, fetch and save the tweets