In [66]:
import twitter
import datetime
import sys
import time

### Twitter API Authorization
It is good practice to keep credentials in a separate text file that is excluded from version control (e.g. listed in `.gitignore`), so that if/when you push your notebook to GitHub, you don't have to remember to remove the credentials every time.

In [67]:
print("Authorizing...")

# Credentials are read from a separate text file so they never appear in the
# notebook itself. Expected layout, one value per line, in this order:
#   consumer key, consumer secret, OAuth token, OAuth token secret
with open('twitter_auth.txt') as f:
    # Blank lines are dropped so a stray empty line cannot shift every
    # credential into the wrong variable.
    file_content = [line.strip() for line in f if line.strip()]

# Fail with a clear message instead of an opaque IndexError further down.
if len(file_content) < 4:
    raise ValueError(
        "twitter_auth.txt must contain 4 non-empty lines (consumer key, "
        "consumer secret, OAuth token, OAuth token secret); "
        "found {0}".format(len(file_content))
    )

CONSUMER_KEY = file_content[0]
CONSUMER_SECRET = file_content[1]
OAUTH_TOKEN = file_content[2]
OAUTH_TOKEN_SECRET = file_content[3]

#twitter authorization
# Note the argument order: the token pair comes before the consumer pair.
auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

# Streaming client; later cells reuse its `auth` attribute.
twitter_api = twitter.TwitterStream(auth=auth)

# NOTE(review): building the client does not appear to contact Twitter, so this
# guard presumably never fires and invalid credentials would only surface on
# the first real request -- confirm before relying on the message below.
if (not twitter_api):
    print ("Can't Authenticate")
    sys.exit(-1)

print("Authorization successful")
    

Authorizing...
Authorization successful


### Streaming API
The streaming API rate limit is not publicized. For this API, you are limited not by the number of tweets but by the number of requests. A request is a unique opening of the Twitter stream. If you need to run multiple queries, it is possible to lump them all together into one request. If you need to keep track of which query each tweet came from, however, you will have to loop through the queries individually. This is when you might run into rate limiting problems, because each query constitutes a separate request.

For example, if you want tweets that you think might be questions, you would run a single query (request) for anything that contains a term in `['?', 'where', 'what', 'how',...]`, as you wouldn't need to keep track of which tweet came from which query.

If you wanted 200 tweets from each of `['?', 'where', 'what', 'how']` however, you might need to make separate requests for each. Additionally, there is some limit on how many queries you can jam into one request. 

### From [Twitter](https://dev.twitter.com/streaming/overview/connecting):
>Rate limiting
>Clients which do not implement backoff and attempt to reconnect as often as possible will have their connections rate limited for a small number of minutes. Rate limited clients will receive HTTP 420 responses for all connection requests.

>Clients which break a connection and then reconnect frequently (to change query parameters, for example) run the risk of being rate limited.

>Twitter does not make public the number of connection attempts which will cause a rate limiting to occur, but there is some tolerance for testing and development. A few dozen connection attempts from time to time will not trigger a limit. However, it is essential to stop further connection attempts for a few minutes if a HTTP 420 response is received. If your client is rate limited frequently, it is possible that your IP will be blocked from accessing Twitter for an indeterminate period of time.

Also:

>Back off exponentially for HTTP 420 errors. Start with a 1 minute wait and double each attempt. Note that every HTTP 420 received increases the time you must wait until rate limiting will no longer will be in effect for your account.

In [None]:
# the queries we are going to run (each one is opened as its own stream request)
qs = ['the', 'an', 'it', 'who', 'were']

# the twitter stream object (reuses the credentials from the authorization cell)
twitter_stream = twitter.TwitterStream(auth=twitter_api.auth)

# initialize the counters
requests = 0
backoff_timer = 60 # seconds to sleep when rate limited; doubled after every HTTP 420
sleep_timer = 0 # seconds to sleep after each query; grows each time we get rate limited

# user id of every tweet seen; one entry per tweet, so the same id may repeat
uids = []

# we are going to iterate a few times to demonstrate
for iters in range(0,3):

    # for each query in the queries
    for q in qs:

        requests += 1 # count of requests made
        count = 0 # count of tweets per query

        ### this is the chunk that handles rate limiting ################

        # keep trying until the stream actually opens
        while True:
            try: # try to open the stream
                stream = twitter_stream.statuses.filter(track=q)

            except twitter.TwitterHTTPError as e:
                # HTTP 420 is the streaming API's rate-limit response. Any other
                # status (bad credentials, malformed query, ...) will not be fixed
                # by waiting, so surface it instead of retrying forever.
                if e.e.code != 420:
                    raise

                # report the time we are really about to sleep for
                print('rate limited...sleeping for {0} seconds'.format(backoff_timer))
                sys.stdout.flush()
                time.sleep(backoff_timer) # 'back off' for a certain amount of time
                backoff_timer = backoff_timer * 2 # double the backoff timer

                ### since we got rate limited, we must not be sleeping long enough per request
                sleep_timer = sleep_timer + 2 # add 2 seconds to the sleep timer

            else:
                break # the stream opened; stop retrying

        ###################################################################

        for tweet in stream:

            # The stream also delivers messages that are not tweets (keep-alives,
            # delete/limit notices, timeouts); those have no 'user' entry.
            if not isinstance(tweet, dict) or 'user' not in tweet:
                continue

            uids.append(tweet['user']['id'])

            count += 1

            # stop reading this query's stream after every 100th tweet
            if count % 100 == 0:

                print('Request {0} complete'.format(requests))
                print('100 tweets seen from "{0}"'.format(q))
                print(datetime.datetime.now())

                sys.stdout.flush()

                break

        # pause between queries so we do not reconnect too aggressively
        time.sleep(sleep_timer)

In [None]:
# Size of the `uids` list that the streaming cell fills (one entry per tweet,
# so a user who tweeted several times is counted several times).
len(uids)

In [71]:
# Client built from the same `auth` object as the streaming client, but using
# the `twitter.Twitter` class instead of `twitter.TwitterStream`.
t = twitter.Twitter(auth=auth)

# Request the timeline of the account with the given screen name.
# NOTE(review): the number recorded below this cell is not produced by an
# assignment; the expression that displayed it appears to be missing -- confirm.
tweets = t.statuses.user_timeline(screen_name="billybob")

239257191