In [32]:
import json, os, sys, time
from zipfile import ZipFile
from birdy.twitter import AppClient, UserClient, TwitterRateLimitError
from ratelimiter import RateLimiter

In [33]:
# Twitter application credentials are read from the environment so that no
# secret is ever stored in the notebook (or in its version history).
# Set TWITTER_CONSUMER_KEY / TWITTER_CONSUMER_SECRET before starting the
# kernel; both names are None when the variables are missing, in which case
# explicit keys must be passed to client() / search_twitter().
# NOTE(review): the key pair that used to be hardcoded here has been exposed
# and should be revoked and regenerated.
CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY')
CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET')

In [34]:
# Root directory for results; search_twitter() creates one sub-directory per
# query underneath it.
OUTPUT_DIR = 'tweets'

# Upper bound on the number of stored tweets for a search.
MAX_TWEETS = 10_000

# Mutable module state shared by the functions below:
#   max_id  -- highest tweet id the next search request may return
#   _client -- lazily created API client (see client())
max_id = _client = None

In [35]:
def client(consumer_key=None, consumer_secret=None):
    """Return the shared, authenticated ``AppClient``, creating it on first use.

    The first successful call builds an ``AppClient``, asks it for an access
    token, and caches a second ``AppClient`` constructed with that token in
    the module-level ``_client``. Every later call returns the cached
    instance and ignores its arguments.

    Args:
        consumer_key: Application consumer key; defaults to ``CONSUMER_KEY``.
        consumer_secret: Application consumer secret; defaults to
            ``CONSUMER_SECRET``.

    Returns:
        The cached ``AppClient`` instance.

    Raises:
        Whatever ``AppClient`` / ``get_access_token()`` raise. In that case
        nothing is cached, so the next call retries from scratch.
    """
    global _client
    if _client is not None:
        return _client

    if consumer_key is None:
        consumer_key = CONSUMER_KEY
    if consumer_secret is None:
        consumer_secret = CONSUMER_SECRET

    # Keep the token-less bootstrap client in a local: publishing it through
    # the global before the token request succeeds would leave a permanently
    # unauthenticated client cached if get_access_token() raised.
    bootstrap = AppClient(consumer_key, consumer_secret)
    access_token = bootstrap.get_access_token()
    _client = AppClient(consumer_key, consumer_secret, access_token)
    return _client


In [36]:
def limited(until):
    """Rate-limiter callback: announce how long the upcoming pause will be.

    This function only prints a message; it does not sleep itself.

    Args:
        until: Timestamp on the same scale as ``time.time()`` marking when
            the pause ends.
    """
    remaining = until - time.time()
    seconds = int(round(remaining))
    message = 'Rate limited, sleeping for {:d} seconds'.format(seconds)
    print(message)


In [37]:
@RateLimiter(max_calls=440, period=60*15, callback=limited)
def fetch_tweets(query, consumer_key=None, consumer_secret=None):
    """Fetch one page of search results and move the global cursor back.

    Requests up to 100 statuses for ``query`` bounded by the module-level
    ``max_id``. After a non-empty page, ``max_id`` is lowered to one below
    the smallest id on that page so the next call continues with older
    tweets.

    Args:
        query: Search string sent as the ``q`` parameter.
        consumer_key: Forwarded to ``client()``.
        consumer_secret: Forwarded to ``client()``.

    Returns:
        The list of status dicts, or ``None`` when the page is empty.

    Raises:
        SystemExit: If the API reports ``TwitterRateLimitError``.
    """
    global max_id
    print(f'Fetching: "{query}" TO MAX ID: {max_id}')

    try:
        api = client(consumer_key, consumer_secret).api
        response = api.search.tweets.get(q=query, count=100, max_id=max_id)
        tweets = response.data['statuses']
    except TwitterRateLimitError:
        sys.exit(
            "You've reached your Twitter API rate limit. "
            "Wait 15 minutes before trying again"
        )

    # An empty page means there is nothing older left to fetch.
    if len(tweets) == 0:
        return None

    lowest_id = min(tweet['id'] for tweet in tweets)
    if max_id is None or lowest_id <= max_id:
        max_id = lowest_id - 1
    return tweets

In [38]:
def initialize_max_id(file_list):
    """Lower the global ``max_id`` so a search resumes below stored tweets.

    Each entry of ``file_list`` is expected to look like ``<tweet id>.json``.
    The smallest id found is used to set ``max_id`` to ``smallest - 1``,
    unless ``max_id`` is already lower than that. Entries whose stem is not
    an integer are ignored.

    Args:
        file_list: Iterable of file names (or zip member names) of
            previously saved tweets.
    """
    global max_id

    stored_ids = []
    for fn in file_list:
        stem = os.path.basename(fn).split('.')[0]
        try:
            stored_ids.append(int(stem))
        except ValueError:
            # Not a tweet dump (e.g. a stray settings.json); skip it.
            continue

    if stored_ids:
        # Subtract one from the overall minimum exactly once. Comparing each
        # id against an already-decremented cursor would leave max_id one too
        # high whenever two stored ids are consecutive.
        resume_below = min(stored_ids) - 1
        if max_id is None or resume_below < max_id:
            max_id = resume_below

    if max_id is not None:
        print('Found previously fetched tweets. Setting max_id to %d' % max_id)



In [39]:
def halt(_id):
    """Stop the run because an already-stored tweet id was reached.

    Prints the id and a hint on how to force a re-fetch, then terminates
    via ``sys.exit``.

    Args:
        _id: Integer id of the tweet that was found in the existing output.

    Raises:
        SystemExit: Always.
    """
    reached = 'Reached historically fetched ID: %d' % _id
    hint = ('In order to re-fetch older tweets, '
            'remove tweets from the output directory or output zip file.')
    print(reached)
    print(hint)
    sys.exit('\n!!IMPORTANT: Tweets older than 7 days will not be re-fetched')


In [40]:
def search_twitter(query, consumer_key=None, consumer_secret=None,
            newtweets=False, dozip=True, verbose=False):
    """Page through search results for ``query`` and store each tweet as JSON.

    Tweets are written as ``<id>.json`` under ``OUTPUT_DIR/<query slug>/``,
    either as members of ``<query slug>.zip`` (``dozip=True``) or as
    individual files. Paging continues until ``fetch_tweets`` returns
    ``None``, an already-stored id is met (``halt``), or ``MAX_TWEETS``
    stored tweets is reached.

    Args:
        query: Search string; whitespace is replaced by ``_`` to form the
            directory and zip name.
        consumer_key: Forwarded to ``fetch_tweets``.
        consumer_secret: Forwarded to ``fetch_tweets``.
        newtweets: When false, resume below the oldest stored tweet via
            ``initialize_max_id``. When true, start from the newest tweets
            and stop at the first one that is already stored.
        dozip: Store tweets in a zip archive instead of loose files.
        verbose: Print every tweet id as it is processed.

    Raises:
        SystemExit: From ``halt`` when a stored id is reached, or when
            ``MAX_TWEETS`` stored tweets is reached.
    """
    global max_id
    # The cursor is module state; start each search clean so a previous
    # query in the same kernel cannot cap this one at its old position.
    max_id = None

    slug = '_'.join(query.split())
    output_dir = os.path.join(OUTPUT_DIR, slug)
    os.makedirs(output_dir, exist_ok=True)

    outzip = None
    if dozip:
        outzip = ZipFile(os.path.join(output_dir, '%s.zip' % slug), 'a')

    try:
        # Always load the stored names: they are needed for duplicate
        # detection even when newtweets=True skips the resume step.
        existing = outzip.namelist() if dozip else os.listdir(output_dir)
        seen = {name for name in existing if name.endswith('.json')}
        if not newtweets:
            initialize_max_id(sorted(seen))

        while True:
            tweets = fetch_tweets(
                query,
                consumer_key=consumer_key,
                consumer_secret=consumer_secret)
            if tweets is None:
                print('Search Completed')
                break

            for tweet in tweets:
                if verbose:
                    print(tweet['id'])
                fn = '%d.json' % tweet['id']
                if fn in seen:
                    halt(tweet['id'])
                    return  # halt() exits; guard in case it ever returns

                payload = json.dumps(tweet, indent=4)
                if dozip:
                    outzip.writestr(fn, payload)
                else:
                    with open(os.path.join(output_dir, fn), 'w') as outfile:
                        outfile.write(payload)
                seen.add(fn)

                if len(seen) >= MAX_TWEETS:
                    sys.exit('Reached maximum tweet limit of: %d' % MAX_TWEETS)
    finally:
        # One place closes the archive on every exit path, including
        # SystemExit raised by halt() or the MAX_TWEETS check.
        if outzip is not None:
            outzip.close()


In [41]:
# Driver cell: runs a single search with the default options. The other
# queries are kept commented out; swap which line is active to run a
# different query.
#search_twitter("Grubhub")
search_twitter("UberEats")
#search_twitter("DoorDash")

Fetching: "UberEats" TO MAX ID: None
Fetching: "UberEats" TO MAX ID: 1112897677916696576
Fetching: "UberEats" TO MAX ID: 1112891541834481664
Fetching: "UberEats" TO MAX ID: 1112886002090049535
Fetching: "UberEats" TO MAX ID: 1112878799220236287
Fetching: "UberEats" TO MAX ID: 1112872895292862463
Fetching: "UberEats" TO MAX ID: 1112864824193474559
Fetching: "UberEats" TO MAX ID: 1112858424604278786
Fetching: "UberEats" TO MAX ID: 1112851887508934655
Fetching: "UberEats" TO MAX ID: 1112846438717784064
Fetching: "UberEats" TO MAX ID: 1112839410297569285
Fetching: "UberEats" TO MAX ID: 1112832158664601599
Fetching: "UberEats" TO MAX ID: 1112823728797310975
Fetching: "UberEats" TO MAX ID: 1112816676762869759
Fetching: "UberEats" TO MAX ID: 1112808603336011775
Fetching: "UberEats" TO MAX ID: 1112800056996880383
Fetching: "UberEats" TO MAX ID: 1112792216659853311
Fetching: "UberEats" TO MAX ID: 1112784666216394754
Fetching: "UberEats" TO MAX ID: 1112775472193200128
Fetching: "UberEats" TO MAX

SystemExit: Reached maximum tweet limit of: 10000

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
