In [1]:
import tweepy
import pandas as pd
import numpy as np
import os
from datetime import datetime
import datetime as dt
%matplotlib inline

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 1000)
pd.set_option('float_format', '{:f}'.format)

In [2]:
app_key = ''
app_key_secret = ''

# Tweepy

In [3]:
auth = tweepy.AppAuthHandler(app_key, app_key_secret)
api = tweepy.API(auth)


In [4]:
# Twitter-syntaxed queries
hashtags = ['#filmyourhospital', '#covid19', '#plandemic']
hashtag = hashtags[1]
q_originals = ' -filter:replies -filter:retweets -filter:quotes'
q_english = ' lang:en'
q_coordinates = ' has:coordinates'
q_verified = ' filter:verified'

query = hashtag

In [5]:
def search_tweets(query, i='', date=None, save=False, users=False):
    '''
    Searches for tweets and returns them as a DataFrame.
    
    Arguments:    
        - query: String with a twitter-syntaxed query for a status search.
        - i: Any String-able type of data to add to the filenames.
        - date: A datetime object representing today and now. If None: use datetime.now()
        - save: default=False. If True: saves the searched DataFrame into .csv files under '/data'.
        - users: default=False. If True: creates a second DataFrame for the users in each tweet, saves it if save=True, and returns two DataFrames instead.
        
    Returns:
        int: The lowest id among the statuses retrieved
        
    '''
    if not date:
        date = datetime.now()
    path = 'data'
    if not os.path.exists(path):
        os.makedirs(path)

    _tweets = []
    _users = []
    for tweet in tweepy.Cursor(api.search, q=query).items(100):
        _users.append(tweet.user._json)
        _tweets.append(tweet._json)
    df_tweets = pd.DataFrame.from_dict(_tweets)
    df_users = pd.DataFrame.from_dict(_users)

    if save:
        df_tweets.to_csv(path+'/tweets_'+str(date).replace(' ', '_').replace(':', '-').split('.')[0]+'_'+str(i)+'.csv')
        if users:
            df_users.to_csv(path+'/users_'+str(date).replace(' ', '_').replace(':', '-').split('.')[0]+'_'+str(i)+'.csv')
    
    return min(df_tweets['id'])

In [6]:
_tweets = []
for tweet in tweepy.Cursor(api.search, q=query).items(100):
    _tweets.append(tweet._json)
df_tweets = pd.DataFrame.from_dict(_tweets)


In [7]:
def exhaust_search(api, query, limit=None):
    '''
    Searches and saves all tweets into .csv files. The search is limited by the current remaining application requests from api.
    
    Parameters:
        - api: Tweepy.API object with an application authorization
        - query: String with a twitter-syntaxed query for a status search.
        - limit: Maximum number of API requests to make to Twitter.
        
    Returns:
        None
    '''
    
    status = api.rate_limit_status()
    if not limit:
        limit =  status['resources']['search']['/search/tweets']['limit']
    
    date = datetime.now()
    remaining_requests = status['resources']['search']['/search/tweets']['remaining']
    until = status['resources']['search']['/search/tweets']['reset']
    until = datetime.fromtimestamp(until)
    now = datetime.now()
    time_to_reset = until - now



    if remaining_requests <= 0:
        raise Exception(f'You have reached your API request limit. You will get another {limit} requests in:\n{time_to_reset.seconds} Seconds ({until})')

    if remaining_requests < limit:
        limit = remaining_requests
        print(f'You have {remaining_requests} requests until {until}')


    previous_max_id = ''
    for i in range(limit):
        id = search_tweets(query+previous_max_id, i+1, date, save=True)
        previous_max_id = ' max_id:' + str(id)

        print(str(round(i*100/limit, 2)) + '%', end='\t')
       

In [8]:
try:
    exhaust_search(api, query)
except Exception as e:
    print(e)

You have 443 requests until 2020-05-09 20:59:08
0.0%	0.23%	0.45%	0.68%	0.9%	1.13%	1.35%	1.58%	1.81%	2.03%	2.26%	2.48%	2.71%	2.93%	3.16%	3.39%	3.61%	3.84%	4.06%	4.29%	4.51%	4.74%	4.97%	5.19%	5.42%	5.64%	5.87%	6.09%	6.32%	6.55%	6.77%	7.0%	7.22%	7.45%	7.67%	7.9%	8.13%	8.35%	8.58%	8.8%	9.03%	9.26%	9.48%	9.71%	9.93%	10.16%	10.38%	10.61%	10.84%	11.06%	11.29%	11.51%	11.74%	11.96%	12.19%	12.42%	12.64%	12.87%	13.09%	13.32%	13.54%	13.77%	14.0%	Twitter error response: status code = 429
