In [1]:
import json
import tweepy
import os
import datetime
from collections import defaultdict

In [69]:
class TwitterAPI():
    """Convenience wrapper around a tweepy ``API`` client for searching tweets.

    The client is built from OAuth credentials stored in a JSON file and is
    kept on ``self.api``. All search helpers request ``tweet_mode='extended'``.
    """

    # NOTE: the default path below is evaluated once, when the class is defined,
    # so it captures the working directory at that moment.
    def __init__(self, credentials_file_path = os.path.join(os.getcwd(),'credentials/twitter_credentials.json')):
        """Authenticate against Twitter and create the tweepy client.

        Args:
            credentials_file_path: Path to a JSON file containing the keys
                ``CONSUMER_KEY``, ``CONSUMER_SECRET``, ``ACCESS_TOKEN`` and
                ``ACCESS_TOKEN_SECRET``.

        Raises:
            OSError: If the credentials file cannot be opened.
            KeyError: If one of the four expected keys is missing.
        """
        credentials = self.getCredentials(credentials_file_path)
        auth = tweepy.OAuthHandler(credentials['CONSUMER_KEY'], credentials['CONSUMER_SECRET'])
        auth.set_access_token(credentials['ACCESS_TOKEN'], credentials['ACCESS_TOKEN_SECRET'])
        self.api = tweepy.API(auth, wait_on_rate_limit=True)

    def getCredentials(self, credentials_file_path):
        """Load and return the parsed JSON content of ``credentials_file_path``."""
        with open(credentials_file_path) as json_file:
            credentials = json.load(json_file)
        return credentials

    def _addPlaceFilter(self, query, loc):
        """Return ``query`` with a ``place:<id>`` filter for country ``loc`` appended.

        When ``loc`` is falsy the query is returned unchanged and no place
        lookup is performed.
        """
        if not loc:
            return query
        placeId = self.getPlaceIdByCountry(loc)
        return '{} place:{}'.format(query, placeId)

    def getTweetsByCount(self, query, count, loc=None, lang='en', result_type='mixed'):
        """Search for tweets, issuing follow-up requests until ``count`` are collected.

        Args:
            query: Search query string.
            count: Number of tweets wanted.
            loc: Optional country name; when given, the search is restricted
                to the place id resolved by ``getPlaceIdByCountry``.
            lang: Language code forwarded to the search call.
            result_type: Result type forwarded to the search call.

        Returns:
            The result object of the first search call, extended in place with
            the tweets of every follow-up batch. It may hold fewer than
            ``count`` tweets when the search runs out of results or the query
            cap is reached.
        """
        _max_queries = 500  # arbitrarily chosen upper bound on API calls

        query = self._addPlaceFilter(query, loc)

        tweets = tweet_batch = self.api.search(q=query, count=count, lang=lang, result_type=result_type, tweet_mode='extended')
        ct = 1
        # Stop as soon as a batch comes back empty: there is nothing left to
        # page through, and an empty batch gives no usable `max_id`, so further
        # calls would only burn rate limit (and could re-fetch the same tweets).
        while tweet_batch and len(tweets) < count and ct < _max_queries:
            tweet_batch = self.api.search(q=query,
                                          count=count - len(tweets),
                                          max_id=tweet_batch.max_id, lang=lang, result_type=result_type, tweet_mode='extended')
            tweets.extend(tweet_batch)
            ct += 1
        return tweets

    def getTweets7DaysByCount(self, query, count, loc=None, lang='en', result_type='mixed'):
        """Collect up to ``count`` tweets for each day of the 7-day window ending today.

        Args:
            query: Search query string.
            count: Maximum number of tweets to collect per day.
            loc: Optional country name; when given, the search is restricted
                to the place id resolved by ``getPlaceIdByCountry``.
            lang: Language code forwarded to the search call.
            result_type: Result type forwarded to the search call.

        Returns:
            A ``defaultdict(list)`` mapping each day (a ``datetime.datetime``
            at midnight) to a list of ``(tweet.id, tweet.full_text)`` tuples.
            Days for which the cursor yields nothing have no key.
        """
        listOfDates = self.getListOfLast7Dates(datetime.date.today())

        query = self._addPlaceFilter(query, loc)

        tweetsByDay = defaultdict(list)
        # Consecutive boundary dates form the (since, until) pair of one day.
        for since, until in zip(listOfDates, listOfDates[1:]):
            day = datetime.datetime.strptime(since, '%Y-%m-%d')  # parsed once per day
            tweets_cursor = self.getCursor(query=query, dates=(since, until), count=count, lang=lang, result_type=result_type)
            for tweet in tweets_cursor:
                tweetsByDay[day].append((tweet.id, tweet.full_text))

        return tweetsByDay

    def getCursor(self, query, dates, count, lang, result_type):
        """Return an item iterator over at most ``count`` search results.

        Args:
            query: Search query string.
            dates: Pair ``(since, until)`` of ``'YYYY-MM-DD'`` strings.
            count: Maximum number of items the iterator yields.
            lang: Language code forwarded to the search call.
            result_type: Result type forwarded to the search call.
        """
        # NOTE(review): `since` is forwarded as a plain request parameter;
        # confirm the installed tweepy / Twitter search endpoint honours it
        # (the documented alternative is the `since:` query operator).
        cursor = tweepy.Cursor(
            self.api.search,
            q = query,
            since = dates[0],
            until = dates[1],
            lang = lang,
            result_type = result_type,
            tweet_mode ='extended')
        return cursor.items(count)

    def getListOfLast7Dates(self, end_date):
        """Return the day boundaries of the 7-day window ending on ``end_date``.

        The list holds 8 ``'YYYY-MM-DD'`` strings, from ``end_date - 6 days``
        through ``end_date + 1 day`` inclusive, so that each pair of
        consecutive entries delimits one full day.
        """
        # Offsets -6..+1 reproduce: start at end_date-6, stop after end_date+1.
        return [
            (end_date + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(-6, 2)
        ]

    def getFeatures(self, tweets, feature_list):
        """Extract the requested fields from each tweet's raw JSON.

        Args:
            tweets: Iterable of objects exposing a ``_json`` mapping.
            feature_list: Keys to copy out of each ``_json`` mapping.

        Returns:
            A list with one ``{feature: value}`` dict per tweet.

        Raises:
            KeyError: If a requested feature is missing from a tweet.
        """
        return [
            {feature: tweet._json[feature] for feature in feature_list}
            for tweet in tweets
        ]

    def getPlaceIdByCountry(self, loc, granularity="country"):
        """Return the id of the first place returned by a geo search for ``loc``.

        Args:
            loc: Place name to search for.
            granularity: Granularity forwarded to the geo search.

        Raises:
            IndexError: If the geo search returns no place.
        """
        # Forward the caller's granularity instead of a hard-coded "country".
        places = self.api.geo_search(query=loc, granularity=granularity)
        if not places:
            raise IndexError(
                'no place found for {!r} (granularity={!r})'.format(loc, granularity))
        return places[0].id

### Testing by counts

In [21]:
# Scratch check: parse a 'YYYY-MM-DD' string into a datetime, using the same
# format string that getTweets7DaysByCount uses to build its per-day keys.
datetime.datetime.strptime('2020-11-15', "%Y-%m-%d")

datetime.datetime(2020, 11, 15, 0, 0)

In [8]:
# Build the client using the default credentials path
# (credentials/twitter_credentials.json under the working directory).
api = TwitterAPI()

In [95]:
# Request 2 tweets matching 'happy' with the default lang='en' and result_type='mixed'.
tweets = api.getTweetsByCount('happy', 2)

In [97]:
# Check how many tweets came back for the requested count of 2.
len(tweets)

2

### Testing by dates

In [62]:
# Re-create the client for the date-based test (same default credentials path).
api = TwitterAPI()

In [63]:
%%time
# Collect up to 100 'covid' tweets per day over the 7-day window ending today;
# the result maps each day to a list of (tweet id, full text) tuples.
tweets_dict = api.getTweets7DaysByCount('covid', 100)

CPU times: user 1.21 s, sys: 147 ms, total: 1.35 s
Wall time: 32.7 s
