In [None]:
import os
import glob
import json
import time
import pickle

import tweepy
import pandas as pd
import concurrent.futures
from tqdm import tqdm, notebook

from itertools import compress 
from datetime import datetime

In [None]:
class MultiTweets():
    """Download Twitter user timelines using a pool of tweepy API clients.

    One ``tweepy.API`` client is built per credential file. ``api_statuses``
    holds one boolean per client: ``True`` means the client is free, ``False``
    means a download is currently using it. Callers mark a client busy before
    starting ``get_user_timeline``; the method marks it free again when done.

    Written against the tweepy 3.x API (``TweepError``, ``RateLimitError`` and
    ``cursor.next()``).
    """

    # Pause applied when Twitter reports that the rate limit was hit (15 minutes).
    RATE_LIMIT_SLEEP_SECONDS = 15 * 60
    # Short pause before retrying a request that could not be sent, so a
    # network outage does not turn into a tight retry loop.
    NETWORK_RETRY_SLEEP_SECONDS = 5

    def __init__(self, keys_paths, usernames):
        """Load the credentials and authenticate one client per credential file.

        Args:
            keys_paths: Iterable of paths to pickled credential dicts.
            usernames: Usernames this instance is meant to process (stored as-is).
        """
        self.keys = self.read_key(keys_paths)
        self.usernames = usernames
        self.apis = self.auth_twitter()
        self.api_statuses = [True] * len(self.apis)

    def read_key(self, key_paths):
        """Return the unpickled content of every file in ``key_paths``.

        Each file is closed as soon as it has been read.
        """
        keys = []
        for path in key_paths:
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file -- only point this at credential files you created yourself.
            with open(path, 'rb') as key_file:
                keys.append(pickle.load(key_file))
        return keys

    def auth_twitter(self):
        """Build one authenticated ``tweepy.API`` client per loaded credential.

        Every credential must provide the keys ``api_key``, ``api_secret_key``,
        ``access_token`` and ``access_token_secret``.
        """
        api_list = []
        for key in self.keys:
            auth = tweepy.OAuthHandler(key["api_key"], key["api_secret_key"])
            auth.set_access_token(key["access_token"], key["access_token_secret"])
            api_list.append(tweepy.API(auth))

        return api_list

    def get_free_token(self):
        """Return ``(api, index)`` for the first free client, or ``(None, None)``.

        The client is NOT marked busy here; the caller has to set
        ``api_statuses[index] = False`` itself.
        """
        for index, is_free in enumerate(self.api_statuses):
            if is_free:
                return self.apis[index], index
        return None, None

    def output(self, data, path_dir, filename):
        """Persist ``data`` as ``<path_dir>/<filename>.json``.

        The target directory is created when missing. Data that cannot be
        encoded as JSON is pickled to ``<path_dir>/<filename>.pkl`` instead.
        """
        if path_dir:
            os.makedirs(path_dir, exist_ok=True)
        base_path = os.path.join(path_dir, filename)

        try:
            # Serialise before opening the file so that a failure does not
            # leave an empty .json file behind.
            serialized = json.dumps(data)
        except (TypeError, ValueError):
            with open(base_path + '.pkl', 'wb') as f:
                pickle.dump(data, f)
            return

        with open(base_path + '.json', 'w') as f:
            f.write(serialized)

    def _wait_for_rate_limit(self):
        """Log the rate-limit hit and sleep for RATE_LIMIT_SLEEP_SECONDS."""
        print('\tRateLimit', datetime.today().strftime("\t%H:%M:%S %d-%m-%Y"))
        time.sleep(self.RATE_LIMIT_SLEEP_SECONDS)

    def limit_handled(self, cursor):
        """Yield items from a tweepy cursor iterator, handling API errors.

        * Rate-limit errors (``RateLimitError`` or a reason containing "429"):
          sleep, then retry.
        * "Failed to send request": pause briefly, then retry.
        * Any other ``TweepError``: report it and stop yielding.
        * ``StopIteration`` from the cursor: stop yielding.
        """
        while True:
            try:
                yield cursor.next()
            except tweepy.RateLimitError:
                self._wait_for_rate_limit()
            except tweepy.TweepError as e:
                reason = str(getattr(e, "reason", "") or "")
                if "Failed to send request" in reason:
                    time.sleep(self.NETWORK_RETRY_SLEEP_SECONDS)
                elif '429' in reason:
                    self._wait_for_rate_limit()
                else:
                    # Unrecoverable for this timeline: say why instead of
                    # ending the download silently.
                    print('\tTweepError', reason)
                    return
            except StopIteration:
                return

    def get_user_timeline(self, username, api, index_token, total_tweets, output_dir='./raw'):
        """Download up to ``total_tweets`` tweets of ``username`` and save them.

        Args:
            username: Account whose timeline is fetched.
            api: ``tweepy.API`` client used for the requests.
            index_token: Position of ``api`` in ``self.apis``; its status flag
                is reset to free when this method exits, even on error.
            total_tweets: Maximum number of tweets to collect.
            output_dir: Directory receiving ``<username>.json``.
        """
        try:
            all_tweets = []
            timeline = tweepy.Cursor(api.user_timeline, username).items(total_tweets)
            for tweet in self.limit_handled(timeline):
                all_tweets.append(tweet._json)

                if (len(all_tweets) % 100) == 0:
                    len_tweets = len(all_tweets)
                    print(f"{username} Tweets Collected: {len_tweets}")

            self.output(all_tweets, output_dir, username)
        finally:
            # Always hand the token back; otherwise one failed download would
            # keep this client marked busy for good.
            self.api_statuses[index_token] = True

In [None]:
# Root directory of the project resources; the 'data' and 'keys' directories
# are resolved below it. The path is relative, so it is interpreted against
# the kernel's current working directory.
RESOURCE_PATH = '../../adi-buzzer'

In [None]:
# Accounts to download, mapped to the maximum number of tweets to collect for
# each one. Every account currently shares the same cap.
usernames = dict.fromkeys(
    ("PuisiDariHati", "Bait_Puisi", "LenteraSajak"),
    5000,
)

In [None]:
# Locations of the data and API-key directories below the resource root.
data_dir_path, key_dir_path = (
    os.path.join(RESOURCE_PATH, subdir) for subdir in ('data', 'keys')
)

In [None]:
# Credential files found in the key directory.
# * glob returns matches in arbitrary order, so sort them: the position of a
#   path decides which token index the credential gets, and that should be the
#   same on every run.
# * Skip sub-directories, which cannot be opened as credential files.
key_paths = sorted(
    path
    for path in glob.glob(os.path.join(key_dir_path, '*'))
    if os.path.isfile(path)
)

In [None]:
# Build the downloader: loads every credential file and authenticates one
# client per credential.
multitweets = MultiTweets(keys_paths=key_paths, usernames=list(usernames))

In [None]:
# Fail fast: without any credentials the scheduling loop below could never
# obtain a token.
if not multitweets.apis:
    raise RuntimeError("No Twitter API credentials were loaded; cannot download timelines.")

with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    print("usernames", usernames)

    in_flight = {}   # future -> (username, token index) of running downloads
    failures = {}    # username -> exception raised by its download

    def _collect(finished_futures):
        """Release the token of each finished download and record its error, if any."""
        for finished in finished_futures:
            finished_user, finished_token = in_flight.pop(finished)
            multitweets.api_statuses[finished_token] = True
            error = finished.exception()
            if error is not None:
                failures[finished_user] = error

    for username, total_tweets in usernames.items():
        api, index_token = multitweets.get_free_token()

        # More usernames than tokens: wait for a running download to finish
        # instead of indexing api_statuses with None.
        while api is None:
            if not in_flight:
                raise RuntimeError("Every token is marked busy but no download is running.")
            finished_now, _still_running = concurrent.futures.wait(
                in_flight, return_when=concurrent.futures.FIRST_COMPLETED
            )
            _collect(finished_now)
            api, index_token = multitweets.get_free_token()

        multitweets.api_statuses[index_token] = False
        future = executor.submit(
            multitweets.get_user_timeline, username, api, index_token, total_tweets
        )
        in_flight[future] = (username, index_token)

    # Drain the remaining downloads so that worker exceptions are not lost.
    _collect(concurrent.futures.as_completed(list(in_flight)))

for failed_user, error in failures.items():
    print(f"{failed_user} failed: {error!r}")

In [None]:
# NOTE(review): this call reuses `username`, `api`, `index_token` and
# `total_tweets`, which are the loop variables left over from the scheduling
# cell above. It therefore downloads the timeline of the LAST user again,
# synchronously, and raises NameError on a fresh kernel if that cell has not
# been run. Looks like a leftover manual/debug call -- confirm it is still wanted.
multitweets.get_user_timeline(username, api, index_token, total_tweets)