<a href="https://colab.research.google.com/github/jakartaresearch/adi-buzzer/blob/dev/notebook/R1_get_tweet_retweet_reply_300.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tweepy --quiet

# Setup & Import Library

In [None]:
import pandas as pd
import pickle
import tweepy
import json
from tqdm import tqdm
from tweepy import Cursor
from datetime import datetime, date, time, timedelta
from collections import Counter

# Mount Google Drive so the notebook can read its inputs and write its outputs there.
from google.colab import drive # Colab helper used to mount the drive

ROOT = "/content/drive/"     # mount point passed to drive.mount below
drive.mount(ROOT)           # mount Google Drive at ROOT

In [None]:
# Input files on the mounted drive.
# NOTE(review): these paths are relative while ROOT is absolute; presumably the
# working directory is /content so both point at the same mount - confirm.
# CSV of accounts to crawl; Twitter.read_csv reads its `username` column.
buzzer_path = 'drive/My Drive/Colab Notebooks/adi-buzzer/List of Buzzer.csv'
# Pickle holding the API credentials; Twitter.get_key reads entries 0, 1 and 2 from it.
key_path = 'drive/My Drive/Colab Notebooks/adi-buzzer/twitter_credentials.pkl'

In [None]:
class Twitter:
    """Tweepy client bound to one of three stored credential sets.

    On construction the instance loads the list of usernames from a CSV file,
    loads three credential sets from a pickle file, and authenticates with the
    one selected by ``idx``.

    Attributes:
        usernames: the ``username`` column of the CSV at ``buzzer_path``.
        key0, key1, key2, keys: the three credential sets from ``key_path``.
        use_key: the credential set selected by ``idx``.
        api: the authenticated ``tweepy.API`` object.
        error_code: outcome of the most recent ``error_handle_cursor`` run;
            ``'none'`` when the timeline was read without a fatal error.

    NOTE(review): the error handling relies on ``tweepy.error.TweepError``,
    presumably the Tweepy 3.x API - confirm the installed version.
    """

    def __init__(self, buzzer_path, key_path, idx):
        """Load usernames and credentials, then authenticate with key ``idx``."""
        self.usernames = self.read_csv(buzzer_path)
        self.key0, self.key1, self.key2 = self.get_key(key_path)
        self.keys = [self.key0, self.key1, self.key2]
        self.use_key = self.keys[idx]
        # Make sure the attribute exists before any timeline has been read.
        self.error_code = 'none'
        self.api = self.auth(self.use_key['consumer_key'], self.use_key['consumer_secret'],
                             self.use_key['access_token'], self.use_key['access_token_secret'])

    def read_csv(self, path):
        """Return the ``username`` column of the CSV file at ``path``."""
        return pd.read_csv(path)['username']

    def get_key(self, key_path):
        """Return the first three credential sets stored in the pickle file.

        Raises whatever ``open``/``pickle.load`` raise, and ``IndexError`` or
        ``KeyError`` if the unpickled object has no entries 0, 1 and 2.
        """
        with open(key_path, 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code; only load a
            # credentials file that you created yourself.
            twitter_credentials = pickle.load(f)
        return twitter_credentials[0], twitter_credentials[1], twitter_credentials[2]

    def auth(self, consumer_key, consumer_secret, access_token, access_token_secret):
        """Build a ``tweepy.API`` object authenticated with the given OAuth credentials."""
        handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
        handler.set_access_token(access_token, access_token_secret)
        return tweepy.API(handler)

    def get_info(self, target):
        """Return basic profile fields of ``target`` as a dict.

        Keys: ``name``, ``s_name`` (screen name), ``desc`` (description),
        ``status_count``, ``friends_count`` and ``followers_count``.
        """
        item = self.api.get_user(target)
        return {'name': item.name, 's_name': item.screen_name,
                'desc': item.description, 'status_count': item.statuses_count,
                'friends_count': item.friends_count, 'followers_count': item.followers_count}

    def get_hashtags(self, target, duration, max_tag):
        """Return the ``max_tag`` most common hashtags ``target`` used in the last ``duration`` days.

        The result is a list of ``(hashtag, count)`` pairs.
        """
        return self._count_entities(target, duration, max_tag, 'hashtags', 'text')

    def get_mentions(self, target, duration, max_tag):
        """Return the ``max_tag`` most mentioned screen names of ``target`` in the last ``duration`` days.

        The result is a list of ``(screen_name, count)`` pairs.
        """
        return self._count_entities(target, duration, max_tag, 'user_mentions', 'screen_name')

    def _count_entities(self, target, duration, max_tag, entity_key, field):
        """Count ``field`` values of the ``entity_key`` entities in the recent timeline of ``target``."""
        values = []
        # Naive UTC timestamp; assumes status.created_at is naive UTC as well.
        end_date = datetime.utcnow() - timedelta(days=duration)

        timeline = tweepy.Cursor(self.api.user_timeline, id=target).items()
        for status in self.error_handle_cursor(timeline):
            # Stop before counting anything from a status outside the window.
            if status.created_at < end_date:
                break
            entities = getattr(status, 'entities', None) or {}
            for ent in entities.get(entity_key) or []:
                if ent is None:
                    continue
                value = ent.get(field)
                if value is not None:
                    values.append(value)

        return Counter(values).most_common(max_tag)

    def get_data(self, target, max_tweets=300):
        """Collect the timeline of ``target`` split into tweets, retweets and replies.

        Iteration stops once ``max_tweets`` original tweets were collected or
        the timeline ends (normally or because of an error).

        Returns:
            dict with keys ``username``, ``status_count`` (number of statuses
            inspected), ``retweets``, ``replies``, ``tweets`` (lists of status
            objects) and ``error_code`` (see ``error_handle_cursor``).
        """
        tweets, retweets, replies = [], [], []
        status_count = 0

        timeline = tweepy.Cursor(self.api.user_timeline, id=target).items()
        for status in self.error_handle_cursor(timeline):
            status_count += 1
            payload = status._json

            if payload['text'][:2] == 'RT':
                # Retweet
                retweets.append(status)
            elif payload['in_reply_to_screen_name'] is not None:
                # Reply
                replies.append(status)
            else:
                # Tweet: neither a retweet nor a reply
                tweets.append(status)

            if len(tweets) == max_tweets:
                break

        return {'username': target, 'status_count': status_count,
                'retweets': retweets, 'replies': replies, 'tweets': tweets,
                'error_code': self.error_code}

    @staticmethod
    def _http_status(error):
        """Return the HTTP status of a Tweepy error as a string.

        Uses ``error.response.status_code`` when present and otherwise falls
        back to the last three characters of the error message.
        """
        response = getattr(error, 'response', None)
        status = getattr(response, 'status_code', None)
        if status is not None:
            return str(status)
        return str(error)[-3:]

    def error_handle_cursor(self, cursor, max_retries=3):
        """Yield items from ``cursor.next()`` and record why iteration ended.

        ``self.error_code`` is reset to ``'none'`` when iteration starts, so a
        result never reports the error of a previously crawled account, and is
        overwritten when iteration ends because of an error:

        * ``'401 : account_suspended_or_locked'`` / ``'404 : account_not_found'``
          for Tweepy errors with that HTTP status;
        * ``'unknown : <message>'`` when any other Tweepy error occurred more
          than ``max_retries`` times in a row (the request is retried until then);
        * ``'unexpected : <ExceptionName>'`` for any other exception.

        ``KeyboardInterrupt`` is not caught, so the notebook can be interrupted.
        """
        self.error_code = 'none'
        failures = 0
        while True:
            try:
                item = cursor.next()
            except StopIteration:
                # Normal end of the timeline.
                return
            except tweepy.error.TweepError as e:
                print("\n[error: tweepy] [{}]".format(e))
                status = self._http_status(e)
                if status == '401':
                    self.error_code = '401 : account_suspended_or_locked'
                    print(self.error_code)
                    return
                if status == '404':
                    self.error_code = '404 : account_not_found'
                    print(self.error_code)
                    return
                # Any other API error: retry, but give up after a few
                # consecutive failures instead of looping forever.
                failures += 1
                if failures > max_retries:
                    self.error_code = 'unknown : {}'.format(e)
                    print(self.error_code)
                    return
            except Exception as e:
                # Best effort: stop this timeline but make the failure visible.
                self.error_code = 'unexpected : {}'.format(type(e).__name__)
                print("\n[error] [{}]".format(e))
                return
            else:
                failures = 0
                yield item

    def save_data(self, var, out_path):
        """Pickle ``var`` to the file at ``out_path`` (overwriting it)."""
        with open(out_path, 'wb') as f:
            pickle.dump(var, f)

In [None]:
# Build one client per stored credential set (indices 0, 1 and 2) and keep
# them both as a list and under their individual names.
twitter_keys = [Twitter(buzzer_path, key_path, slot) for slot in range(3)]
twitter_key0, twitter_key1, twitter_key2 = twitter_keys

## Get Data for all usernames and save it

In [None]:
# Usernames to crawl; every client loads the same CSV, so the first client's list is used.
username = twitter_key0.usernames

In [None]:
def show_rate_limit_status(idx):
    """Return the `/statuses/user_timeline` rate-limit entry for client number `idx`."""
    client = twitter_keys[idx]
    status_limits = client.api.rate_limit_status()['resources']['statuses']
    return status_limits['/statuses/user_timeline']

In [None]:
# Print the user_timeline rate-limit status of every configured client.
for i in range(len(twitter_keys)):
    print(show_rate_limit_status(i))

In [None]:
import time

# Crawl every username, pickle each result to `out_path` and append one line
# per user to summary.txt.
out_path = 'drive/My Drive/Colab Notebooks/adi-buzzer/data/'
check = 0                     # number of users processed so far
idx = 0                       # index of the client (credential set) in use
n_keys = len(twitter_keys)
data = username

# One manually updated progress bar for the whole crawl.
with tqdm(total=len(data), position=0, leave=True) as pbar:
    for user in data:
        print('\nUsername :', user)
        print('Current Key :', idx)
        result = twitter_keys[idx].get_data(target=user)
        print('Total tweets :', len(result['tweets']))
        print(show_rate_limit_status(idx))
        print('\n')
        twitter_keys[idx].save_data(result, (out_path+str(user)+'.pkl'))
        # Record the result right away so it is not lost if the run is
        # interrupted during one of the sleeps below.
        with open(out_path+'summary.txt', "a+") as text_file:
            text_file.write(str(user)+' = '+str(len(result['tweets']))+'\n')
        check += 1
        # Switch to the next client after every 3 users, wrapping around.
        if check % 3 == 0:
            idx = (idx + 1) % n_keys
            print('\nChange Key :', idx, '\n')
        # Pause for 15 minutes after every 18 users.
        if check % 18 == 0:
            print('\n----SLEEP----')
            time.sleep(15*60)
        time.sleep(3)
        pbar.update()