# Get followers

## Setup, Twitter tokens, inputs

In [1]:
from configparser import ConfigParser
import pickle
import time
from random import randint

import pandas as pd
import tweepy


# Accounts input: the spreadsheet, its sheet, and the columns to read from it
accounts_filename = "edang-firstvoters-info-samples.xlsx"
accounts_sheet = "samples"
id_column = "id_str"
screen_name_column = "screen_name"

# Load config.ini and take the Twitter credentials from its TWITTER_AUTH section
config_object = ConfigParser()
config_object.read("config.ini")
twitter_auth = config_object["TWITTER_AUTH"]

(
    consumer_key,
    consumer_secret,
    access_token,
    access_token_secret,
) = (
    twitter_auth[option]
    for option in (
        "CONSUMER_KEY",
        "CONSUMER_SECRET",
        "ACCESS_TOKEN",
        "ACCESS_TOKEN_SECRET",
    )
)


def print_progress(i: int):
    """Print a one-character progress marker for the zero-based step ``i``.

    Every 10th step prints "/", every other 5th step prints ",", and all
    remaining steps print ".". Nothing is appended after the marker, so
    consecutive calls build up a single line.
    """
    step = i + 1
    if step % 10 == 0:
        marker = "/"
    elif step % 5 == 0:
        marker = ","
    else:
        marker = "."
    print(marker, end="")


# Authenticate against the Twitter API with the credentials loaded above
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# API client: waits (and reports) on rate limits, with one retry after a
# 2-second delay
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
    compression=True,
    retry_count=1,
    retry_delay=2,
)

# Load the accounts sheet and pull out the screen-name and ID columns
users_df = pd.read_excel(accounts_filename, sheet_name=accounts_sheet)
screen_names = users_df[screen_name_column].tolist()
ids = users_df[id_column].tolist()

# Show the first and last screen name plus the total as a sanity check
print(f"Screen names: {screen_names[:1]} .. {screen_names[-1:]}")
print(f"Total: {len(screen_names):,}")

## Get followers of each user

In [None]:
# Wait to get full remaining calls
def to_sleep(reset: int) -> int:
    """Return how many seconds to sleep before re-checking the rate limit.

    Args:
        reset: Epoch timestamp (in seconds) at which the rate-limit window
            resets.

    Returns:
        The whole seconds left until ``reset``, never negative. A wait longer
        than 180 seconds is replaced by 180 plus a random 1-20 second jitter,
        so the caller polls again instead of sleeping in one long stretch.
    """
    # Clamp at zero: a reset time that has already passed would otherwise give
    # a negative value, which time.sleep() rejects with ValueError.
    secs_to_sleep = max(0, reset - int(time.time()))
    if secs_to_sleep > 180:
        secs_to_sleep = 180 + randint(1, 20)
    return secs_to_sleep

# Read the current quota of the /followers/list endpoint and sleep until
# (almost) all of its calls are available again.
rate_limit_status = api.rate_limit_status()["resources"]["followers"]["/followers/list"]
rate_limit = rate_limit_status["limit"]
remaining = rate_limit_status["remaining"]
limit_reset = rate_limit_status["reset"]

# `reset` is an epoch timestamp (to_sleep() subtracts the current time from it),
# so the former extra condition `limit_reset < 60` could never be true and the
# wait was silently skipped. Only the remaining-calls check decides now.
while remaining < rate_limit - 1:
    # Sleep at least one second so a reset time that has just passed neither
    # yields a negative sleep nor turns this loop into a busy poll.
    secs_to_sleep = max(1, to_sleep(limit_reset))
    print(f"Calls remaining: {remaining}. Sleeping for: {secs_to_sleep}")
    time.sleep(secs_to_sleep)

    # Refresh the quota figures after sleeping
    rate_limit_status = api.rate_limit_status()["resources"]["followers"]["/followers/list"]
    remaining = rate_limit_status["remaining"]
    limit_reset = rate_limit_status["reset"]

# Start collecting
# For every screen name, page through the account's followers and store the
# complete list in `followers`, keyed by screen name.
followers = {}
job_len = len(screen_names)  # total shown in the "i/total" progress prefix
for i, name in enumerate(screen_names):
    print(f"{i+1:3}/{job_len} Screen name: {name} |", end="")

    _followers = []
    try:
        cursor = tweepy.Cursor(api.followers, screen_name=name)
        for p, page in enumerate(cursor.pages()):
            _followers.extend(page)
            print_progress(p)
            time.sleep(randint(58, 62))  # ~60 seconds per page, to stay in rate limit
    except tweepy.TweepError as e:
        # The account is skipped: pages fetched before the error are not stored.
        print(f"Error: {e.api_code}")
        print(e.reason)
    else:
        # Only accounts whose pages were all fetched without error are recorded.
        print("| Done.")
        followers[name] = _followers

    # Short random pause before moving on to the next account
    time.sleep(randint(1, 3))

# Notebook cell output: number of accounts collected successfully
len(followers)

## Save data to file

In [None]:
# Report how many accounts were collected, then pickle the whole mapping to disk
print(f"Save: {len(followers):,} entries")
with open("followers.pkl", "wb") as file:
    pickle.Pickler(file).dump(followers)