# Get Twitter user info

## Read config and first voters list

In [None]:
from configparser import ConfigParser
from typing import List

import time
from random import randint

import pandas as pd
import tweepy


# Accounts input: workbook, sheet and column that hold the screen names to look up
accounts_file_name = "edang-firstvoters.xlsx"
accounts_sheet_name = "accounts"
screen_name_column = "Screen Name"

# Read Twitter API keys and tokens from a config file
config_file_name = "config.ini"
config_object = ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so fail early with a clear message instead of letting the
# section lookup below raise a confusing KeyError.
if not config_object.read(config_file_name):
    raise FileNotFoundError(f"Cannot read Twitter API config file: {config_file_name}")
twitter_auth = config_object["TWITTER_AUTH"]

consumer_key = twitter_auth["CONSUMER_KEY"]
consumer_secret = twitter_auth["CONSUMER_SECRET"]
access_token = twitter_auth["ACCESS_TOKEN"]
access_token_secret = twitter_auth["ACCESS_TOKEN_SECRET"]

# Authenticate with the consumer key pair plus the account's access token pair
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# API client used by every request below; it is configured to wait (and notify)
# when a rate limit is hit and to request compressed responses.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, compression=True)

# User attributes to collect, mapped to the pandas dtype of the matching column.
# "created_at" spells out the unit: recent pandas versions reject the unit-less
# "datetime64" in astype(), while "datetime64[ns]" is accepted by old and new
# versions alike.
twitter_user_info_fields = {
    "id": "int64",
    "id_str": "str",
    "screen_name": "str",
    "name": "str",
    "friends_count": "int32",
    "followers_count": "int32",
    "statuses_count": "int32",
    "location": "str",
    "lang": "category",
    "time_zone": "category",
    "created_at": "datetime64[ns]",
    "protected": "bool",
    "verified": "bool",
    # Currently not collected:
    #"suspended": "bool",
    #"needs_phone_verification": "bool",
}

# Placeholder value for every collected field; a row starts from these so that
# a failed lookup still yields a complete record.
twitter_user_info_defaults = dict(
    id=0,
    id_str="",
    screen_name="",
    name="",
    friends_count=0,
    followers_count=0,
    statuses_count=0,
    location="",
    lang="",
    time_zone="",
    created_at=None,
    protected=False,
    verified=False,
)

# Build an empty frame with one column per collected field, then cast each
# column to the dtype declared for it.
users_df = pd.DataFrame(columns=list(twitter_user_info_fields))
for column_name, column_dtype in twitter_user_info_fields.items():
    typed_column = users_df[column_name].astype(column_dtype)
    users_df[column_name] = typed_column
# Extra free-text column, not part of the declared field list
users_df["fetching_notes"] = ""

# Load the sheet holding the screen names and preview its first rows
screen_names_df = pd.read_excel(accounts_file_name, sheet_name=accounts_sheet_name)
screen_names_df.head()

In [None]:
# Display summary statistics of the loaded accounts sheet
screen_names_df.describe()

## Get info of each user

In [None]:
# Look up the quota of the users/show endpoint once; it drives how often the
# loop below re-checks the remaining quota.
rate_limit_status = api.rate_limit_status()
rate_limit = rate_limit_status["resources"]["users"]["/users/show/:id"]["limit"]

job_len = len(screen_names_df[screen_name_column])

# Re-check the quota after every quarter of the limit. Clamp to at least 1 so
# a very small limit cannot turn the modulo below into a division by zero.
check_interval = max(1, int(rate_limit / 4))

rows = []
for i, name in enumerate(screen_names_df[screen_name_column]):
    # Start from the defaults so a failed lookup still yields a complete row
    row = twitter_user_info_defaults.copy()
    row["screen_name"] = name
    try:
        user = api.get_user(screen_name=name)
    except tweepy.TweepError as e:
        print(f"{i+1}: Fetching info of screen name: {name}")
        print("Error:", e.api_code)
        print(e.reason)
    else:
        # Copy only the attributes the returned user object actually has.
        # The assignment lives inside the `if`, so a missing attribute keeps
        # its default instead of reusing the previous field's value (or
        # raising NameError when the very first field is missing).
        for field in twitter_user_info_fields:
            if hasattr(user, field):
                row[field] = getattr(user, field)

    rows.append(row)

    # Avoid hitting rate limit
    if i % check_interval == check_interval - 1:
        secs_to_sleep = 0
        print(f"Runnning: {i+1}/{job_len}")
        try:
            rate_limit_status = api.rate_limit_status()
            remaining = rate_limit_status["resources"]["users"]["/users/show/:id"]["remaining"]
            # Back off once less than 10% of the quota is left
            if (remaining / rate_limit) < 0.10:
                secs_to_sleep = int(rate_limit * 0.8) + randint(65, 125)
                print(f"Remaining use: {remaining}. Reaching rate limit. Sleeping for: {secs_to_sleep}")
        except tweepy.TweepError:
            # The quota could not be read: sleep for a jittered fallback period.
            # Clamp at 0 because time.sleep() rejects negative values.
            secs_to_sleep = max(0, int(rate_limit / 4) - randint(0, 30) + randint(0, 30))
            print(f"Cannot get remaining use. Resource used for: {i+1}. Sleeping for: {secs_to_sleep}")

        time.sleep(secs_to_sleep)

# DataFrame.append() no longer exists in recent pandas versions; pd.concat()
# works on old and new versions alike.
users_df = pd.concat([users_df, pd.DataFrame(rows)], ignore_index=True, sort=False)
users_df

## Write user info to Excel file

In [None]:
# Write the collected user info to the "new" sheet of users-info.xlsx
with pd.ExcelWriter("users-info.xlsx") as writer:
    # Pass the sheet name by keyword: positional arguments after the writer
    # are deprecated in recent pandas versions.
    users_df.to_excel(writer, sheet_name="new", index=False)