# In [1]:
import time
import requests
import os
import json
import csv


# In [None]:
# The API bearer token is read from the BEARER_TOKEN environment variable
# rather than being hard-coded: a credential stored in source is exposed to
# everyone who can read the file. Set BEARER_TOKEN before running; when it is
# unset the token is the empty string and requests will not be authorized.
# NOTE(review): any token that was previously committed here should be
# treated as leaked and revoked.
bearer_token = os.environ.get("BEARER_TOKEN", "")

class RequestException(Exception):
    """Raised when the API answers a request with HTTP status 429."""

def read_user_ids(ids_dir):
    """Read user ids from a CSV file that holds one id per line.

    Args:
        ids_dir: Path of the CSV file with the user ids. (The name is
            historical: it used to refer to a directory.)

    Returns:
        A list of id strings in file order. Blank lines are skipped. If a
        row has more than one column, the last column is used as the id.
    """
    # newline="" lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files handed to csv.reader.
    with open(ids_dir, newline="") as f:
        # csv.reader yields an empty list for a blank line; dropping those
        # rows guarantees every returned entry is a string.
        return [str(row[-1]) for row in csv.reader(f) if row]

def write_to_file(response,outfile):
    """Append *response* to *outfile* as pretty-printed JSON.

    Args:
        response: JSON-serializable object to store.
        outfile: Path of the file to append to; it is created when missing.

    Returns:
        None. Prints "wrote to file" after the data has been written.
    """
    with open(outfile, mode='a') as sink:
        # Keys are sorted and nested levels are indented by four spaces.
        serialized = json.dumps(response, sort_keys=True, indent=4)
        sink.write(serialized)
    print("wrote to file")

def create_url(user_ids):
    """Build the Twitter API v2 users URL for a batch of user ids.

    Args:
        user_ids: Iterable of user ids; each one is converted with str().

    Returns:
        The request URL, with the ids comma-separated in the ``ids`` query
        parameter followed by the requested ``user.fields``.
    """
    # str.join places commas only between ids, so there is no trailing
    # separator to trim and an empty input still yields a well-formed "ids="
    # parameter instead of losing its "=".
    user_ids_string = "ids=" + ",".join(str(user_id) for user_id in user_ids)
    user_fields = "user.fields=description,created_at,entities,location,pinned_tweet_id,profile_image_url,protected,public_metrics,url,verified,withheld"
    url = "https://api.twitter.com/2/users?{}&{}".format(user_ids_string, user_fields)
    return url

def get_params():
    """Return the fixed query-parameter dict.

    Returns:
        A new dict mapping "user.fields" to "created_at".
    """
    params = {}
    params["user.fields"] = "created_at"
    return params

def create_headers(bearer_token):
    """Build the HTTP headers that carry the bearer token.

    Args:
        bearer_token: Token placed after "Bearer " in the header value.

    Returns:
        A dict with a single "Authorization" entry.
    """
    return {"Authorization": f"Bearer {bearer_token}"}

def connect_to_endpoint(url,headers,timeout=30):
    """Send a GET request and return the decoded JSON body.

    Args:
        url: Full request URL.
        headers: HTTP headers to send with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The response body parsed as JSON.

    Raises:
        RequestException: The server answered with status 429.
        Exception: The server answered with any other non-200 status.
    """
    response = requests.request("GET", url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.json()

    message = "Request returned an error: {} {}".format(
        response.status_code, response.text
    )
    # 429 gets its own exception type so callers can tell it apart from other
    # failures and retry.
    if response.status_code == 429:
        raise RequestException(message)
    raise Exception(message)

def execute(user_id_list,headers,outf):
    """Fetch data for every user id and append each response to *outf*.

    The ids are requested in batches of at most 100. When a request raises
    RequestException, the function sleeps for a minute and retries the same
    batch, so no ids are dropped. The input list is not modified.

    Args:
        user_id_list: List of user ids to look up.
        headers: HTTP headers passed to connect_to_endpoint.
        outf: Path of the output file passed to write_to_file.

    Returns:
        None. Progress and the final user count are printed.
    """
    batch_size = 100  # users can only be requested 100 at a time
    total = len(user_id_list)
    count = 0
    sleep_count = 0
    print(total)
    # `count` only advances after a batch has been fetched and written, so
    # every id (including a final batch of any size from 1 to 100) is
    # requested exactly once on success and retried on rate limiting.
    while count < total:
        batch = user_id_list[count:count + batch_size]
        if len(batch) < batch_size:
            print("Final iteration")
        else:
            print("Requesting for user: ", count)
        try:
            json_response = connect_to_endpoint(create_url(batch), headers)
        except RequestException:
            # Sleep a minute at a time until the API accepts requests again.
            print("Too many requests. Been sleeping... for ", sleep_count, " minutes probably until 15 minutes")
            sleep_count += 1
            time.sleep(60)
            continue
        write_to_file(json_response, outf)
        count += len(batch)
    print("Number of users: ", count)

def main(input_ids_file="/home/zackrack/Roe/Data/user_ids_big.csv",
         output_info_file="/home/zackrack/Roe/Data/user_data_big.csv"):
    """Read the user ids, fetch their data and write it to the output file.

    Args:
        input_ids_file: Path of the CSV file with the user ids. Pass your
            own path instead of editing the default.
        output_info_file: Path of the file that receives the responses.
            The data written is JSON, whatever the file extension says.

    Returns:
        None. Prints the number of ids read and "done" when finished.
    """
    user_id_list = read_user_ids(input_ids_file)
    headers = create_headers(bearer_token)
    print(len(user_id_list))
    execute(user_id_list,headers,output_info_file)
    print("done")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
