In [None]:
from twikit import Client
from twikit import BadRequest
from math import ceil
import pandas as pd
import numpy as np
import time
import json
import sqlite3

# this API requires authentication
# don't hardcode your email and password into something!!!
# the auth is in gitignore so I won't get hacked
#
# authentication.txt is expected to hold three lines: username, email, password
with open('authentication.txt', 'r') as f:
    # the with-block closes the file even if reading fails
    auth = f.read()

# splitlines() copes with both "\n" and "\r\n" endings, so credentials saved
# on Windows do not end up with a stray "\r" attached
auth_token = auth.splitlines()
if len(auth_token) < 3:
    raise ValueError(
        "authentication.txt must contain three lines: username, email, password"
    )

username = str(auth_token[0])
email = str(auth_token[1])
password = str(auth_token[2])

# Initialize client
client = Client(language='en-US', http2=True)

In [None]:
def authentication(username, email, password):
    """Log the module-level `client` in and report whether it worked.

    Parameters
    ----------
    username : passed to `client.login` as `auth_info_1`
    email : passed to `client.login` as `auth_info_2`
    password : passed to `client.login` as `password`

    Returns
    -------
    bool
        True when the login call completes, False when it raises BadRequest.
        A status message is printed in both cases.
    """
    try:
        client.login(auth_info_1=username, auth_info_2=email, password=password)
    except BadRequest:
        # the service rejected the supplied credentials
        print("Login unsuccessful. One or more login parameters is incorrect.")
        return False
    else:
        print("Login successful!")
        return True

In [None]:
# Log in with the credentials read from authentication.txt.
# authentication() prints the outcome and returns True/False, which the
# notebook shows as this cell's output.
authentication(username, email, password)

In [None]:
# Twitter LOVES to ban people when they log in repeatedly
# saving the cookies makes sure I don't get banned (often)

# NOTE(review): the value returned by get_cookies() is discarded here —
# presumably a leftover; confirm the call has no side effect that is needed
client.get_cookies()
# write the current session cookies to disk so later cells can reuse them
client.save_cookies('IGNOREcookies.json')
# read the saved file back and hand its contents to the client
with open('IGNOREcookies.json', 'r', encoding='UTF8') as f:
    client.set_cookies(json.load(f))

In [None]:
from twikit import UserUnavailable
from twikit import UserNotFound

def get_user_id(handle):
    """Look up a Twitter user by screen name.

    Parameters
    ----------
    handle : screen name passed to `client.get_user_by_screen_name`

    Returns
    -------
    The object returned by `client.get_user_by_screen_name`, or None when the
    lookup raises UserUnavailable or UserNotFound. (Despite the function's
    name this is the whole user object; callers read `.id` from it.)

    Every call sleeps for 10 seconds before returning, whether or not the
    lookup succeeded, to stay under the endpoint's rate limit.
    """
    # load the cookies so you don't login a million times and get banned
    client.load_cookies('IGNOREcookies.json')

    # default result, so a failed lookup returns None instead of raising
    # UnboundLocalError at the return statement
    user = None

    try:
        user = client.get_user_by_screen_name(handle)
        print(f'Success {handle}, {user}')

    # if user is inaccessible these will keep the loop from breaking
    except UserUnavailable:
        print(f"User {handle} unavailable")
    except UserNotFound:
        print(f'User {handle} not found')

    # this endpoint has a rate limit of 95 hits per 15 minutes
    # 15 min = 900 seconds
    # 900 / 95 = 9.47 seconds per request
    # round it up to 10 just to be sure
    # allows the program to run automatically
    time.sleep(10)

    return user

In [None]:
def create_key(user_id, year):
    """Build an integer key by writing `user_id` and `year` side by side.

    Both values are converted to text, concatenated (user id first) and the
    result is parsed as an int, e.g. (123, 2020) -> 1232020.
    """
    combined = f"{user_id!s}{year!s}"
    return int(combined)

In [None]:
from math import ceil
import requests
from twikit import TooManyRequests
from twikit.utils import Endpoint

# this is a housekeeping function
# twitter API can throw rate limits
# they're kind of like timeouts
# this function just shows me how much longer I will be in timeout for

def get_limit_reset_time(endpoint: str, timeout: float = 30):
    """Return how many seconds remain until the rate limit on `endpoint` resets.

    Sends a GET request to `endpoint` using the module-level client's headers
    and cookies, reads the `x-rate-limit-reset` response header and subtracts
    the current `time.time()` from it.

    Parameters
    ----------
    endpoint : URL to query
    timeout : seconds to wait for the server before giving up (default 30)

    Returns
    -------
    int
        Seconds until the reset, rounded up.

    Raises
    ------
    KeyError
        If the response carries no `x-rate-limit-reset` header.
    requests.exceptions.Timeout
        If the server does not answer within `timeout` seconds.
    """
    res = requests.get(
        endpoint,
        headers=client._base_headers,
        cookies=client.get_cookies(),
        # without a timeout requests waits indefinitely on a stalled connection
        timeout=timeout,
    )
    return ceil(int(res.headers['x-rate-limit-reset']) - time.time())

In [None]:
# timeout check for accessing user IDs
def get_rate_limit_user_by_screen_name():
    """Probe the screen-name lookup and report the rate-limit wait if throttled.

    Performs one lookup of 'BarackObama' and prints the result. If that
    raises TooManyRequests, prints how many seconds remain until the limit
    resets. Returns None.
    """
    try:
        print(client.get_user_by_screen_name('BarackObama'))
    except TooManyRequests:
        # ask about the endpoint that was actually throttled; the reset time
        # of the user-tweets endpoint says nothing about screen-name lookups
        reset_time = get_limit_reset_time(Endpoint.USER_BY_SCREEN_NAME)
        print(f'rate limit is reset after {reset_time} seconds.')

In [None]:
# we need the data in list format to process it
def flatten_csv(csv, index_column=False):
    """Read a CSV file and return its rows as a list of lists.

    Parameters
    ----------
    csv : path (or anything `pd.read_csv` accepts) of the file to read
    index_column : forwarded to `pd.read_csv` as `index_col`; the column used
        as index is not part of the returned rows

    Returns
    -------
    list[list]
        One inner list per data row, in file order. Empty cells are kept as
        empty strings because the file is read with `keep_default_na=False`.
    """
    df = pd.read_csv(csv, keep_default_na=False, index_col=index_column)

    # Select rows by position, not by label: a label lookup returns every row
    # sharing that label, which merged and duplicated rows whenever the index
    # column contained repeated values.
    return [df.iloc[pos].values.tolist() for pos in range(len(df))]

In [None]:
def get_user_ids(user_list, handle_index):
    """Write each user's Twitter ID into position 0 of its row.

    Parameters
    ----------
    user_list : list of row lists; the rows are modified in place
    handle_index : position of the screen name inside each row

    Returns
    -------
    list
        The same row objects, in input order. A row whose lookup produced no
        user gets None in position 0.
    """
    user_id_list = []

    for item in user_list:
        user = get_user_id(item[handle_index])

        # insert the ID into the list; guard against a lookup that yielded
        # no user so one bad handle does not abort the whole run on `.id`
        item[0] = user.id if user is not None else None
        user_id_list.append(item)
        print(item)
    return user_id_list

In [None]:
# forward slash: works on every OS and avoids the invalid "\p" escape sequence
data_list = flatten_csv('data/politicians.csv', index_column=0)

# keep the rows returned by get_user_ids instead of discarding them
user_id_list = get_user_ids(data_list, -1)

# last expression of the cell, so the notebook still displays the result
user_id_list

In [None]:
# read in coordinates data
# (forward slash: portable, and avoids the invalid "\c" escape sequence)
data = flatten_csv('data/coordinates.csv', index_column=0)

In [None]:
def get_twitter_creation_year(user):
    """Return the year in which `user`'s account was created, as an int.

    The year is taken from the first four characters of the text form of
    `user.created_at_datetime`. The saved cookies are loaded into the client
    before the value is read.
    """
    client.load_cookies('IGNOREcookies.json')
    timestamp_text = str(user.created_at_datetime)
    return int(timestamp_text[:4])

In [None]:
def process_coordinates(data_list, handle_index, name_index, year_index):
    """Flag, for every row, whether tweets can be pulled for that election.

    The last field of each row is overwritten with True when the account was
    created in or before the row's election year, and False otherwise.

    Parameters
    ----------
    data_list : list of row lists; modified in place
    handle_index : position of the screen name inside each row
    name_index : position of the name used in progress messages
    year_index : position of the election year (anything `int()` accepts)

    Returns
    -------
    list
        `data_list` itself, after the update.
    """
    for item in data_list:
        user = get_user_id(item[handle_index])

        if user is None:
            # no account could be fetched, so there is nothing to pull
            # tweets from; flag the row instead of crashing on the None
            item[-1] = False
            print(f"{item[name_index]} skipped (user lookup failed)")
            continue

        created = get_twitter_creation_year(user)

        # True: the account existed by the election year, so tweets can be pulled
        # False: we can't pull tweets for this election for this politician
        item[-1] = created <= int(item[year_index])

        print(f"{item[name_index]} processed")

    return data_list

In [None]:
def ingest_data_table_coordinates(data_list):
    """Insert rows into the `coordinates` table of tweets.db.

    Parameters
    ----------
    data_list : iterable of rows, each with exactly ten values (one per
        placeholder in the INSERT statement)

    All rows are written in a single transaction: it is committed when every
    insert succeeds and rolled back if any of them raises. The connection is
    closed in both cases. Errors from sqlite3 propagate to the caller.
    """
    # set numpy INT and BOOL to adapt to SQL acceptable data forms
    sqlite3.register_adapter(np.int64, int)
    sqlite3.register_adapter(np.bool_, str)

    conn = sqlite3.connect('tweets.db')
    try:
        # `with conn` commits on success and rolls back on an exception;
        # it does not close the connection, hence the finally below
        with conn:
            conn.executemany(
                "INSERT INTO coordinates VALUES (?,?,?,?,?,?,?,?,?,?)",
                data_list,
            )
    finally:
        conn.close()