In [1]:
import pandas as pd
import os
from datetime import datetime, timedelta
from twarc.client2 import Twarc2
from dotenv import load_dotenv
from pprint import pprint

# Setup environment
load_dotenv()
BEARER_TOKEN = os.getenv('BEARER_TOKEN')
API_KEY = os.getenv('API_KEY')  # cosumer_key
API_KEY_SECRET = os.getenv('API_KEY_SECRET')  # consumer_secret
ACCESS_TOKEN = os.getenv('ACCESS_TOKEN')
ACCESS_TOKEN_SECRET = os.getenv('ACCESS_TOKEN_SECRET')
MY_ID = os.getenv('MY_ID')

# Setup client
t = Twarc2(consumer_key=API_KEY, consumer_secret=API_KEY_SECRET,
           access_token=ACCESS_TOKEN, access_token_secret=ACCESS_TOKEN_SECRET,
           bearer_token=BEARER_TOKEN, )


# Main heavy lifting guy:
def fetch_data() -> list:
    users_i_follow = []
    data_generator = t.following(user=MY_ID)

    # Use generator to repeatedly get the next list of users:
    for obj in data_generator:
        users_i_follow.extend(obj['data']) if 'data' in obj.keys() \
            else users_i_follow.extend(obj['errors'])

    return users_i_follow

In [11]:
today = datetime.strftime(datetime.today(), '%b-%-d-%Y')
yesterday = datetime.strftime((datetime.today() + timedelta(days=-1)), '%b-%-d-%Y')


def is_following_list_identical():
    td_df = pd.read_csv(f'static/{today}.csv')
    yd_df = pd.read_csv(f'static/{yesterday}.csv')

    # Default data contains a lot of extra columns, so they are dropped:
    extra_columns = ['Unnamed: 0', 'entities.url.urls', 'entities.description.urls', 'public_metrics.followers_count',
                     'public_metrics.following_count', 'public_metrics.tweet_count', 'public_metrics.listed_count',
                     'entities.description.hashtags', 'entities.description.mentions', 'pinned_tweet_id',
                     'entities.description.cashtags']
    td_df.drop(columns=extra_columns, inplace=True)
    yd_df.drop(columns=extra_columns, inplace=True)

    # We can assume each user id is unique, so it can be added to a set for easy comparison
    td_ids = set(td_df['id'])
    yd_ids = set(yd_df['id'])

    # To find differences that exist in either set, we use
    # the `.symmetric_difference()` method.
    # https://betterprogramming.pub/a-visual-guide-to-set-comparisons-in-python-6ab7edb9ec41
    sym_diff = td_ids.symmetric_difference(yd_ids)

    no_diff = len(sym_diff) == 0
    if no_diff:
        return False, sym_diff
    else:
        return True, sym_diff



# TODO: Sanity check - write a function to plant a difference
#  and see what happens to the behavior of the function.

# following_today = fetch_data()
# df = pd.json_normalize(following_today)
# df.to_csv(f'static/{today}.csv')
# df.to_csv(f'static/{yesterday}.csv')

print(is_following_list_identical())

(False, set())
