In [None]:
import json
import os
import time
import numpy as np
import datetime  

from fyp.crypto import Crypto
from fyp.influence_measures import ri, snp
from fyp.twitter_api import twitter_api, convert_datetime_to_ISO_8601


In [None]:
from fyp.secrets import SECRETS
# Bearer-token Authorization header; passed as `headers` to the twitter_api calls in the cells below.
headers = {"Authorization": f"Bearer {SECRETS.TWITTER_BEARER_TOKEN}"}


In [None]:
# Project crypto helper; its age_decrypt_file method is used below to decrypt the tweet dump.
CRYPTO = Crypto()


In [None]:
# Paths of the temporary plaintext copy (d) and the age-encrypted tweet dump (e).
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a configurable data directory.
d = '/its/home/ep396/Documents/FYP/data/decrypted-new-initial-tweets.json'
e = '/its/home/ep396/Documents/FYP/data/encrypted-new-initial-tweets.json'

try:
    CRYPTO.age_decrypt_file(e, d)

    # The context manager closes the handle even if parsing fails.
    with open(d, encoding='utf8') as file:
        data = json.load(file)
finally:
    # Always delete the plaintext copy, including when decryption or JSON
    # parsing raises, so decrypted tweets never linger on disk.
    if os.path.exists(d):
        os.remove(d)

len(data)


In [None]:
class janky_metrics():
    """Namespace of helpers that rank tweet authors by their summed public metrics.

    Every helper is a static method, so it can be called on the class
    (``janky_metrics.get_users_metrics(...)``) or on an instance.
    """

    @staticmethod
    def get_users_metrics(tweet_data: list) -> dict:
        """Sum each author's ``public_metrics`` values across all their tweets.

        Args:
            tweet_data: Iterable of tweet dicts, each holding an ``author_id``
                and a ``public_metrics`` mapping of metric name -> number.

        Returns:
            Dict mapping author id -> NumPy array of per-metric totals, in the
            iteration order of the tweet's ``public_metrics`` mapping.
        """
        user_data = {}

        for tweet in tweet_data:
            author = tweet["author_id"]
            # NOTE(review): the element-wise sum assumes every tweet lists the
            # same metrics in the same order — confirm against the API payload.
            metric_list = np.array(list(tweet["public_metrics"].values()))
            if author in user_data:
                user_data[author] = user_data[author] + metric_list
            else:
                user_data[author] = metric_list

        return user_data

    @staticmethod
    def collect_user_totals_metrics(user_data: dict, weights: np.ndarray) -> dict:
        """Collapse each author's metric array into one weighted score.

        Args:
            user_data: Dict mapping author id -> array of metric totals.
            weights: Per-metric weights, broadcast against each metric array.

        Returns:
            Dict mapping author id -> sum of ``metric_array * weights``.
        """
        return {
            user: np.sum(metric_array * weights)
            for user, metric_array in user_data.items()
        }

    @staticmethod
    def get_x_best_users(user_totals: dict, x: int) -> list:
        """Return the ``x`` highest-scoring users, best first.

        Args:
            user_totals: Dict mapping user id -> score.
            x: Number of users to keep; must satisfy ``0 <= x <= len(user_totals)``.

        Returns:
            List of ``(int(user_id), score)`` tuples sorted by descending score.

        Raises:
            AssertionError: If ``x`` is negative or exceeds the number of users.
        """
        assert 0 <= x <= len(user_totals), (
            f"x must be between 0 and {len(user_totals)}, got {x}"
        )

        # sorted() is stable, so users with equal scores keep their input order.
        ranked = sorted(user_totals.items(), key=lambda item: item[1], reverse=True)

        return [(int(user), total) for user, total in ranked[:x]]


In [None]:
# Score every author by the equally weighted sum of their metric totals,
# then keep the top-ranked authors.
equal_weights = np.array([1, 1, 1, 1])
metrics = janky_metrics.get_users_metrics(data)
total_metrics = janky_metrics.collect_user_totals_metrics(metrics, equal_weights)
best_users = janky_metrics.get_x_best_users(total_metrics, 50)  # CHANGE IN PROD

# Report the number of scored authors, then the number kept.
for collection in (total_metrics, best_users):
    print(len(collection))


In [None]:

def user_id_to_usernames(best_users, batch_size=100):
    """Look up the Twitter username for each ranked user id.

    Args:
        best_users: Sequence of ``(user_id, score)`` tuples, as produced by
            ``janky_metrics.get_x_best_users``; only the ids are used.
        batch_size: Maximum number of ids sent per request. The users lookup
            endpoint accepts at most 100 ids per call.

    Returns:
        Dict mapping ``int`` user id -> username for every user the API returned.

    Raises:
        ValueError: If ``batch_size`` is not between 1 and 100.
        Exception: If the API wrapper flags an error; the payload is attached.
    """
    if not 1 <= batch_size <= 100:
        raise ValueError(f"batch_size must be between 1 and 100, got {batch_size}")

    url = "https://api.twitter.com/2/users"
    user_ids = [user[0] for user in best_users]
    users = []

    # Fixed-size batches never produce an empty request, unlike splitting the
    # ids into a hard-coded number of groups.
    for start in range(0, len(user_ids), batch_size):
        batch = user_ids[start:start + batch_size]

        params = {
            "ids": ",".join(str(user_id) for user_id in batch),
            "user.fields": "username"
        }

        # The rate-limit values returned by the wrapper are not needed here.
        user_data, _, _ = twitter_api(
            headers=headers, url=url, params=params, data_location='data'
        )

        if user_data["fyp"]["error"]:
            raise Exception(user_data)

        # "data" may be absent when none of the requested ids resolve.
        users += user_data.get("data", [])
        time.sleep(1.05)

    return {int(user["id"]): user["username"] for user in users}


In [None]:
def ratelimit_wait(limit_reset_time, thing, len_concat_data):
    """Block until the API rate-limit window resets, logging progress.

    Args:
        limit_reset_time: Unix timestamp (seconds) at which the limit resets.
        thing: Label for what is being collected; only used in the log output.
        len_concat_data: Number of items collected so far; only used in the
            log output.
    """
    print("---- Start Ratelimit Wait ----")
    print(f"Current {thing} captured: {len_concat_data}")
    print(f"Unix epochs when: {limit_reset_time}")
    time_reset = datetime.datetime.fromtimestamp(limit_reset_time)
    print(f"Completion when: {time_reset}")
    # Wait one second past the reset. Clamp at zero because time.sleep raises
    # ValueError on a negative duration, which happens if the reset time has
    # already passed by the time we get here.
    time.sleep(max(0.0, limit_reset_time - time.time() + 1))
    print(f"Completed, time is: {datetime.datetime.now()}")
    print("---- End Ratelimit Wait ----\n")


In [None]:
def collect_user_tweet_discourse_count(user_id_name_pair):
    """Fetch per-day counts of each user's matching reply tweets during 2021.

    For every user the full-archive counts endpoint is paged through until no
    ``next_token`` is returned. A 429 response, or an exhausted request quota
    with pages still pending, pauses until the rate-limit window resets.

    Args:
        user_id_name_pair: Dict mapping user id -> username.

    Returns:
        Dict mapping user id -> list of every ``data`` entry returned for that
        user across all pages.

    Raises:
        Exception: If the API wrapper flags an error other than a 429; the
            payload is attached.
    """
    url = "https://api.twitter.com/2/tweets/counts/all"
    data_stuff = {}

    for idx, (user_id, user_username) in enumerate(user_id_name_pair.items()):
        # NOTE(review): start_time is 01:00 on 1 Jan rather than midnight —
        # confirm this offset is intentional.
        params = {
            "query": f'("trans" OR "enby" OR "transgender" OR "nonbinary") -"eng trans" -"#transporn" -"#porn" -is:nullcast lang:en -is:retweet is:reply from:{user_username}',
            "start_time": convert_datetime_to_ISO_8601(datetime.datetime(2021, 1, 1, 1, 0, 0, 0)),
            "end_time": convert_datetime_to_ISO_8601(datetime.datetime(2021, 12, 31, 23, 59, 59, 999999)),
            "granularity": "day"
        }

        print(f"=> User {idx}")

        concat_data = []
        has_more = True

        while has_more:
            (
                api_data,
                limit_remaining_requests,
                limit_reset_time
            ) = twitter_api(headers=headers, url=url, params=params, data_location='data')

            if api_data['fyp']['error']:
                # Only rate limiting is retried; the same page is requested
                # again once the window has reset.
                if api_data.get("status") != 429:
                    raise Exception(api_data)
                ratelimit_wait(limit_reset_time, 'tweet counts', len(concat_data))
            else:
                page = api_data["data"]
                concat_data += page
                print(f"Added: {len(page)}")
                print(f"Total: {len(concat_data)}\n")

                next_token = api_data["meta"].get("next_token")
                has_more = next_token is not None

                if has_more:
                    params["next_token"] = next_token
                    # Quota used up with pages still pending: wait for the reset.
                    if limit_remaining_requests <= 0:
                        ratelimit_wait(limit_reset_time, 'tweet counts', len(concat_data))

            # Small pause between consecutive requests, including across users.
            time.sleep(0.25)

        data_stuff[user_id] = concat_data

    return data_stuff


In [None]:
def calculate_tweet_discourse_count(data_stuff):
    """Total the collected tweet counts for each user.

    Args:
        data_stuff: Dict mapping user id -> list of count dicts, each holding
            a ``tweet_count`` entry.

    Returns:
        Dict mapping user id -> sum of that user's ``tweet_count`` values
        (0 for a user with no entries).
    """
    # Iterate the argument itself rather than a notebook-level variable, so
    # the result depends only on what was passed in.
    return {
        user_id: sum(count["tweet_count"] for count in counts)
        for user_id, counts in data_stuff.items()
    }


In [33]:
# Keep the 15 users with the highest totals in user_count_totals.
# NOTE(review): user_count_totals is not assigned at notebook scope in any
# visible cell (the only assignment is a local variable inside
# calculate_tweet_discourse_count) — confirm it is defined before this cell
# runs on a fresh kernel.
most_discourse_users = janky_metrics.get_x_best_users(user_count_totals, 15)
