In [None]:
import json
import os
import time
import numpy as np
import datetime

from fyp.crypto import Crypto
from fyp.influence_measures import ri, snp
from fyp.twitter_api import twitter_api, convert_datetime_to_ISO_8601


In [None]:
from fyp.secrets import SECRETS
# Authorization header passed as `headers=` to every twitter_api(...) call in this notebook.
# The bearer token is read from the project's SECRETS object rather than written inline.
headers = {"Authorization": f"Bearer {SECRETS.TWITTER_BEARER_TOKEN}"}


In [None]:
# Shared Crypto instance; used in the next cell to decrypt the stored tweet file.
CRYPTO = Crypto()


In [None]:
# NOTE(review): absolute, user-specific paths — consider a configurable DATA_DIR.
d = '/its/home/ep396/Documents/FYP/data/decrypted-new-initial-tweets.json'
e = '/its/home/ep396/Documents/FYP/data/encrypted-new-initial-tweets.json'

# Decrypt the encrypted tweet dump (e) into a temporary plaintext file (d).
CRYPTO.age_decrypt_file(e, d)

# Load the plaintext JSON, then always delete it again: the `finally` guarantees the
# decrypted copy is removed even when reading or parsing fails, and the `with` block
# guarantees the file handle is closed before the removal.
try:
    with open(d, encoding='utf8') as file:
        data = json.load(file)
finally:
    os.remove(d)

# Number of top-level entries loaded (shown as the cell output).
len(data)


In [None]:
class janky_metrics():
    """Namespace of helpers that rank tweet authors by aggregated metrics.

    All methods are static: they can be called either on the class
    (``janky_metrics.get_users_metrics(...)``) or on an instance.
    """

    @staticmethod
    def get_users_metrics(tweet_data: dict) -> dict:
        """Sum every author's ``public_metrics`` values across their tweets.

        Args:
            tweet_data: iterable of tweet dicts, each with an ``"author_id"``
                and a ``"public_metrics"`` mapping.

        Returns:
            Dict mapping author id -> numpy array holding the element-wise sum
            of that author's metric values. The array order follows the value
            order of each tweet's ``public_metrics`` mapping (assumes every
            tweet lists its metrics in the same order — TODO confirm).
        """
        user_data = {}

        for tweet in tweet_data:
            author = tweet["author_id"]
            metric_list = np.array(list(tweet["public_metrics"].values()))
            if author in user_data:
                user_data[author] = np.add(metric_list, user_data[author])
            else:
                user_data[author] = metric_list

        return user_data

    @staticmethod
    def collect_user_totals_metrics(user_data: dict, weights: np.array) -> dict:
        """Collapse each user's metric array into one weighted total.

        Args:
            user_data: dict of user id -> numpy array of metric sums.
            weights: per-metric weights, multiplied element-wise with each array.

        Returns:
            Dict mapping user id -> sum of the weighted metrics.
        """
        return {
            user: np.sum(metric_array * weights)
            for user, metric_array in user_data.items()
        }

    @staticmethod
    def get_x_best_users(user_totals: dict, x: int) -> list:
        """Return the ``x`` highest-scoring users as ``(user, score)`` tuples.

        Users are ordered by score, highest first; ties keep the insertion
        order of ``user_totals`` (the sort is stable).

        Raises:
            AssertionError: if ``x`` is negative or larger than the number of
                users available.
        """
        # A negative x used to slip through and return every user; reject it explicitly.
        assert 0 <= x <= len(user_totals)

        ranked = sorted(
            user_totals.items(), key=lambda item: item[1], reverse=True
        )
        return ranked[:x]


In [None]:
# Aggregate per-author metric sums from the loaded tweets.
metrics = janky_metrics.get_users_metrics(data)
# Weight each of the four metric positions equally
# (assumes public_metrics carries exactly four values per tweet — TODO confirm).
total_metrics = janky_metrics.collect_user_totals_metrics(metrics, np.array([1, 1, 1, 1]))
# Keep the 50 highest-scoring authors as (author_id, score) tuples.
best_users = janky_metrics.get_x_best_users(total_metrics, 50) # CHANGE IN PROD
# Sanity check: number of distinct authors vs. number kept.
print(len(total_metrics))
print(len(best_users))


In [None]:
def ratelimit_wait(limit_reset_time, thing, len_concat_data):
    """Sleep until one second after the rate-limit reset time, logging progress.

    Args:
        limit_reset_time: Unix timestamp (seconds) at which the limit resets.
        thing: label for what is being collected; used only in the log output.
        len_concat_data: number of items collected so far; used only in the log output.

    If the reset time has already passed, the function returns without sleeping
    instead of handing a negative duration to ``time.sleep`` (which raises
    ``ValueError``).
    """
    print("---- Start Ratelimit Wait ----")
    print(f"Current {thing} captured: {len_concat_data}")
    print(f"Unix epochs when: {limit_reset_time}")
    time_reset = datetime.datetime.fromtimestamp(limit_reset_time)
    print(f"Completion when: {time_reset}")
    # +1 second of slack after the reset; clamp at zero for resets already in the past.
    time.sleep(max(0.0, limit_reset_time - time.time() + 1))
    print(f"Completed, time is: {datetime.datetime.now()}")
    print("---- End Ratelimit Wait ----\n")


In [None]:
def user_id_to_usernames(best_users, num_groups=5):
    """Look up the username of each user in ``best_users`` via the users endpoint.

    Args:
        best_users: sequence of ``(user_id, score)`` pairs; only the id is used.
        num_groups: number of roughly equal batches the ids are split into,
            one request per non-empty batch (default 5, as before).

    Returns:
        Dict mapping ``int(user id)`` -> username for every user the API returned.

    Raises:
        Exception: carrying the API payload when the response is flagged as an error.
    """
    groupings = np.array_split([user[0] for user in best_users], num_groups)
    url = "https://api.twitter.com/2/users"
    users = []

    for group in groupings:
        # With fewer users than groups some batches are empty; an empty
        # "ids" parameter would only produce a failing request, so skip them.
        if len(group) == 0:
            continue

        params = {
            "ids": ",".join(str(user_id) for user_id in group),
            "user.fields": "username"
        }

        user_data, _remaining, _reset = twitter_api(
            headers=headers, url=url, params=params, data_location='data'
        )

        if user_data["fyp"]["error"]:
            raise Exception(user_data)

        users += user_data["data"]
        # Pause between requests.
        time.sleep(1.05)

    return {int(user["id"]): user["username"] for user in users}


In [None]:
def collect_user_tweet_discourse_count(user_id_name_pair):
    """Fetch daily tweet counts matching the discourse query for each user.

    Args:
        user_id_name_pair: dict mapping user id -> username; the username is
            inserted into the ``from:`` clause of the search query.

    Returns:
        Dict mapping user id -> list of all ``"data"`` entries returned by the
        counts endpoint across every page for that user.

    Raises:
        Exception: carrying the API payload for any error response other than
            a 429 status (which triggers a rate-limit wait and a retry).
    """
    url = "https://api.twitter.com/2/tweets/counts/all"
    data_stuff = {}

    for idx, pair in enumerate(user_id_name_pair.items()):
        user_id, user_username = pair
        # Fresh params per user, so a pagination token never carries over to the next user.
        params = {
            "query": f'("trans" OR "enby" OR "transgender" OR "nonbinary") -"eng trans" -"#transporn" -"#porn" -is:nullcast lang:en -is:retweet is:reply from:{user_username}',
            "start_time": convert_datetime_to_ISO_8601(datetime.datetime(2021, 1, 1, 1, 0, 0, 0)),
            "end_time": convert_datetime_to_ISO_8601(datetime.datetime(2021, 12, 31, 23, 59, 59, 999999)),
            "granularity": "day"
        }

        print(f"=> User {idx}")

        cont, concat_data = True, []

        # Page through the results until no next_token is returned.
        while cont:
            (
                api_data, 
                limit_remaining_requests, 
                limit_reset_time
            ) = twitter_api(headers=headers, url=url, params=params, data_location='data')

            if api_data['fyp']['error']:
                # 429: wait for the window to reset, then repeat the same request.
                if "status" in api_data and api_data["status"] == 429:
                    ratelimit_wait(limit_reset_time, 'mentions', len(concat_data))
                else:
                    raise Exception(api_data)
            else:
                if api_data["fyp"]["error"] is False:
                    concat_data += api_data["data"]
                    print(f"Added: {len(api_data['data'])}")
                    print(f"Total: {len(concat_data)}\n")
                    # None marks the last page.
                    params["next_token"] = api_data["meta"]["next_token"] if "next_token" in api_data["meta"] else None

                if params["next_token"] is None and api_data["fyp"]["error"] is False:
                    cont = False

                # Out of requests but more pages to fetch: wait for the reset first.
                if limit_remaining_requests <= 0 and cont is True:
                    ratelimit_wait(limit_reset_time, 'mentions', len(concat_data))

            # Short pause after every request.
            time.sleep(0.25)

        data_stuff[user_id] = concat_data
    
    return data_stuff


In [None]:
def calculate_tweet_discourse_count(data_stuff, best_users):
    """Total the ``"tweet_count"`` of every count entry collected for each user.

    Args:
        data_stuff: dict mapping integer user id -> list of count entries,
            each a dict with a ``"tweet_count"`` value.
        best_users: sequence of ``(user_id, score)`` pairs; the id is cast to int.

    Returns:
        Dict mapping integer user id -> summed tweet count.
    """
    user_count_totals = {}

    for entry in best_users:
        uid = int(entry[0])
        user_count_totals[uid] = sum(
            bucket["tweet_count"] for bucket in data_stuff[uid]
        )

    return user_count_totals


In [None]:
# Resolve the top-ranked author ids to usernames (int user id -> username).
user_ids_and_name_pair = user_id_to_usernames(best_users)


In [None]:
# Fetch the per-day counts of matching tweets for every resolved user.
raw_count = collect_user_tweet_discourse_count(user_ids_and_name_pair)


In [None]:
# Collapse the per-day counts into one total per user.
actual_count = calculate_tweet_discourse_count(raw_count, best_users)


In [None]:
# Keep the 15 users with the highest matching-tweet totals.
most_discourse_users = janky_metrics.get_x_best_users(actual_count, 15)


In [None]:
def get_user_tweets_over_timespans(best_users, time_spans, max_pages=2, max_results=10):
    """Collect each user's original tweets (no retweets/replies) per time span.

    Args:
        best_users: sequence of ``(user_id, score)`` pairs; the id is cast to int.
        time_spans: iterable of ``(start_datetime, end_datetime)`` tuples; each
            tuple is also used as a key of the returned dict.
        max_pages: maximum number of pages requested per user and time span
            (default 2, the previous hard-coded development limit); ``None``
            pages until the API reports no further page.
        max_results: page size sent to the API (default 10, as before).

    Returns:
        Dict mapping time span -> {integer user id -> list of tweet dicts}.

    Raises:
        Exception: carrying the API payload when a response is flagged as an
            error and is neither an empty result nor a single
            not-authorized-for-resource error (those just end that user's paging).
    """
    not_authorized = 'https://api.twitter.com/2/problems/not-authorized-for-resource'
    data = {}

    for time_span in time_spans:
        start_date = convert_datetime_to_ISO_8601(time_span[0])
        end_date = convert_datetime_to_ISO_8601(time_span[1])

        time_data = {}

        print(f"\n=> {time_span[0]} to {time_span[1]} ")

        for idx, user in enumerate(best_users):
            print(f"==> User {idx}")

            user_id = int(user[0])
            concat_data = []

            url = f"https://api.twitter.com/2/users/{user_id}/tweets"
            params = {
                "exclude": 'retweets,replies',
                "expansions": 'author_id,referenced_tweets.id,entities.mentions.username',
                "max_results": max_results,
                "media.fields": 'public_metrics',
                "tweet.fields": 'public_metrics,referenced_tweets',
                "user.fields": 'public_metrics,description',
                "start_time": start_date,
                "end_time": end_date,
            }

            pages_fetched = 0
            while max_pages is None or pages_fetched < max_pages:
                (
                    api_data,
                    limit_remaining_requests,
                    limit_reset_time
                ) = twitter_api(headers=headers, url=url, params=params, data_location='data')
                pages_fetched += 1

                if api_data['fyp']['error']:
                    print(api_data)

                # Nothing (more) to collect for this user in this span.
                if "meta" in api_data and api_data["meta"]["result_count"] == 0:
                    break

                # Inspect the API response here — the previous code looked at the
                # local result dict `data`, so this skip could never trigger.
                errors = api_data["errors"] if "errors" in api_data else []
                if len(errors) == 1 and "type" in errors[0] and errors[0]["type"] == not_authorized:
                    break

                if api_data['fyp']['error']:
                    raise Exception(api_data)

                concat_data += api_data["data"]
                print(f"Added: {len(api_data['data'])}")
                print(f"Total: {len(concat_data)}\n")

                next_token = api_data["meta"]["next_token"] if "next_token" in api_data["meta"] else None
                params["pagination_token"] = next_token
                has_more = next_token is not None

                # Out of requests but more pages to fetch: wait for the reset first.
                if has_more and limit_remaining_requests <= 0:
                    ratelimit_wait(limit_reset_time, 'tweets', len(concat_data))

                # Pause after every request.
                time.sleep(1.05)

                # Stop on the last page; otherwise the next iteration would issue a
                # request without a valid continuation token.
                if not has_more:
                    break

            time_data[user_id] = concat_data

        data[time_span] = time_data

    return data


In [None]:
def count_tweets_and_get_ids(best_users, user_tweets, time_spans):
    """Gather, per user, the ids of all their tweets across the given time spans.

    Args:
        best_users: sequence of ``(user_id, score)`` pairs; the id is cast to int.
        user_tweets: dict mapping time span -> {integer user id -> list of tweet dicts}.
        time_spans: the time spans to read from ``user_tweets``, in order.

    Returns:
        Dict mapping integer user id -> list of tweet ids (empty when the user
        has no tweets in any span).
    """
    ids_by_user = {}
    for entry in best_users:
        ids_by_user[int(entry[0])] = []

    for span in time_spans:
        span_tweets = user_tweets[span]
        for entry in best_users:
            uid = int(entry[0])
            # Users absent from this span simply contribute nothing.
            if uid not in span_tweets:
                continue
            ids_by_user[uid].extend(tweet["id"] for tweet in span_tweets[uid])

    return ids_by_user


In [None]:
def get_tweet_retweeters(best_users, user_tweet_count_and_tweet_ids):
    """Collect the retweeter ids of every tweet of every user.

    Args:
        best_users: sequence of ``(user_id, score)`` pairs; the id is cast to int.
        user_tweet_count_and_tweet_ids: dict mapping integer user id -> list of tweet ids.

    Returns:
        Dict mapping integer user id -> {tweet id -> list of retweeter ids}.

    Raises:
        Exception: carrying the API payload when a response is flagged as an error.
    """
    retweeters = {}
    url = "https://api.twitter.com/1.1/statuses/retweeters/ids.json"

    for user_idx, user in enumerate(best_users):
        user_id = int(user[0])
        retweeters[user_id] = {}

        print(f"==> User {user_idx}")

        for tweet_idx, tweet_id in enumerate(user_tweet_count_and_tweet_ids[user_id]):
            concat_data = []

            params = {
                'id': tweet_id,
                'count': 100
            }

            print(f"==> Tweet {tweet_idx}")
            while True:
                (
                    api_data,
                    limit_remaining_requests,
                    limit_reset_time
                ) = twitter_api(headers=headers, url=url, params=params, data_location='ids')

                if api_data['fyp']['error']:
                    print(api_data)
                    raise Exception(api_data)

                concat_data += api_data["ids"]
                print(f"Added: {len(api_data['ids'])}")
                print(f"Total: {len(concat_data)}\n")

                # A cursor of 0 marks the last page. A missing cursor is treated the
                # same way; previously it became None, which never matched the
                # `== 0` exit test and left the loop running forever.
                next_cursor = api_data["next_cursor"] if "next_cursor" in api_data else None
                has_more = bool(next_cursor)

                if has_more:
                    params["cursor"] = next_cursor
                    # Out of requests but more pages to fetch: wait for the reset first.
                    if limit_remaining_requests <= 0:
                        ratelimit_wait(limit_reset_time, 'tweets', len(concat_data))

                # Pause after every request.
                time.sleep(1.05)

                if not has_more:
                    break

            retweeters[user_id][tweet_id] = concat_data

    return retweeters


In [None]:
def get_unique_retweeters(best_users, user_unique_retweeters):
    """De-duplicate each user's retweeters across all of their tweets.

    Args:
        best_users: sequence of ``(user_id, score)`` pairs; the id is cast to int.
        user_unique_retweeters: dict mapping integer user id ->
            {tweet id -> list of retweeter ids}. Retweeter ids must be hashable.

    Returns:
        Dict mapping integer user id -> list of distinct retweeter ids, in
        order of first appearance.
    """
    unique_retweeters = {}

    for user in best_users:
        user_id = int(user[0])
        # Dict keys give an insertion-ordered set, so de-duplication is linear
        # instead of scanning a list for every id.
        seen = {}
        for retweeter_ids in user_unique_retweeters[user_id].values():
            # Skip tweets without retweeters. The previous `is not []` test
            # compared identity with a fresh list and was therefore always true.
            if not retweeter_ids:
                continue
            seen.update(dict.fromkeys(retweeter_ids))

        unique_retweeters[user_id] = list(seen)

    return unique_retweeters


In [None]:
def calculate_ri_for_users(best_users, user_tweet_count, user_unique_retweeters):
    """Compute the ``ri`` measure for each user.

    Args:
        best_users: sequence of ``(user_id, score)`` pairs; the id is cast to int.
        user_tweet_count: dict mapping integer user id -> list of tweet ids;
            only its length is used.
        user_unique_retweeters: dict mapping integer user id -> list of
            distinct retweeter ids; only its length is used.

    Returns:
        Dict mapping integer user id -> value returned by ``ri``, with results
        whose string form is ``"nan"`` or ``"-inf"`` replaced by 0.
    """
    ri_results = {}

    for entry in best_users:
        uid = int(entry[0])
        n_tweets = len(user_tweet_count[uid])
        n_retweeters = len(user_unique_retweeters[uid])
        score = ri(n_tweets, n_retweeters)
        # Undefined results are scored as 0 so they sort below real values.
        if str(score) in ("nan", "-inf"):
            ri_results[uid] = 0
        else:
            ri_results[uid] = score

    return ri_results


In [None]:
# Three sampling windows in 2021, each as a (start, end) datetime tuple.
# The tuples are also used as dictionary keys by the collection functions.
time_spans = [
    (datetime.datetime(2021, 4, 5, 0, 0, 0), datetime.datetime(2021, 4, 10, 23, 59, 59, 999999)),
    (datetime.datetime(2021, 8, 20, 0, 0, 0), datetime.datetime(2021, 8, 25, 23, 59, 59, 999999)),
    (datetime.datetime(2021, 12, 15, 0, 0, 0), datetime.datetime(2021, 12, 20, 23, 59, 59, 999999)),
]


In [None]:
# Get each user's tweets within every time span (time span -> user id -> tweets).
user_tweets = get_user_tweets_over_timespans(most_discourse_users, time_spans)


In [None]:
# Collect the tweet ids of each user across all time spans; the list length is the tweet count.
user_tweet_count_and_tweet_ids = count_tweets_and_get_ids(most_discourse_users, user_tweets, time_spans)


In [None]:
# Get the retweeters of every collected tweet (user id -> tweet id -> retweeter ids).
user_retweeters = get_tweet_retweeters(most_discourse_users, user_tweet_count_and_tweet_ids)


In [None]:
# Reduce each user's retweeters to the distinct retweeter ids across all of their tweets.
user_unique_retweeters = get_unique_retweeters(most_discourse_users, user_retweeters)


In [None]:
# Calculate ri for each user from their tweet count and distinct-retweeter count.
ri_metrics = calculate_ri_for_users(most_discourse_users, user_tweet_count_and_tweet_ids, user_unique_retweeters)


In [None]:
# Keep the 5 users with the highest ri value as (user_id, ri) tuples.
best_ri_users = janky_metrics.get_x_best_users(ri_metrics, 5) # CHANGE IN PROD


In [None]:
def get_user_mentions_within_time_span(best_ri_users, time_spans, max_pages=2):
    """Collect the tweets mentioning each user within each time span.

    Args:
        best_ri_users: sequence of ``(user_id, score)`` pairs; the id is used as given.
        time_spans: iterable of ``(start_datetime, end_datetime)`` tuples; each
            tuple is also used as a key of the returned dict.
        max_pages: maximum number of pages requested per user and time span
            (default 2, the previous hard-coded development limit); ``None``
            pages until the API reports no further page.

    Returns:
        Dict mapping time span -> {user id -> list of mention dicts}.

    Raises:
        Exception: carrying the API payload when a non-empty response is
            flagged as an error.
    """
    data = {}

    for time_span in time_spans:
        print(f"\n=> {time_span[0]} to {time_span[1]} ")

        time_data = {}

        start_date = convert_datetime_to_ISO_8601(time_span[0])
        end_date = convert_datetime_to_ISO_8601(time_span[1])

        for idx, user in enumerate(best_ri_users):
            print(f"==> User {idx}")

            user_id = user[0]
            url = f"https://api.twitter.com/2/users/{user_id}/mentions"
            concat_data = []

            # Built per user: sharing one dict across users let the previous
            # user's pagination_token leak into this user's first request.
            params = {
                "max_results": 100,
                "expansions": 'author_id',
                "start_time": start_date,
                "end_time": end_date,
            }

            pages_fetched = 0
            while max_pages is None or pages_fetched < max_pages:
                (
                    api_data,
                    limit_remaining_requests,
                    limit_reset_time
                ) = twitter_api(headers=headers, url=url, params=params, data_location='data')
                pages_fetched += 1

                # Nothing (more) to collect for this user in this span.
                if "meta" in api_data and api_data["meta"]["result_count"] == 0:
                    print("None")
                    break

                if api_data['fyp']['error']:
                    raise Exception(api_data)

                concat_data += api_data["data"]
                print(f"Added: {len(api_data['data'])}")
                print(f"Total: {len(concat_data)}\n")

                next_token = api_data["meta"]["next_token"] if "next_token" in api_data["meta"] else None
                has_more = next_token is not None

                if has_more:
                    params["pagination_token"] = next_token
                    # Out of requests but more pages to fetch: wait for the reset first.
                    if limit_remaining_requests <= 0:
                        ratelimit_wait(limit_reset_time, 'mentions', len(concat_data))

                # Pause after every request.
                time.sleep(1.05)

                # Stop on the last page instead of issuing another request
                # without a continuation token.
                if not has_more:
                    break

            time_data[user_id] = concat_data
        data[time_span] = time_data
    return data


In [None]:
# Collect the mentions of each top-ri user within every time span.
mentions = get_user_mentions_within_time_span(best_ri_users, time_spans)

In [None]:
def get_user_follower_count(best_ri_users):
    """Fetch the follower count of every user in ``best_ri_users`` in one request.

    Args:
        best_ri_users: sequence of ``(user_id, score)`` pairs; only the id is used.

    Returns:
        Dict mapping ``int(user id)`` -> ``followers_count`` taken from each
        returned user's ``public_metrics``.
    """
    # Comma-separated id list, e.g. "1,2,3" (empty string for no users).
    joined_ids = ",".join(f"{entry[0]}" for entry in best_ri_users)

    endpoint = "https://api.twitter.com/2/users"
    query = {
        "ids": joined_ids,
        "user.fields": "public_metrics"
    }

    user_data, _remaining, _reset = twitter_api(
        headers=headers, url=endpoint, params=query, data_location='data'
    )

    return {
        int(record["id"]): record["public_metrics"]["followers_count"]
        for record in user_data["data"]
    }


In [None]:
# Follower count of each top-ri user (int user id -> followers_count).
user_follower_counts = get_user_follower_count(best_ri_users)

In [None]:
class UserObject:
    """Empty attribute container; calculate_snp_for_users sets its fields before passing it to snp()."""
    pass

In [None]:
def get_unique_mentions(best_ri_users, mentions, time_spans):
    """List the distinct authors who mentioned each user across the time spans.

    Args:
        best_ri_users: sequence of ``(user_id, score)`` pairs; the id is used as given.
        mentions: dict mapping time span -> {user id -> list of mention dicts},
            each mention carrying an ``"author_id"``.
        time_spans: the time spans to read from ``mentions``, in order.

    Returns:
        Dict mapping user id -> list of distinct integer author ids, in order
        of first appearance.
    """
    unique_by_user = {}

    for entry in best_ri_users:
        uid = entry[0]
        # Dict keys act as an insertion-ordered set of author ids.
        authors = {}
        for span in time_spans:
            for mention in mentions[span][uid]:
                authors.setdefault(int(mention["author_id"]), None)
        unique_by_user[uid] = list(authors)

    return unique_by_user


In [None]:
def get_number_of_tweets_retweeted_and_replied(best_ri_users, user_tweets, time_spans=None):
    """Count, per user, how many of their tweets were retweeted/quoted and replied to.

    Args:
        best_ri_users: sequence of ``(user_id, score)`` pairs; the id is used as given.
        user_tweets: dict mapping time span -> {user id -> list of tweet dicts},
            each tweet carrying ``"public_metrics"``.
        time_spans: time spans to include. Defaults to every span present in
            ``user_tweets``, so the function no longer depends on a notebook
            global of the same name.

    Returns:
        Tuple ``(retweet_count, reply_count)`` of dicts mapping user id -> number
        of tweets with at least one retweet or quote, and number of tweets with
        at least one reply, respectively.
    """
    if time_spans is None:
        time_spans = list(user_tweets)

    retweet_count = {}
    reply_count = {}

    for user in best_ri_users:
        user_id = user[0]

        retweet_count[user_id] = 0
        reply_count[user_id] = 0

        for time_span in time_spans:
            for tweet in user_tweets[time_span][user_id]:
                tweet_metrics = tweet["public_metrics"]
                if tweet_metrics["reply_count"] > 0:
                    reply_count[user_id] += 1
                # A quote counts as a retweet here.
                if tweet_metrics["retweet_count"] > 0 or tweet_metrics["quote_count"] > 0:
                    retweet_count[user_id] += 1

    return retweet_count, reply_count


In [None]:
# Distinct authors who mentioned each top-ri user across all time spans.
unique_mentions = get_unique_mentions(best_ri_users, mentions, time_spans)


In [None]:
# Per-user counts of tweets that were retweeted/quoted and of tweets that received replies.
retweet_count, reply_count = get_number_of_tweets_retweeted_and_replied(best_ri_users, user_tweets)


In [None]:
def calculate_snp_for_users():
    """Compute the ``snp`` measure for every user in ``best_ri_users``.

    Reads these notebook globals: ``best_ri_users``, ``user_unique_retweeters``,
    ``unique_mentions``, ``user_follower_counts``, ``retweet_count``,
    ``reply_count`` and ``user_tweet_count_and_tweet_ids``.

    Returns:
        Dict mapping user id -> value returned by ``snp`` for that user.
    """
    snp_metrics = {}

    for user in best_ri_users:
        user_id = user[0]
        user_object = UserObject()

        user_object.num_of_unique_users_who_retweeted = len(user_unique_retweeters[user_id])
        user_object.num_of_unique_users_mentioning_the_user = len(unique_mentions[user_id])
        user_object.num_of_followers = user_follower_counts[user_id]

        # retweet_count / reply_count are dicts of user id -> count, so look up this
        # user's own value. len() of the dict is the number of users, which gave
        # every user the same (wrong) figure.
        user_object.tweets_of_user_retweeted = retweet_count[user_id]
        user_object.tweets_of_user_replied = reply_count[user_id]
        user_object.tweets_by_user = len(user_tweet_count_and_tweet_ids[user_id])

        snp_metrics[user_id] = snp(user_object)

    return snp_metrics


In [None]:
# Calculate snp for each top-ri user (user id -> snp value).
snp_metrics = calculate_snp_for_users()


In [None]:
# Keep the 2 users with the highest snp value as (user_id, snp) tuples.
best_snp_users = janky_metrics.get_x_best_users(snp_metrics, 2) # CHANGE IN PROD


In [None]:
# Caveat: results may be distorted, because follower counts reflect a following amassed
# over time while the other inputs only use tweets from the sampled time spans.
# TODO: maybe collect all tweets for the top 50 users over all time?
snp_metrics