In [1]:
from dotenv import load_dotenv
# Load settings from a local .env file into the process environment before any
# os.environ lookups run (presumably where the bearer tokens used below are
# kept; confirm against the project's .env).
load_dotenv()


True

In [28]:
from tweepy import Client, Paginator

import os
import math

In [3]:
# Client built from the BEARER_TOKEN environment variable; the notebook uses it
# for user lookups (get_user) and timeline pagination (get_users_tweets).
# wait_on_rate_limit=True is passed so that, per tweepy's documentation, calls
# wait for the rate-limit window to reset instead of raising.
client = Client(
    bearer_token=os.environ["BEARER_TOKEN"], wait_on_rate_limit=True)

In [34]:
# Second client built from the ACADEMIC_BEARER_TOKEN environment variable; the
# notebook uses it only for get_all_tweets_count (presumably because that
# endpoint needs elevated/academic access; confirm).
academic_client = Client(
    bearer_token=os.environ["ACADEMIC_BEARER_TOKEN"], wait_on_rate_limit=True)

## Calculate influence score

In [4]:
def get_all_tweets(id: int):
    """Fetch as many tweets as the timeline endpoint will return for one user.

    Pages through ``client.get_users_tweets`` (requesting ``max_results=100``
    per call) and concatenates the ``data`` of every page.

    Args:
        id: Numeric Twitter user id whose timeline is read.

    Returns:
        A flat list of the tweet objects from all pages; pages whose ``data``
        is ``None`` contribute nothing.
    """
    requested_fields = [
        "created_at", "text", "source", "public_metrics", "geo", "entities",
        "conversation_id", "possibly_sensitive", "referenced_tweets",
    ]
    pages = Paginator(
        client.get_users_tweets,
        id,
        tweet_fields=requested_fields,
        max_results=100,
    )

    collected = []
    for page in pages:
        # A page without data has nothing to add.
        if page.data is None:
            continue
        collected.extend(page.data)
    return collected


In [5]:
# Download the timeline for the account analysed throughout this notebook and
# show how many tweets came back.
tweets = get_all_tweets(id=1145137598379282432 )
len(tweets)

3240

In [6]:
def get_entities_count(tweet):
    """Count the hashtags, URLs and mentions attached to a tweet.

    Args:
        tweet: Object exposing an ``entities`` attribute that is either
            ``None`` or a mapping with optional ``"hashtags"``, ``"urls"`` and
            ``"mentions"`` entries.

    Returns:
        A dict with ``hashtag_count``, ``url_count`` and ``mention_count``;
        every count is 0 when the tweet has no entities, and a missing entry
        counts as 0.
    """
    # Output key -> key inside the tweet's entities mapping.
    entity_keys = {
        "hashtag_count": "hashtags",
        "url_count": "urls",
        "mention_count": "mentions",
    }

    found = tweet.entities
    if found is None:
        return dict.fromkeys(entity_keys, 0)

    return {label: len(found.get(kind, "")) for label, kind in entity_keys.items()}


In [7]:
def check_if_retweet(tweet):
    """Flag whether a tweet is a retweet.

    Args:
        tweet: Object exposing ``referenced_tweets``, which is either ``None``
            or an iterable of items indexable by ``"type"``.

    Returns:
        1 if any referenced tweet has type ``"retweeted"``, otherwise 0
        (including when there are no referenced tweets at all).
    """
    refs = tweet.referenced_tweets
    if refs is None:
        return 0

    is_retweet = any(ref["type"] == "retweeted" for ref in refs)
    return 1 if is_retweet else 0


In [8]:
# h-index via counting buckets
# https://www.youtube.com/watch?v=FvnTWDKT_ck
def calculate_h_index(array: list[int]) -> int:
    """Return the h-index of a list of counts.

    The h-index is the largest ``h`` such that at least ``h`` entries of
    ``array`` are greater than or equal to ``h``.

    Args:
        array: Per-item counts (e.g. retweets per tweet).

    Returns:
        The h-index; 0 for an empty list.
    """
    size = len(array)

    # buckets[k] holds how many values equal k; values above `size` are pooled
    # into the last bucket because the h-index can never exceed `size`.
    buckets = [0] * (size + 1)
    for value in array:
        buckets[value if value <= size else size] += 1

    # Walk candidates from largest to smallest, tracking how many values are
    # at least as large as the current candidate.
    at_least = 0
    candidate = size
    while candidate >= 0:
        at_least += buckets[candidate]
        if at_least >= candidate:
            return candidate
        candidate -= 1


# def get_retweet_index(tweets):


# Sanity check: a single item with a count of zero has an h-index of 0.
calculate_h_index([0])


0

In [47]:
def get_user_level_features(id: int):
    """Look up one account and derive its user-level influence features.

    Fetches the user's public metrics and creation date through ``client``,
    computes a social reputation score from follower/following/tweet counts,
    and computes the ratio of tweets mentioning the user to tweets the user
    has posted.

    Relies on ``get_total_tweets_mentioning_user`` being defined before this
    function is called.

    Args:
        id: Numeric Twitter user id.

    Returns:
        ``None`` when the lookup returns no data; otherwise a dict with
        ``username``, ``user_id``, ``tweet_count``, ``listed_count``,
        ``social_reputation_score``, ``followers_count``, ``following_count``
        and ``mention_ratio``. ``mention_ratio`` is ``0.0`` for an account
        with no tweets and ``None`` when the mention total is unavailable.
    """
    response = client.get_user(
        id=id, user_fields=["public_metrics", "created_at"])
    if response.data is None:
        return None

    user = response.data
    public_metrics = user["public_metrics"]

    username = user["username"]
    user_id = user["id"]
    followers_count = public_metrics["followers_count"]
    following_count = public_metrics["following_count"]
    listed_count = public_metrics["listed_count"]
    tweet_count = public_metrics["tweet_count"]
    account_creation_date = user["created_at"]

    # NOTE(review): the follower term uses a base-10 log while the tweet and
    # following terms use natural logs. Left unchanged so existing scores stay
    # comparable; confirm whether a single base was intended.
    social_reputation_score = math.log(
        (1 + followers_count) * (1 + followers_count), 10) + math.log(1 + tweet_count) - math.log(1 + following_count)

    mention_total = get_total_tweets_mentioning_user(
        username, account_creation_date)
    if mention_total is None:
        # The helper could not produce a count; report "unknown" rather than
        # failing on None / int.
        mention_ratio = None
    elif tweet_count == 0:
        # An account that has never tweeted has no meaningful ratio; avoid
        # dividing by zero.
        mention_ratio = 0.0
    else:
        mention_ratio = mention_total / tweet_count

    return {
        "username": username,
        "user_id": user_id,
        "tweet_count": tweet_count,
        "listed_count": listed_count,
        "social_reputation_score": social_reputation_score,
        "followers_count": followers_count,
        "following_count": following_count,
        "mention_ratio": mention_ratio
    }


# Compute the user-level features for the same account whose timeline was
# downloaded above, then display them.
user_level_features = get_user_level_features(1145137598379282432)
user_level_features


{'username': 'gyaigyimii',
 'user_id': 1145137598379282432,
 'tweet_count': 334031,
 'listed_count': 412,
 'social_reputation_score': 14.739181555172541,
 'followers_count': 965771,
 'following_count': 20942,
 'mention_ratio': 38.32642479290844}

In [35]:
def get_total_tweets_mentioning_user(username, account_creation_date):
    """Count tweets by other accounts that mention ``@username``.

    Pages through ``academic_client.get_all_tweets_count`` with daily
    granularity, starting at ``account_creation_date``, and adds up the
    ``total_tweet_count`` reported in each page's ``meta``.

    Args:
        username: Handle without the leading ``@``.
        account_creation_date: Start of the counting window.

    Returns:
        The summed count, or ``None`` when either argument is ``None``.
    """
    if username is None:
        return None
    if account_creation_date is None:
        return None

    # Mentions of the handle, excluding tweets written by the user themselves.
    search_query = f'("@{username}") -from:{username}'

    count_pages = Paginator(
        academic_client.get_all_tweets_count,
        search_query,
        granularity="day",
        start_time=account_creation_date,
    )
    return sum(page.meta["total_tweet_count"] for page in count_pages)


# get_total_tweets_mentioning_user(
#     user["username"], user["account_creation_date"])


In [46]:


def get_tweet_credibility_index_score(tweets: list):
    """Aggregate tweet-level engagement features for one account.

    For every tweet this reads ``public_metrics`` (retweet, quote, like and
    reply counts), counts entities via ``get_entities_count`` and detects
    retweets via ``check_if_retweet``. It then derives per-tweet averages,
    h-indices of each engagement metric, and a tweet credibility value:
    the mean of the retweet, like, hashtag and URL ratios, scaled by the
    share of tweets that are not retweets.

    Args:
        tweets: Tweet objects exposing ``public_metrics``, ``entities`` and
            ``referenced_tweets``.

    Returns:
        A dict with the h-indices (``retweet_h_index``, ``like_h_index``,
        ``quote_h_index``, ``reply_h_index``, ``retweets_plus_quotes_h_index``),
        ``tweet_credibility``, ``total_tweets_w_url`` and the per-tweet
        ratios (``retweet_ratio``, ``like_ratio``, ``reply_ratio``,
        ``quote_ratio``, ``url_ratio``). For an empty ``tweets`` list every
        ratio and ``tweet_credibility`` is ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    total_tweets = len(tweets)

    retweet_list = []
    quote_list = []
    like_list = []
    reply_list = []
    retweets_plus_quotes_list = []

    total_hashtags = 0
    total_urls = 0
    total_tweets_w_url = 0
    total_retweeted_posts = 0

    for tweet in tweets:
        metrics = tweet.public_metrics

        retweet_count = metrics["retweet_count"]
        quote_count = metrics["quote_count"]
        retweet_list.append(retweet_count)
        quote_list.append(quote_count)
        retweets_plus_quotes_list.append(quote_count + retweet_count)
        like_list.append(metrics["like_count"])
        reply_list.append(metrics["reply_count"])

        entities_count = get_entities_count(tweet)
        total_hashtags += entities_count["hashtag_count"]
        total_urls += entities_count["url_count"]
        if entities_count["url_count"] > 0:
            total_tweets_w_url += 1

        total_retweeted_posts += check_if_retweet(tweet)

    def per_tweet(total):
        # An empty timeline has no meaningful average; report 0.0 rather than
        # dividing by zero.
        return total / total_tweets if total_tweets else 0.0

    # tweet credibility components
    original_content_ratio = per_tweet(total_tweets - total_retweeted_posts)
    url_ratio = per_tweet(total_urls)
    like_ratio = per_tweet(sum(like_list))
    retweet_ratio = per_tweet(sum(retweet_list))
    reply_ratio = per_tweet(sum(reply_list))
    quote_ratio = per_tweet(sum(quote_list))
    hashtag_ratio = per_tweet(total_hashtags)

    # index score
    retweet_h_index = calculate_h_index(retweet_list)
    like_h_index = calculate_h_index(like_list)
    quote_h_index = calculate_h_index(quote_list)
    reply_h_index = calculate_h_index(reply_list)
    retweets_plus_quotes_h_index = calculate_h_index(retweets_plus_quotes_list)

    # calculate tweet credibility
    tweet_credibility = ((retweet_ratio + like_ratio +
                         hashtag_ratio + url_ratio) / 4) * original_content_ratio

    return {
        "retweet_h_index": retweet_h_index,
        "like_h_index": like_h_index,
        "quote_h_index": quote_h_index,
        "reply_h_index": reply_h_index,
        "retweets_plus_quotes_h_index": retweets_plus_quotes_h_index,
        "tweet_credibility": tweet_credibility,
        "total_tweets_w_url": total_tweets_w_url,
        "retweet_ratio": retweet_ratio,
        "like_ratio": like_ratio,
        "reply_ratio": reply_ratio,
        "quote_ratio": quote_ratio,
        "url_ratio": url_ratio,
    }


# Compute the tweet-level features from the timeline downloaded earlier, then
# display them.
tweet_level_features = get_tweet_credibility_index_score(tweets)
tweet_level_features


{'retweet_h_index': 558,
 'like_h_index': 530,
 'quote_h_index': 46,
 'reply_h_index': 122,
 'retweets_plus_quotes_h_index': 562,
 'tweet_credibility': 259.76394523510135,
 'total_tweets_w_url': 1266,
 'retweet_ratio': 1791.4922839506173,
 'like_ratio': 515.3518518518518,
 'reply_ratio': 19.321913580246914,
 'quote_ratio': 3.123148148148148,
 'url_ratio': 0.4978395061728395}

In [62]:


def get_influence_score(features: dict):
    """Combine tweet- and user-level features into a single influence score.

    The score is the arithmetic mean of ``tweet_credibility``,
    ``social_reputation_score`` and ``quote_h_index``. A sentiment score is
    not part of the average yet.

    Args:
        features: Mapping that contains at least the three keys above.

    Returns:
        The influence score. It is returned rather than printed so callers
        can store or display it.

    Raises:
        KeyError: If one of the three required keys is missing.
    """
    components = [
        features["tweet_credibility"],
        features["social_reputation_score"],
        features["quote_h_index"],
    ]
    return sum(components) / 3


# Merge the user-level features into the tweet-level dict in place; after this
# line tweet_level_features holds both feature sets (user-level values win on
# any shared key).
tweet_level_features.update(user_level_features)

# print(tweet_level_features)


def df():
    """Pass the merged feature dict to get_influence_score and hand back its result."""
    return get_influence_score(tweet_level_features)

df()


106.83437559675797
