# Mentions

In [1]:
# Pickled inputs, given as paths relative to the notebook's working directory.
COMMUNITY_MAP_PATH = "data/community_map.pickle"  # user -> community lookup
MENTION_MAP_PATH = "data/mention_map.pickle"  # author -> list of (mentioned user, tweet id)

## Utils

In [2]:
import pickle

def load_data_from_pickle(filename: str):
    """Read the pickle file at ``filename`` and return the object stored in it.

    The file is opened in binary mode and closed again before returning.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so this must only be pointed at trusted files.
    """
    with open(filename, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload

In [3]:
import pandas as pd
from typing import Dict, List, Tuple
from collections import defaultdict

def load_comm_mention_map_as_df() -> pd.DataFrame:
    """Build a table with one row per (author, mentioned user) pair.

    Reads the community map from ``COMMUNITY_MAP_PATH`` and the mention map
    from ``MENTION_MAP_PATH``, then groups each author's mentions by the user
    being mentioned.

    Returns:
        A DataFrame with the columns ``author``, ``author_community``,
        ``user``, ``user_community``, ``count`` (how many times the author
        mentioned the user) and ``original_tweets_ids`` (the tweet ids behind
        those mentions, in the order they appear in the mention map).
        Accounts missing from the community map get the community "none".
    """
    # Community labels look like strings (the fallback is "none" and later
    # cells compare against "trump"/"biden"), hence str rather than int values.
    community_of: Dict[str, str] = load_data_from_pickle(COMMUNITY_MAP_PATH)
    # author -> list of (mentioned user, tweet id) pairs
    mentions_by_author: Dict[str, List[Tuple[str, str]]] = load_data_from_pickle(
        MENTION_MAP_PATH
    )

    column_names = [
        "author",
        "author_community",
        "user",
        "user_community",
        "count",
        "original_tweets_ids",
    ]

    rows = []
    for author, mentions in mentions_by_author.items():
        author_community = community_of.get(author, "none")

        # Collect the tweet ids per mentioned user; dict insertion order keeps
        # users in first-mention order.
        tweets_per_user: Dict[str, List[str]] = {}
        for mentioned, tweet_id in mentions:
            tweets_per_user.setdefault(mentioned, []).append(tweet_id)

        rows.extend(
            (
                author,
                author_community,
                mentioned,
                community_of.get(mentioned, "none"),
                len(tweet_ids),
                tweet_ids,
            )
            for mentioned, tweet_ids in tweets_per_user.items()
        )

    return pd.DataFrame(rows, columns=column_names)

## Analysis

In [4]:
# Build the per-(author, mentioned user) mention table once; later cells
# slice it by community. The last expression previews the first five rows.
comm_mention_df = load_comm_mention_map_as_df()
comm_mention_df.head(n=5)

Unnamed: 0,author,author_community,user,user_community,count,original_tweets_ids
0,RalphTo51227093,none,realDonaldTrump,trump,1,[1312008624601018370]
1,RalphTo51227093,none,FLOTUS,trump,1,[1312008624601018370]
2,RedPill_Crusade,trump,JuliansRum,trump,1,[1328202123557933056]
3,RedPill_Crusade,trump,realDonaldTrump,trump,5,"[1328202123557933056, 1323446679610269697, 133..."
4,RedPill_Crusade,trump,CodeMonkeyZ,trump,1,[1330403734317375488]


In [6]:
# Split the mention table by the community of the *author* (the account doing
# the mentioning); authors labelled "none" end up in neither frame.
trump_comm_mention_df = comm_mention_df.loc[
    comm_mention_df["author_community"].eq("trump")
]
biden_comm_mention_df = comm_mention_df.loc[
    comm_mention_df["author_community"].eq("biden")
]

In [7]:
# Overall mention volume per author community: the sum of the per-pair
# "count" column, with no mentioned user excluded.
trump_total_mentions = trump_comm_mention_df.loc[:, "count"].sum()
biden_total_mentions = biden_comm_mention_df.loc[:, "count"].sum()
print(
    f"Total mentions in trump community: {trump_total_mentions}",
    f"Total mentions in biden community: {biden_total_mentions}",
    sep="\n",
)

Total mentions in trump community: 100052
Total mentions in biden community: 54878


In [10]:
def _mention_totals(mentions: pd.DataFrame, own: str, other: str):
    """Sum mention counts for the authors of one community.

    Args:
        mentions: mention rows written by a single author community; must
            have the "count" and "user_community" columns.
        own: community label whose mentioned users count as "inside".
        other: community label whose mentioned users count as "outside".

    Returns:
        ``(total, inside, outside)``. ``total`` sums every row, including
        mentioned users whose community is neither ``own`` nor ``other``
        (e.g. "none"), so ``inside + outside`` can be smaller than ``total``.
    """
    target_community = mentions["user_community"]
    total = mentions["count"].sum()
    # Single .loc[mask, column] lookup instead of chained df[mask]["count"].
    inside = mentions.loc[target_community == own, "count"].sum()
    outside = mentions.loc[target_community == other, "count"].sum()
    return total, inside, outside


def _share(part, total):
    """Return ``part / total``, or NaN when ``total`` is zero (no mentions)."""
    return part / total if total else float("nan")


# Drop mentions of the opposing candidate before measuring in-/out-group
# mentions (presumably so that talking about the rival candidate is not read
# as cross-community engagement -- TODO confirm the intent).
# Biden community mentions without Trump
biden_comm_mention_df_nt = biden_comm_mention_df[
    biden_comm_mention_df["user"] != "realDonaldTrump"
]
# Trump community mentions without Biden
trump_comm_mention_df_nt = trump_comm_mention_df[
    trump_comm_mention_df["user"] != "JoeBiden"
]

# NOTE: trump_total_mentions / biden_total_mentions are rebound here. After
# this cell they hold the totals *excluding* the opposing candidate, not the
# overall totals printed by the earlier totals cell.
trump_total_mentions, trump_inside_mentions, trump_outside_mentions = _mention_totals(
    trump_comm_mention_df_nt, own="trump", other="biden"
)
biden_total_mentions, biden_inside_mentions, biden_outside_mentions = _mention_totals(
    biden_comm_mention_df_nt, own="biden", other="trump"
)

# Share of each community's mentions aimed at its own vs. the other community.
proportion_trump_inside = _share(trump_inside_mentions, trump_total_mentions)
proportion_trump_outside = _share(trump_outside_mentions, trump_total_mentions)
proportion_biden_inside = _share(biden_inside_mentions, biden_total_mentions)
proportion_biden_outside = _share(biden_outside_mentions, biden_total_mentions)

# Create a dataframe
data = {
    "community": ["trump", "biden"],
    "inside": [proportion_trump_inside, proportion_biden_inside],
    "outside": [proportion_trump_outside, proportion_biden_outside],
}
df = pd.DataFrame(data)
df.head()

Unnamed: 0,community,inside,outside
0,trump,0.69983,0.059904
1,biden,0.534552,0.157632
