# Hashtags

In [12]:
# JSON file that is loaded as the author -> hashtags mapping.
HASHTAG_RESULTS_PATH = "data/author_hashtags_map.json"
# Pickle file that is loaded as the author -> community-label mapping.
COMMUNITY_MAP_PATH = "data/community_map.pickle"

In [13]:
import json
import os
import pickle
from typing import Dict, Union


def load_json_results(filename: str) -> Union[dict, list]:
    """Read the UTF-8 encoded JSON file at ``filename`` and return the decoded value.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.
    """
    with open(filename, mode="r", encoding="utf-8") as json_file:
        parsed = json.load(json_file)
    return parsed
    
def load_data_from_pickle(filename: str):
    """Unpickle and return the object stored in the binary file ``filename``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so only point this at files from a trusted source.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The object reconstructed by ``pickle.load``.
    """
    with open(filename, mode="rb") as pickle_file:
        restored = pickle.load(pickle_file)
    return restored

In [14]:
from collections import Counter
import pandas as pd


# Mapping of author -> hashtags, read from the JSON results file.
author_hashtags = load_json_results(HASHTAG_RESULTS_PATH)

# Mapping of author -> community label. The labels are compared against the
# strings "trump" and "biden" when authors are filtered, so the values are
# annotated as str (the previous ``int`` annotation contradicted that usage).
community_map: Dict[str, str] = load_data_from_pickle(COMMUNITY_MAP_PATH)

In [15]:
# Pool each author's hashtags into the list of the community they belong to.
# Authors missing from community_map fall back to the label "none" and, like
# authors of any other community, contribute to neither list.
trump_hashtags = []
biden_hashtags = []

for author, hashtags in author_hashtags.items():
    community = community_map.get(author, "none")
    if community == "trump":
        trump_hashtags.extend(hashtags)
    elif community == "biden":
        biden_hashtags.extend(hashtags)

In [16]:
def _save_top_hashtags(counter, path, n=30):
    """Build the table of the ``n`` most common hashtags and write it to ``path``.

    Args:
        counter: ``Counter`` mapping hashtag -> number of occurrences.
        path: Destination CSV path.
        n: How many of the most common hashtags to keep.

    Returns:
        DataFrame with columns ``hashtag`` and ``count``, ordered as returned
        by ``Counter.most_common``.
    """
    top = pd.DataFrame(counter.most_common(n), columns=["hashtag", "count"])
    # to_csv does not create missing parent directories, so make sure the
    # target directory exists before writing (matters on a fresh checkout).
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    top.to_csv(path, index=False)
    return top


# Hashtags are lower-cased before counting so that variants differing only in
# letter case are tallied together.
trump_hashtags_counter = Counter(tag.lower() for tag in trump_hashtags)
biden_hashtags_counter = Counter(tag.lower() for tag in biden_hashtags)

# Each name is bound exactly once, directly to its final DataFrame, instead of
# first holding a list of tuples and then being rebound to a frame.
top30_trump_hashtags = _save_top_hashtags(
    trump_hashtags_counter, "output/top30_trump_hashtags.csv"
)
top30_biden_hashtags = _save_top_hashtags(
    biden_hashtags_counter, "output/top30_biden_hashtags.csv"
)

In [17]:
# Sum of the number of distinct (lower-cased) hashtags in each community.
# A hashtag that appears in both counters is counted twice, so this is not
# the number of distinct hashtags overall.
print(len(trump_hashtags_counter) + len(biden_hashtags_counter))

9025
