In [1]:
#IMPORTS
import networkx as nx
import os
import json
import sys

In [2]:
# DIRECTORIES
# Both locations live under the same network-analysis root, so build them from it.
_NETWORK_ANALYSIS_ROOT = "/ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/"

# Input: directory that is listed for the per-month author/subreddit JSON dictionaries.
DATA = _NETWORK_ANALYSIS_ROOT + "dictionaries/"
# Output: directory the user-loyalty dictionary is written into.
OUT = _NETWORK_ANALYSIS_ROOT + "loyalty/"

In [3]:
# GENERATING CONDENSED AUTHOR-SUBREDDIT DICTIONARY (COMBINE ALL 12 MONTHS INTO ONE DICTIONARY)
# Shape: {author: {subreddit: summed count}}. Filled in place by condense().
CONDENSED = {}

# READ IN MONTH BY MONTH
def condense(filename, data_dir=None, condensed=None):
    """Merge one author -> {subreddit: count} JSON file into a running total.

    The file is expected to hold a JSON object mapping each author to an
    object mapping subreddit names to numeric counts. Every count is added
    to the matching entry of the accumulator, creating authors and
    subreddits on first sight. An author whose per-file mapping is empty is
    still registered, with an empty dict.

    Args:
        filename: Name of the JSON file, resolved relative to ``data_dir``.
        data_dir: Directory containing ``filename``. Defaults to the
            module-level ``DATA`` directory.
        condensed: Accumulator dict to update in place. Defaults to the
            module-level ``CONDENSED`` dict.

    Returns:
        None. The accumulator is mutated in place.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Resolve the defaults at call time so the function follows the current
    # module-level values and never shares a mutable default argument.
    if data_dir is None:
        data_dir = DATA
    if condensed is None:
        condensed = CONDENSED

    # Setting path to file to read in
    file_path = os.path.join(data_dir, filename)

    print(f"Reading {file_path} ...")

    # Open the data. JSON text is UTF-8, so do not depend on the locale default.
    with open(file_path, 'r', encoding='utf-8') as json_file:
        monthly_counts = json.load(json_file)

    # Iterate over all authors
    for author, subreddit_counts in monthly_counts.items():

        # Creates the author's entry on first sight, otherwise reuses it
        author_totals = condensed.setdefault(author, {})

        # Add up subreddit interactions
        for subreddit, count in subreddit_counts.items():
            author_totals[subreddit] = author_totals.get(subreddit, 0) + count

In [4]:
# Shape: {author: subreddit the author is loyal to, or None}. Filled by compute_user_loyalty().
LOYALTY = {}

# Generates LOYALTY, where keys are authors and values are the subreddits they are loyal to or None
def compute_user_loyalty(condensed=None, loyalty=None, threshold=0.50):
    """Label each author with the subreddit they are loyal to, if any.

    An author is loyal to their highest-count subreddit when that subreddit
    accounts for at least ``threshold`` of the author's total count. When
    several subreddits share the highest count, the one that comes first in
    the author's dict wins. Authors with no counts at all (an empty dict, or
    a total of zero) are labelled ``None`` instead of triggering a division
    by zero.

    Args:
        condensed: Mapping ``{author: {subreddit: count}}`` to read.
            Defaults to the module-level ``CONDENSED`` dict.
        loyalty: Dict that receives ``{author: subreddit or None}``; updated
            in place. Defaults to the module-level ``LOYALTY`` dict.
        threshold: Minimum share of an author's total count that the top
            subreddit must reach (inclusive). Defaults to 0.50.

    Returns:
        None. Results are written into ``loyalty``.
    """
    # Resolve the defaults at call time; an explicitly passed empty dict must
    # be honoured, hence the identity checks rather than truthiness.
    if condensed is None:
        condensed = CONDENSED
    if loyalty is None:
        loyalty = LOYALTY

    # Per author
    for author, subreddit_counts in condensed.items():
        total = sum(subreddit_counts.values())

        # Nothing to take a share of: not loyal to anything.
        if total <= 0:
            loyalty[author] = None
            continue

        # Find subreddit with maximum posts (max() returns the first maximal key)
        top_subreddit = max(subreddit_counts, key=subreddit_counts.get)

        # Check if author is loyal to that subreddit
        if subreddit_counts[top_subreddit] / total >= threshold:
            loyalty[author] = top_subreddit
        else:
            loyalty[author] = None


In [5]:
# Generate the CONDENSED dictionary
# Considering only TOP posts
# Start from an empty accumulator: condense() adds to CONDENSED, so re-running
# this cell without clearing would count every file again.
CONDENSED.clear()

# os.listdir() returns names in arbitrary order; sorting keeps the read order
# (and therefore the insertion order inside CONDENSED, which decides exact
# ties when loyalty is computed) the same on every run.
for filename in sorted(os.listdir(DATA)):
    if "_top" in filename:
        condense(filename)

Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/dictionaries/RC_2019-01_top.json ...
Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/dictionaries/RC_2019-11_top.json ...
Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/dictionaries/RC_2019-06_top.json ...
Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/dictionaries/RC_2019-09_top.json ...
Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/dictionaries/RC_2019-10_top.json ...
Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/dictionaries/RC_2019-08_top.json ...
Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/dictionaries/RC_2019-07_top.json ...
Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_analysis/dictionaries/RC_2019-04_top.json ...
Reading /ais/hal9000/datasets/reddit/stance_pipeline/luo_data/network_an

In [6]:
# Compute Loyalty
# Reads the module-level CONDENSED dict and fills the module-level LOYALTY dict
# in place (the call returns nothing), so the cell that builds CONDENSED must
# have been run before this one.
compute_user_loyalty()

In [7]:
# Save the loyalty values
# Dump to file
# Use the OUT directory constant rather than repeating its literal here, so the
# output location is defined in exactly one place. The file name is kept
# as-is (no extension) -- presumably the follow-up notebook opens it by this
# exact name; confirm before renaming.
# Authors without a loyal subreddit are stored as None, which JSON writes as null.
path = os.path.join(OUT, "user_loyalty_dictionary")
with open(path, "w") as f:
    json.dump(LOYALTY, f)

In [None]:
# NEXT NOTEBOOK ==> AddLoyalty