# Time Comparison

In [1]:
import json
import gzip
import bz2
import lzma
import glob
from os.path import basename
from  collections import *
from tqdm.notebook import tqdm
import ast
import networkx as nx
import random
import pickle
import itertools
from collections import Counter

# Biggest Changes Between Years

In [2]:
def load_z_scores(year):
    directory = '/shared/0/projects/reddit-political-affiliation/data/z-scores/'
    rep_file = directory + str(year) + '_rep_scores.pickle'
    dem_file = directory + str(year) + '_dem_scores.pickle'

    with open(rep_file, 'rb') as f:
        rep_z_scores = pickle.load(f)
    
    with open(dem_file, 'rb') as f:
        dem_z_scores = pickle.load(f)
    
    return rep_z_scores, dem_z_scores
   

print("Loading in 2018 scores")
z_rep_18, z_dem_18 = load_z_scores(2018)
print("Loading in 2019 scores")
z_rep_19, z_dem_19 = load_z_scores(2019)

z_rep_delta, z_dem_delta = {}, {}

for user, score in tqdm(z_rep_19.items(), desc="Computing REP delta between 2018 and 2019"):
    if user in z_rep_18:
        z_rep_delta[user] = abs(score - z_rep_18[user])
    
    
for user, score in tqdm(z_dem_19.items(), desc="Computing DEM delta between 2018 and 2019"):
    if user in z_dem_18:
        z_dem_delta[user] = abs(score - z_dem_18[user])

Loading in 2018 scores
Loading in 2019 scores


HBox(children=(FloatProgress(value=0.0, description='Computing REP delta between 2018 and 2019', max=13821990.…




HBox(children=(FloatProgress(value=0.0, description='Computing DEM delta between 2018 and 2019', max=13821990.…




# Grab Biggest Subreddit Differences

In [3]:
print("Filtering data down to subreddits only")
sub_rep_deltas = {k: v for k, v in z_rep_delta.items() if k[:2] == 'r/' and k[2:4] != 'u_'}
sub_dem_deltas = {k: v for k, v in z_dem_delta.items() if k[:2] == 'r/' and k[2:4] != 'u_'}
    
print("Sorting lists")
sub_rep_deltas = {k: v for k, v in sorted(sub_rep_deltas.items(), key=lambda item: item[1], reverse=True)}
sub_dem_deltas = {k: v for k, v in sorted(sub_dem_deltas.items(), key=lambda item: item[1], reverse=True)}

top_dem = dict(itertools.islice(sub_rep_deltas.items(), 100))
top_rep = dict(itertools.islice(sub_dem_deltas.items(), 100))

print("Top DEM Subreddits")
print(top_dem)

print("Top REP Subreddits")
print(top_rep)

Filtering data down to subreddits only
Sorting lists
Top DEM Subreddits
{'r/AskThe_Donald': 45215942024.81679, 'r/Bladesmith': 45215942024.808716, 'r/thick': 43941997137.799286, 'r/AccidentalComedy': 43941997137.79629, 'r/hamiltonmusical': 43941997137.79621, 'r/TrueAskReddit': 43941997137.78985, 'r/AdventuresOfSabrina': 43941997137.782875, 'r/chicagofood': 43941997137.7802, 'r/AskScienceDiscussion': 42629998964.637634, 'r/WWEChampions': 42629998964.634796, 'r/E30': 42629998964.634605, 'r/JonTron': 42629998964.63394, 'r/CanadianHardwareSwap': 42629998964.631424, 'r/composer': 42629998964.63089, 'r/homeless': 42629998964.6306, 'r/AskNOLA': 42629998964.62345, 'r/chinchilla': 42629998964.62328, 'r/PublicFlashing': 42629998964.62159, 'r/XYONetwork': 42629998964.6006, 'r/AngelsAndAirwaves': 42629998964.54095, 'r/Awww': 41276319009.715065, 'r/indie': 41276319009.71232, 'r/Dariusmains': 41276319009.709816, 'r/Syracuse': 41276319009.70934, 'r/ArenaHS': 41276319009.709145, 'r/batonrouge': 412763

In [None]:
print("Sorting REP deltas")
z_rep_delta = {k: v for k, v in sorted(z_rep_delta.items(), key=lambda item: item[1], reverse=True)}

print("Sorting DEM deltas")
z_dem_delta = {k: v for k, v in sorted(z_dem_delta.items(), key=lambda item: item[1], reverse=True)}


top_dem = dict(itertools.islice(z_dem_delta.items(), 100))
top_rep = dict(itertools.islice(z_rep_delta.items(), 100))

print("Top DEM deltas")
print(top_dem)

print("Top REP deltas")
print(top_rep)