In [3]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict

In [4]:
# Input tables: one CSV of subreddits, one of moderators
# (the first column of the moderators file is used as the index)
subs_csv, mods_csv = '../subreddits.csv', '../moderators.csv'
df_subs = pd.read_csv(subs_csv)
df_mods = pd.read_csv(mods_csv, index_col=0)

In [6]:
# Main graph used for scoring; presumably a multigraph, going by the file name
multi_graph_path = '../multi_graph.gexf'
g = nx.read_gexf(multi_graph_path)

Compute basic statistics on subs

In [152]:
# Lookup: subreddit name -> value of its 'n_members' column
n_members = df_subs.set_index('name')['n_members'].to_dict()
# Lookup: subreddit -> number of rows it has in the moderators table
n_moderators = df_mods.groupby('subreddit')['moderator'].agg(len).to_dict()

A primitive function to score relatedness between two nodes: the number of edges between them, divided by the degree of each node.

In [153]:
def relatedness(s1, s2):
    """Score how strongly two nodes of the global graph ``g`` are tied together.

    The score is the number of entries in ``g.get_edge_data(s1, s2)`` (one per
    parallel edge when ``g`` is a multigraph), divided by the degree of ``s1``
    and then by the degree of ``s2``.

    Returns 0 when no edge data is found for the pair.
    """
    shared_edges = g.get_edge_data(s1, s2)
    if not shared_edges:
        return 0
    return len(shared_edges) / g.degree(s1) / g.degree(s2)

Let's focus on `r/Palestine` as it's a sub I know well.

In [154]:
# Anchor subreddit, and the shortest-path length from it to every node reachable in g
s0 = 'Palestine'
distances = nx.shortest_path_length(g, s0)

In [155]:
# Direct neighbors of s0, ranked from most to least related
sorted(g.neighbors(s0), key=lambda sub: relatedness(s0, sub), reverse=True)

['IsraelCrimes',
 'JewsOfConscience',
 'Panarab',
 'boringdystopia',
 'GreenAndEXTREME',
 'AskMiddleEast',
 'AbolishTheMonarchy',
 'Britain',
 'GreenAndPleasant',
 'LandlordLove',
 'IRLEasterEggs',
 'Thatsactuallyverycool',
 'Documentaries',
 'OpenAI',
 'Enough_Vaush_Spam',
 'Hasan_Piker',
 'therewasanattempt',
 'ToiletPaperUSA',
 'TheRightCantMeme',
 'lostgeneration',
 'worldnewsvideo',
 'PublicFreakout',
 'Unexpected']

Let's also add second-order neighbors (nodes at distance 2 from `r/Palestine`).

In [142]:
# Split the reachable nodes by distance from s0: distance 1 and distance 2
neighbors_o1, neighbors_o2 = (
    [sub for sub, hops in distances.items() if hops == order]
    for order in (1, 2)
)

In [143]:
# Sizes of the first- and second-order neighborhoods
tuple(map(len, (neighbors_o1, neighbors_o2)))

(23, 677)

That's too many to plot on a graph. Let's select the most promising second-order nodes:

In [144]:
# How many second-order subs to keep for plotting
N_TOP_O2 = 5

# Re-derive the distance-2 candidates from `distances` so that re-running this cell
# always starts from the full candidate set, not from its own truncated result
o2_candidates = [s for s, d in distances.items() if d == 2]

# number of adjacent level1 subs
o2_nadj_uniq = defaultdict(int)
# relatedness() summed over the adjacent level1 subs (per the original note:
# moderators in common, weighted by the total number of moderators in each sub)
o2_nedg_smart = defaultdict(float)

for s2 in o2_candidates:
    for s1 in neighbors_o1:
        # Only level1 subs that actually share an edge with s2 count as adjacent;
        # counting every (s1, s2) pair would give all candidates the same count
        # and make the "> 1" filter below a no-op.
        if g.has_edge(s1, s2):
            o2_nadj_uniq[s2] += 1
            o2_nedg_smart[s2] += relatedness(s1, s2)

# removing all order-2 neighbors that are connected to one order-1 neighbor only
# (a list, not a set, so ties in the ranking below are broken reproducibly)
o2_well_connected = [s for s in o2_candidates if o2_nadj_uniq[s] > 1]
# taking the top N_TOP_O2 by nedg_smart criterion
neighbors_o2 = sorted(o2_well_connected, key=o2_nedg_smart.get, reverse=True)[:N_TOP_O2]

In [145]:
neighbors_o2

['Israel_Palestine',
 'internationalpolitics',
 'The_Leftorium',
 'Labour',
 'SocialismIsCapitalism']

Let's visualise. Gephi doesn't support multigraphs, so I'll load my simple graph and filter that instead.

In [146]:
# Simple-graph version of the network, used only for the export below
simple_graph_path = '../simple_graph.gexf'
g_simple = nx.read_gexf(simple_graph_path)

In [148]:
# Keep only the anchor sub plus its first-order and selected second-order neighbors,
# then export that subgraph to a GEXF file
nodes_to_keep = [s0, *neighbors_o1, *neighbors_o2]
g_simple_sub = g_simple.subgraph(nodes_to_keep)
nx.write_gexf(g_simple_sub, 'example_filtered.gexf')

Visualising in Gephi yields the following graph:

![img](example_filtered.png)