In [5]:
import logging
import os
from collections import defaultdict
from itertools import combinations

import networkx as nx
import pandas as pd
import praw
import tqdm.notebook as tqdm
from dotenv import load_dotenv
from tinydb import TinyDB, Query

In [2]:
# Show INFO-level log messages (the crawler reports its progress through them).
logging.basicConfig(level=logging.INFO)
# Load variables from a local .env file into the process environment;
# the Reddit credentials are later read with os.getenv.
load_dotenv()

# One TinyDB query builder per table: moderator/sub pairs, bots, subreddits.
Mod, Bot, Sub = Query(), Query(), Query()

In [5]:
# Account names that are always treated as bots, even though they neither
# start nor end with "bot". Membership is checked case-sensitively.
KNOWN_BOTS = {
    'AutoModerator',
    'comment-nuke',
    'hive-protect',
    'mod-mentions',
    'modmail-userinfo',
    'onedayflair',
    'purge-user',
    'spam-nuke',
}

In [6]:
class Wanderer:
    """Breadth-first crawler over the subreddit <-> moderator graph.

    Starting from a set of subreddits, ``wander`` alternates between listing
    the moderators of queued subreddits and listing the subreddits those
    moderators moderate. Results are persisted in three TinyDB files:

    * ``mods.json`` - ``{'mod': <name>, 'sub': <name>}`` pairs,
    * ``bots.json`` - ``{'name': <name>}`` for accounts classified as bots,
    * ``subs.json`` - ``{'name': <name>, 'nsubscr': <count>}`` size cache.
    """

    # Class-level defaults. ``__init__`` stores the real handles on the
    # instance, so they are not shared between instances.
    reddit = None
    db_mods = None
    db_bots = None
    db_subs = None

    # Subreddits with fewer subscribers than this are skipped while crawling.
    MIN_SUBSCRIBERS = 1000
    # Accounts moderating more subreddits than this are classified as bots.
    BOT_MODERATED_THRESHOLD = 500

    def __init__(self):
        """Open the Reddit client and the TinyDB files.

        Credentials are read from the environment variables ``CLIENT_ID``,
        ``CLIENT_SECRET``, ``USER_AGENT``, ``USER_NAME`` and ``USER_PASS``.
        """
        self.reddit = self.reddit or praw.Reddit(
            client_id=os.getenv("CLIENT_ID"),
            client_secret=os.getenv("CLIENT_SECRET"),
            user_agent=os.getenv("USER_AGENT"),
            username=os.getenv("USER_NAME"),
            password=os.getenv("USER_PASS")
        )
        self.db_mods = self.db_mods or TinyDB('mods.json')
        self.db_bots = self.db_bots or TinyDB('bots.json')
        self.db_subs = self.db_subs or TinyDB('subs.json')

        # Names already fully processed; they survive across wander() calls.
        self.subs_searched = set()
        self.mods_searched = set()

    def get_sub_size(self, sub_name):
        """Return the subscriber count of ``sub_name``, caching it in ``db_subs``.

        The Reddit API is only queried when the subreddit has no cached row.
        """
        cached = self.db_subs.search(Sub.name == sub_name)
        if cached:
            return cached[0]['nsubscr']
        nsubscr = self.reddit.subreddit(sub_name).subscribers
        self.db_subs.insert({'name': sub_name, 'nsubscr': nsubscr})
        return nsubscr

    def wander(self, depth, start_sub_name=None, mods_queue=None, subs_queue=None):
        """Crawl the moderator graph for ``depth + 1`` rounds.

        Args:
            depth: Number of additional rounds after the first one; the loop
                runs while ``depth >= 0``.
            start_sub_name: A subreddit name or a list of names to start
                from. When given, it replaces ``subs_queue``.
            mods_queue: Optional moderator objects to process in round one.
            subs_queue: Optional subreddit objects to process in round one.

        Each round lists the moderators of every queued subreddit and queues
        the non-bot ones, then lists the subreddits of every queued moderator,
        stores the (mod, sub) pairs in ``db_mods`` and queues the subreddits
        for the next round.
        """
        # Copy so the caller's lists are never mutated by the appends below.
        self.mods_queue = list(mods_queue or [])
        self.subs_queue = list(subs_queue or [])
        if isinstance(start_sub_name, list):
            self.subs_queue = [self.reddit.subreddit(s) for s in start_sub_name]
        elif isinstance(start_sub_name, str):
            self.subs_queue = [self.reddit.subreddit(start_sub_name)]

        while depth >= 0:
            # Names already waiting in the queue, so a moderator shared by
            # several subreddits is queued (and later fetched) only once.
            queued_mods = {mod.name for mod in self.mods_queue}
            for sub in tqdm.tqdm(self.subs_queue):
                sub_name = sub.display_name
                logging.info(f'Processing sub {sub_name}')
                for mod in sub.moderator():
                    mod_name = mod.name
                    if mod_name in self.mods_searched or mod_name in queued_mods:
                        # Already known to be a non-bot; skip the bot check,
                        # which may cost an extra API call.
                        continue
                    if self.is_bot(mod):
                        logging.info(f'BOT: {mod_name}')
                        continue
                    queued_mods.add(mod_name)
                    self.mods_queue.append(mod)
                    logging.info(f'Added mod {mod_name} to queue')
                self.subs_searched.add(sub_name)
            logging.info(f'{len(self.mods_queue)} mods in queue')
            self.subs_queue = []

            queued_subs = set()
            for mod in tqdm.tqdm(self.mods_queue):
                mod_name = mod.name
                logging.info(f'Processing mod {mod_name}')
                for sub in mod.moderated():
                    sub_name = sub.display_name
                    # A missing subscriber count (None) is treated as zero
                    # instead of raising TypeError on the comparison.
                    if (sub.subscribers or 0) < self.MIN_SUBSCRIBERS:
                        logging.info(f'TINY SUB: {sub_name}')
                        continue
                    if not self.db_mods.search((Mod.mod == mod_name) & (Mod.sub == sub_name)):
                        self.db_mods.insert({'mod': mod_name, 'sub': sub_name})
                    if sub_name not in self.subs_searched and sub_name not in queued_subs:
                        queued_subs.add(sub_name)
                        self.subs_queue.append(sub)
                        logging.info(f'Added sub {sub_name} to queue')
                self.mods_searched.add(mod_name)
            logging.info(f'{len(self.subs_queue)} subs in queue')
            self.mods_queue = []

            depth -= 1

    def is_bot(self, mod):
        """Return True if ``mod`` is classified as a bot, caching positives.

        An account is a bot when it is already in ``db_bots``, its name
        starts or ends with "bot" (case-insensitive), it is listed in
        ``KNOWN_BOTS``, or it moderates more than
        ``BOT_MODERATED_THRESHOLD`` subreddits. The last check calls
        ``mod.moderated()`` and is evaluated only if the others fail.
        """
        mod_name = mod.name
        # Earlier versions stored threshold-detected bots under the key
        # 'bot' instead of 'name'; accept both so old files keep working.
        if self.db_bots.search((Bot.name == mod_name) | (Bot.bot == mod_name)):
            return True

        lowered = mod_name.lower()
        looks_like_bot = (
            lowered.startswith('bot')
            or lowered.endswith('bot')
            or mod_name in KNOWN_BOTS
            or len(mod.moderated()) > self.BOT_MODERATED_THRESHOLD
        )
        if looks_like_bot:
            # Always store under 'name', the key the lookup above queries.
            self.db_bots.insert({'name': mod_name})
        return looks_like_bot
            

In [7]:
# Instantiate the crawler; this creates the Reddit client and opens the
# mods.json / bots.json / subs.json TinyDB files.
wanderer = Wanderer()
# The crawl itself is left disabled: the cells below only read what is already
# stored in the TinyDB files. Uncomment to crawl from r/Palestine again.
# wanderer.wander(1, 'Palestine')

In [8]:
# Load every stored (mod, sub) pair from the TinyDB table into a DataFrame.
df_mods = pd.DataFrame(wanderer.db_mods.all())
# First line: number of distinct subreddits; second line: number of pairs.
print(df_mods['sub'].nunique(), df_mods['sub'].count(), sep='\n')

1265
2078


In [11]:
# Make sure every subreddit in df_mods has a subscriber count cached in
# db_subs; get_sub_size only queries Reddit for names not cached yet.
for sub_name in tqdm.tqdm(df_mods['sub'].unique()):
    wanderer.get_sub_size(sub_name)

  0%|          | 0/1265 [00:00<?, ?it/s]

In [12]:
# Attach each subreddit's cached subscriber count to the (mod, sub) pairs.
df_subs = pd.DataFrame(wanderer.db_subs.all())
# Drop `name`/`nsubscr` columns left over from a previous run of this cell
# before merging. Without this, re-running the cell would produce suffixed
# duplicates (`name_x`, `nsubscr_x`, ...) and break the filter on `nsubscr`.
df_mods = (
    df_mods
    .drop(columns=['name', 'nsubscr'], errors='ignore')
    .merge(df_subs, left_on='sub', right_on='name', how='left')
)

In [13]:
# Keep only pairs whose subreddit has more than 5000 subscribers; rows with a
# missing count compare as False and are dropped as well.
df_mods = df_mods.loc[df_mods['nsubscr'].gt(5000)]

In [14]:
# Same two figures as before the size filter: distinct subreddits, then pairs.
print(df_mods['sub'].nunique(), df_mods['sub'].count(), sep='\n')

997
1724


In [16]:
# One row per subreddit, with the list of its known moderators.
df_nodes = (
    df_mods
    .groupby('sub')['mod']
    .apply(list)
    .reset_index()
)
# Subreddits with at least five known moderators become the graph's nodes.
has_min_mods = df_nodes['mod'].str.len().ge(5)
selected_subs = df_nodes.loc[has_min_mods, 'sub'].unique()
len(selected_subs)

In [17]:
# Boolean mask of the pairs whose subreddit was selected as a graph node.
# It is deliberately not called `filter`: a global of that name would shadow
# the builtin for every cell executed afterwards in this kernel.
in_selected_subs = df_mods['sub'].isin(selected_subs)
# One row per moderator, with the list of selected subreddits they moderate.
df_edges = df_mods[in_selected_subs].groupby('mod')['sub'].apply(list).reset_index()

In [18]:
# Number of moderators that moderate at least one selected subreddit,
# followed by a preview of their subreddit lists.
print(len(df_edges))
df_edges.head()

295


Unnamed: 0,mod,sub
0,--intifada--,[PublicFreakout]
1,002_timmy,[ConeHeads]
2,1080pfullhd-60fps,[therewasanattempt]
3,3olives,[Palestine]
4,4evrstreetmetalbitch,"[MorbidReality, TalesFromYourServer, gratefuldoe]"


In [19]:
# Edge weight = number of moderators two subreddits have in common.
# Keys are (sub_a, sub_b) tuples with the names in sorted order, so each
# undirected pair is counted under a single key.
edge_weight_dict = defaultdict(int)
for subs_of_mod in tqdm.tqdm(df_edges['sub'], total=len(df_edges)):
    # combinations() yields nothing for a moderator with a single subreddit.
    for sub1, sub2 in combinations(subs_of_mod, 2):
        key = (sub1, sub2) if sub1 < sub2 else (sub2, sub1)
        edge_weight_dict[key] += 1

  0%|          | 0/295 [00:00<?, ?it/s]

In [20]:
g = nx.Graph()

# Nodes: every selected subreddit, with its subscriber count as `size`.
selected_rows = df_subs[df_subs['name'].isin(selected_subs)]
g.add_nodes_from(
    (name, {'size': nsubscr})
    for name, nsubscr in zip(selected_rows['name'], selected_rows['nsubscr'])
)

# Edges: subreddit pairs sharing moderators, weighted by how many they share.
g.add_weighted_edges_from(
    (first, second, shared_mods)
    for (first, second), shared_mods in edge_weight_dict.items()
)

In [21]:
# Export the graph to a GEXF file in the working directory.
nx.write_gexf(g, 'mods-palestine.gexf')