In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os, json
import pandas as pd
import subprocess
import time
from random import sample
from os import listdir
from os.path import isfile, join
import re
import praw
from supabase import create_client, Client
import random

In [3]:
creds = json.load(open(".db-creds.json"))

url = creds["SUPABASE_URL"]
key = creds["SUPABASE_KEY"]
supabase = create_client(url, key)

In [4]:
reddit_creds = json.load(open(".reddit_creds.json", "r"))
reddit = praw.Reddit(
    client_id=reddit_creds["client_id"],
    client_secret=reddit_creds["client_secret"],
    password=reddit_creds["password"],
    user_agent=reddit_creds["user_agent"],
    username=reddit_creds["username"],
)

In [5]:
r_all = reddit.subreddit('all')

In [24]:
def get_comment_data(comment, usernames, posts_result):    
    try:
        username = str(comment.author.name)
        if username[0:2] == '0x':
            username = username[2:]
        profile_pic = comment.author.icon_img
    except:
        username = "removed"
        profile_pic = "removed"

    if username not in usernames:
        user_db_data = {
            "reddit_username": username, 
            "profile_pic": profile_pic
        }
        user_data = supabase.table("users").insert(user_db_data).execute().data
        usernames.update({user_data[0]['reddit_username']: user_data[0]['id']})
        user_id = user_data[0]['id']
    else:
        user_id = usernames[username]

    comment_data = {
        "post_id": posts_result["id"],
        "user_id": user_id,
        "body": comment.body,
        "depth": comment.depth,
        "reddit_id": comment.name,
        "reddit_upvotes": comment.ups,
        "reddit_downvotes": comment.downs,
    }
    return comment_data

In [29]:
def add_comment(comment, comments, comment_reddit_ids, head_comments=None):
    if comment.name in comment_reddit_ids:
        return

    if type(comment) == praw.models.reddit.comment.Comment:
        if head_comments is not None:
            head_comments.append(comment.name)
        comment_data = get_comment_data(comment, usernames, posts_result)
        comment_reddit_ids.append(comment_reddit_ids[0]['reddit_id'])
        comments.append(comment_data)
#     else:
#         print(type(comment))
#         comments.extend(get_comments(comment.comments(), comment_reddit_ids, usernames, posts_result))

In [30]:
def get_comments(comment_list, comment_reddit_ids, usernames, posts_result):
    comments = []
    head_comments = []
    for comment in comment_list[:10]:
        add_comment(comment, comments, comment_reddit_ids, head_comments)
    for comment in comment_list[10:]:
        if comment.name in head_comments:
            add_comment(comment, comments)
        
    return comments

In [31]:
def get_data(table_name, select, initial_len=0):
    i = initial_len
    prev_len = -1
    data = []
    while (len(data) - initial_len) % 1000 == 0 and prev_len != len(data):
        prev_len = len(data)
        data.extend(supabase.table(table_name).select(select).range(i, i+1000).execute().data)
        i += 1000
    return data

# DB writer

In [28]:
minted_time = 0
last_block = 0


post_reddit_ids = [row['reddit_id'] for row in get_data("posts", 'reddit_id')]
comment_reddit_ids = [row['reddit_id'] for row in get_data("comments", "reddit_id")]
categories = {row['name']: row['id'] for row in get_data("categories", "id, name")}
usernames = {row['reddit_username']: row['id'] for row in get_data("users", "id, reddit_username")}

while True:
    # Mint reddit posts every hour
    if (time.time() - minted_time) / 60 > 30:
        post_reddit_ids.extend([row['reddit_id'] for row in get_data("posts", 'reddit_id', len(post_reddit_ids))])
        comment_reddit_ids.extend([row['reddit_id'] for row in get_data("comments", "reddit_id", len(comment_reddit_ids))])
        categories.update({row['name']: row['id'] for row in get_data("categories", "id, name", len(categories))})
        usernames.update({row['reddit_username']: row['id'] for row in get_data("users", "id, reddit_username", len(usernames))})
        
        posts = []
        for i, post in enumerate(r_all.top(time_filter='hour')):
            if post.name in post_reddit_ids:
                continue
                
            if i > 10:
                continue
                     
            category = post.subreddit.display_name
            if category not in categories:
                category_data = supabase.table("categories").insert({"name": category}).execute().data
                category_id = category_data[0]['id']
                categories.update({category_data[0]['name']: category_id})
            else:
                category_id = categories[category]

            try:
                username = str(post.author.name)
                if username[0:2] == '0x':
                    username = username[2:]
                profile_pic = post.author.icon_img
            except:
                username = "removed"
                profile_pic = "removed"

            if username not in usernames:
                user_db_data = {
                    "reddit_username": username, 
                    "profile_pic": profile_pic
                }
                user_data = supabase.table("users").insert(user_db_data).execute().data
                usernames.update({user_data[0]['reddit_username']: user_data[0]['id']})
                user_id = user_data[0]['id']
            else:
                user_id = usernames[username]

            post_data = {
                "category_id": category_id,
                "user_id": user_id,
                "title": post.title,
                "body": post.selftext,
                "url": post.url,
                "is_nsfw": post.over_18,
                "reddit_id": post.name,
                "reddit_upvotes": post.ups,
                "reddit_downvotes": post.downs,
            }
            posts.append(post_data)
            posts_result = supabase.table("posts").insert(post_data).execute().data[0]
            post_reddit_ids.append(user_data[0]['reddit_id'])

            comments = get_comments(post.comments.list(), comment_reddit_ids, usernames, posts_result)
                
            supabase.table("comments").insert(comments).execute().data
            print(f"added {len(comments)} comments")
            
            user_ids = list(usernames.values())
            follow_ids = random.choices(user_ids, k=min(len(user_ids), 10))
            follower_ids = random.choices(user_ids, k=min(len(user_ids), 10))
            
            for i in follow_ids:
                supabase.table("follows").insert({"follower_id": user_id, "following_id": i}).execute()
            for i in follower_ids:
                supabase.table("follows").insert({"follower_id": i, "following_id": user_id}).execute()
        
        print(f"added {len(posts)} posts")
        print('')
        minted_time = time.time()
    
    time.sleep(1)

added 10 comments
[{'id': 25264, 'created_at': '2022-08-19T22:13:32.246627+00:00', 'username': None, 'profile_pic': 'https://styles.redditmedia.com/t5_3s0bx6/styles/profileIcon_snoo5436dc05-481f-4021-bfd9-c9f236e44a65-headshot-f.png?width=256&height=256&crop=256:256,smart&s=00e8d72cafd97fca981a49fd874aed4803e19ab2', 'msa_id': None, 'transaction_hash': None, 'wallet_address_personal': None, 'wallet_address_provided': None, 'exp': 0, 'level': 0, 'reddit_airdrop_value': 0, 'reddit_airdrop_claimed': 'not_claimed', 'reddit_username': 'tello8010', 'github_username': None, 'discord_username': None, 'email': None, 'daily_payout_claimed': False, 'exp_to_next_level': 83}]
added 4 comments
[{'id': 25267, 'created_at': '2022-08-19T22:13:39.742763+00:00', 'username': None, 'profile_pic': 'https://www.redditstatic.com/avatars/defaults/v2/avatar_default_0.png', 'msa_id': None, 'transaction_hash': None, 'wallet_address_personal': None, 'wallet_address_provided': None, 'exp': 0, 'level': 0, 'reddit_air

added 10 comments
[{'id': 25363, 'created_at': '2022-08-19T22:45:41.765608+00:00', 'username': None, 'profile_pic': 'https://styles.redditmedia.com/t5_560x63/styles/profileIcon_snoob5a8529a-2f32-417d-a210-1f7e94e7f79e-headshot.png?width=256&height=256&crop=256:256,smart&s=40ee36c98bf216e1032d9b3d003b546ad0375030', 'msa_id': None, 'transaction_hash': None, 'wallet_address_personal': None, 'wallet_address_provided': None, 'exp': 0, 'level': 0, 'reddit_airdrop_value': 0, 'reddit_airdrop_claimed': 'not_claimed', 'reddit_username': 'RareDestroyer8', 'github_username': None, 'discord_username': None, 'email': None, 'daily_payout_claimed': False, 'exp_to_next_level': 83}]
added 10 comments
[{'id': 25373, 'created_at': '2022-08-19T22:45:52.369822+00:00', 'username': None, 'profile_pic': 'https://www.redditstatic.com/avatars/defaults/v2/avatar_default_2.png', 'msa_id': None, 'transaction_hash': None, 'wallet_address_personal': None, 'wallet_address_provided': None, 'exp': 0, 'level': 0, 'reddit

added 10 comments
[{'id': 25462, 'created_at': '2022-08-19T23:17:46.529759+00:00', 'username': None, 'profile_pic': 'https://styles.redditmedia.com/t5_2hpjwi/styles/profileIcon_snoo90a4df14-5a60-473e-ac08-6606bd36fe4b-headshot.png?width=256&height=256&crop=256:256,smart&s=0106cd415591949f091862e7c2464d7453495270', 'msa_id': None, 'transaction_hash': None, 'wallet_address_personal': None, 'wallet_address_provided': None, 'exp': 0, 'level': 0, 'reddit_airdrop_value': 0, 'reddit_airdrop_claimed': 'not_claimed', 'reddit_username': 'Jingles013', 'github_username': None, 'discord_username': None, 'email': None, 'daily_payout_claimed': False, 'exp_to_next_level': 83}]
added 8 comments
[{'id': 25471, 'created_at': '2022-08-19T23:17:56.19798+00:00', 'username': None, 'profile_pic': 'https://www.redditstatic.com/avatars/defaults/v2/avatar_default_1.png', 'msa_id': None, 'transaction_hash': None, 'wallet_address_personal': None, 'wallet_address_provided': None, 'exp': 0, 'level': 0, 'reddit_airdr

APIError: {'code': '23505', 'details': 'Key (reddit_id)=(t3_wsrdil) already exists.', 'hint': None, 'message': 'duplicate key value violates unique constraint "post_reddit_id_key"'}

In [22]:
posts_result

{'id': 1430,
 'created_at': '2022-08-19T00:41:49.490235+00:00',
 'title': "I'm going to have to rethink my idea of decorating a cake as a side gig and stick with my day job.",
 'body': '',
 'url': 'https://i.redd.it/mf9lvnfe9ki91.jpg',
 'user_id': 24488,
 'category_id': 568,
 'reddit_id': 't3_wryyxd',
 'reddit_upvotes': 144,
 'reddit_downvotes': 0,
 'is_nsfw': False,
 'ipfs_hash': None,
 'transaction_hash': None}

In [11]:
t = get_data("posts", 'reddit_id', post_reddit_ids)

In [12]:
t[0]

't3_wgg1ov'