In [1]:
import os
import ast
import sys
src_dir = os.path.join('..', 'src')
sys.path.append(os.path.abspath(src_dir))

import requests
import json
from os import path as osp
from datetime import datetime, timedelta
import praw

from security import KeyHandler
from data import path

VERSION = 'PedAPI/0.0.3'


class RedditClient:
    """Small client for Reddit's OAuth endpoints built on ``requests``.

    On construction it creates a ``KeyHandler``, requests an access token and
    verifies it. ``get_comments`` downloads the JSON for one post and writes it
    to ``<save_path>/<post_id>.json``.
    """

    def __init__(self, verbose: bool = True):
        """Log in immediately.

        Args:
            verbose: When true, progress messages go to ``print``; otherwise
                they are discarded.
        """
        self.verbose_print = print if verbose else lambda *a, **k: None
        self.save_path = path("comments")

        # Placeholder value; _login() overwrites it with the real expiry.
        self.session_time_out = datetime.now()
        self.kh = KeyHandler()
        self._login()

    def _login(self):
        """Request an access token and store it in ``self.kh.headers``.

        Raises:
            AssertionError: If the follow-up authentication check fails.
        """
        authorization = requests.auth.HTTPBasicAuth(
            self.kh.client_id,
            self.kh.secret_key)

        self.kh.headers['User-Agent'] = VERSION
        res = requests.post('https://www.reddit.com/api/v1/access_token',
                            auth=authorization,
                            data=self.kh.login_data,
                            headers=self.kh.headers)

        # Decode the response body once and reuse it.
        payload = res.json()

        # Treat the session as expired 60 seconds before the reported lifetime ends.
        self.session_time_out = datetime.now() + timedelta(seconds=payload['expires_in'] - 60)

        token = payload['access_token']
        self.kh.headers['Authorization'] = f'bearer {token}'

        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not self._test_authentication():
            raise AssertionError('Reddit authentication check failed')
        self.verbose_print('Successfully logged as {}'.format(self.kh.login_data['username']))
        self.verbose_print('Session expires on {}'.format(str(self.session_time_out)))

    def _test_authentication(self):
        """Return True when the ``/api/v1/me`` endpoint answers with HTTP 200."""
        response = requests.get('https://oauth.reddit.com/api/v1/me', headers=self.kh.headers)
        return response.status_code == 200

    def _manage_session_time_out(self):
        """Log in again if the stored expiry time has passed."""
        if datetime.now() > self.session_time_out:
            self.verbose_print('Renewing session...')
            self._login()

    def get_comments(self, post_id: str, subreddit: str = 'wallstreetbets'):
        """Download the comments JSON of a post and save it to disk.

        Args:
            post_id: Id of the post; also used as the output file name.
            subreddit: Subreddit part of the request URL. Defaults to
                ``'wallstreetbets'``, the previously hard-coded value.
        """
        self._manage_session_time_out()

        link = 'https://oauth.reddit.com/r/{}/comments/'.format(subreddit)
        res = requests.get(link + post_id, headers=self.kh.headers)

        with open(osp.join(self.save_path, post_id + '.json'), 'w') as outfile:
            json.dump(res.json(), outfile, indent=4)


class Praw:
    """Thin wrapper that owns an authenticated ``praw.Reddit`` instance."""

    def __init__(self, verbose: bool = True):
        """Create the key handler and build the PRAW client.

        Args:
            verbose: Selects ``print`` as the message sink when true, a no-op
                callable otherwise.
        """
        if verbose:
            self.verbose_print = print
        else:
            self.verbose_print = lambda *a, **k: None

        self.kh = KeyHandler()
        self._login()

    def _login(self):
        """Build ``self.reddit`` from the credentials held by the key handler."""
        settings = dict(
            client_id=self.kh.client_id,
            client_secret=self.kh.secret_key,
            user_agent=VERSION,
            username=self.kh.login_data['username'],
            password=self.kh.login_data['password'],
        )
        self.reddit = praw.Reddit(**settings)

    def get_submission(self, id_):
        """Return the submission object PRAW provides for ``id_``."""
        submission = self.reddit.submission(id=id_)
        return submission


In [2]:
# Create the PRAW-backed client; its constructor builds a KeyHandler and logs in.
client = Praw()

In [None]:
from time import sleep

In [3]:
# Look up a single submission by its id through the client wrapper.
submission = client.get_submission('nrblju')

In [4]:
# Expand the "more comments" placeholders once, before iterating: doing it
# inside the loop repeats the work for every comment and mutates the comment
# forest while it is being iterated. Retry after a short pause on failure.
while True:
    try:
        submission.comments.replace_more(limit=None)
        break
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C can still stop the cell.
        print("Handling replace_more exception")
        sleep(1)

# Print index, text and creation timestamp of every top-level comment.
for i, top_level_comment in enumerate(submission.comments):
    print(i, top_level_comment.body, top_level_comment.created_utc)

0 Main mods I'm using are More Archotech Garbage, Science never stops AOTC, Rimatomics, Rimfactory, Misc Robots, VFE mechs plus a bunch of OP enemy factions to try (and fail) to balance the AOTC stuff a bit.

I've retextured the Cosmic reactors from AOTC as well to fit in more with the Archotech style of the base.  


Also I wouldn't suggest doing a circle style base, it is ridiculously resource and work intensive, does look pretty cool though. 1622721313.0
1 Ah, yes, the Arcotech and glitter tech "hell hole". Let's be clear, if you offer me a ride off of this base, I'll stay on the base. Pretty sure it's self sufficient at this point. 1622728569.0
2 I don't get how people can make such aesthetic bases. It's utilitarian rectangles all the way for me... 1622740894.0
3 A circular kill box... intriguing. 1622722668.0
4 Round AND mountain? Now this is quite the rare sight, most mountain bases on here are boxy and aren’t such an interesting shape. Good job. 1622722889.0
5 Off to crash somew

In [5]:
# Display the comment count attribute of the submission.
submission.num_comments

54

In [6]:
# Set of field names; written as a set literal, one group per line.
wanted = {
    'all_awardings', 'body', 'created', 'created_utc', 'depth',
    'downs', 'id', 'parent_id', 'replies', 'score',
    'ups', 'data', 'kind', 'children', 'name',
}

In [27]:
import pandas as pd
import numpy as np

from time import time, sleep

def replace_more(submission, max_retries=None, delay=1):
    """Call ``submission.comments.replace_more()`` until it succeeds.

    Args:
        submission: Object whose ``comments`` attribute exposes
            ``replace_more()``.
        max_retries: Maximum number of retries after a failure. ``None``
            (the default) retries indefinitely, as before.
        delay: Seconds to sleep between attempts (default 1, as before).

    Raises:
        Exception: The last error from ``replace_more()`` once ``max_retries``
            retries have been used up.
    """
    retries = 0
    while True:
        try:
            submission.comments.replace_more()
            return
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt/SystemExit
            # propagate so the loop can be interrupted.
            if max_retries is not None and retries >= max_retries:
                raise
            retries += 1
            print("Handling replace_more exception")
            sleep(delay)

def obtain_comments_for_id(praw_client, submission_id):
    """Collect every comment of one submission into a DataFrame.

    Args:
        praw_client: Object providing ``get_submission(submission_id)``.
        submission_id: Id of the submission; stored in the ``post_id`` column.

    Returns:
        A DataFrame with columns ``id, parent_id, post_id, body, created_utc,
        depth, score, ups, downs``. A submission without comments yields a
        single placeholder row with empty ids/body and ``depth == -1``.
    """
    column_names = ['id', 'parent_id', 'post_id', 'body', 'created_utc', 'depth', 'score', 'ups', 'downs']

    starttime = time()
    print("Obtaining comments for submission '{}'...".format(submission_id), end='', flush=True)

    submission = praw_client.get_submission(submission_id)

    replace_more(submission)

    # One frame per top-level comment, each already containing its replies.
    res = [obtain_comments(comment, submission_id) for comment in list(submission.comments)]

    if len(res) > 0:
        df = pd.concat(res, ignore_index=True)
        print(" [{} comments]".format(df.shape[0]), end='', flush=True)
    else:
        # Build the placeholder row directly with pandas: routing it through
        # np.array would coerce the numeric fields (0, -1) into strings.
        no_comments = [['', '', submission_id, '', 0, -1, 0, 0, 0]]
        df = pd.DataFrame(no_comments)
        print(" [no comments]", end='', flush=True)

    df.columns = column_names
    print("   finished in {} sec".format(np.around(time()-starttime,2)))
    return df
    

def obtain_comments(comment, submission_id):
    """Flatten a comment and all of its nested replies into one DataFrame.

    Rows are in pre-order: the comment itself, then each reply followed by that
    reply's own replies. The nine positional columns hold, in order: id,
    parent id, submission id, body, created_utc, depth, score, ups, downs.

    Values keep their native types (building rows through ``np.array`` would
    turn every numeric field into a string), and the whole subtree is turned
    into a frame once instead of once per comment.

    Args:
        comment: Object with ``id``, ``parent_id``, ``body``, ``created_utc``,
            ``depth``, ``score``, ``ups``, ``downs`` and an iterable ``replies``.
        submission_id: Value written to the third column of every row.

    Returns:
        A DataFrame with one row per comment in the subtree and default
        integer column labels.
    """
    rows = []
    pending = [comment]
    while pending:
        current = pending.pop()
        rows.append([
            current.id,
            # Drop the first three characters (the Reddit fullname type
            # prefix, e.g. "t1_" / "t3_") so only the bare id remains.
            current.parent_id[3:],
            submission_id,
            current.body,
            current.created_utc,
            current.depth,
            current.score,
            current.ups,
            current.downs,
        ])
        # Push replies reversed so the first reply is popped (visited) first.
        pending.extend(reversed(list(current.replies)))

    return pd.DataFrame(rows)

In [8]:
# Run the scraper on one submission and show the first three rows of the result.
obtain_comments_for_id(client, 'nrblju')[:3]

Obtaining comments for submission 'nrblju'... [54 comments]   finished in 0.52 sec


Unnamed: 0,id,parent_id,post_id,body,created_utc,depth,score,ups,downs
0,h0fg6t4,nrblju,nrblju,Main mods I'm using are More Archotech Garbage...,1622721313.0,0,42,42,0
1,h0gj95t,h0fg6t4,nrblju,"Why circles? Only my warehouses, stables and g...",1622738934.0,1,10,10,0
2,h0h7rr1,h0gj95t,nrblju,I've just done a lot of square boxy bases with...,1622749153.0,2,14,14,0


In [24]:
def identity(x: str) -> str:
    """Return the argument unchanged.

    The ``str`` annotations are not checked inside this function; it returns
    whatever object it receives.
    """
    return x

In [25]:
# Call with an int even though the parameter is annotated as `str`.
identity(12)

12

In [26]:
from time import sleep

In [11]:
import os
import sys
src_dir = os.path.join('..', 'src')
sys.path.append(os.path.abspath(src_dir))

from data import save_dataset, get_dataset
from num_comments import sample_submissions

In [12]:
# Load 'num_comments.csv' through the project's get_dataset helper.
data = get_dataset('num_comments.csv')

../../data/datasets/num_comments.csv


In [13]:
# Pass the loaded dataset through the project's sample_submissions helper
# and preview the first row of the result.
sampled = sample_submissions(data)
sampled[:1]

Unnamed: 0,id,title,score,url,comms_num,created,body,timestamp,upvote_ratio,is_oc,...,body_stem_tokens,image_hue,image_sat,image_val,image_label,gme_price,gme_volume,ocr_text,ocr_text_word_count,num_comments
10665,l6b909,🚨🚨FAKE NEWS🚨🚨,11,https://i.redd.it/fep2zut65xd61.jpg,0,1611772000.0,,Wed Jan 27 19:32:30 2021,0.91,False,...,['nan'],0.490841,0.490841,0.490841,television,345.329987,7113878,"['option', 'activity', 'gamestop', 'open', 'in...",42,0


In [14]:
# Keep the submissions at or above the 99th percentile of `num_comments`,
# ordered by ascending comment count, then show the resulting shape.
cut = (
    sampled
    .loc[sampled.num_comments >= sampled.num_comments.quantile(0.99)]
    .sort_values(by=['num_comments'])
)
cut.shape

(80, 47)

In [29]:
from tqdm.notebook import tqdm

# Resume from comments already collected in this kernel session, if any;
# on a fresh kernel `df` does not exist yet, so start from nothing.
try:
    frames = [df]
except NameError:
    frames = []

# Gather one frame per submission and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
for subm_id in tqdm(cut.id[-1:]):
    frames.append(obtain_comments_for_id(client, subm_id))

df = pd.concat(frames)

save_dataset(df, 'new_comments_all_greatest_80.csv')

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Obtaining comments for submission 'l6er79'...Handling replace_more exception
Handling replace_more exception
 [3636 comments]   finished in 141.81 sec



In [21]:
# Show the last two rows of the collected comments.
df[-2:]

Unnamed: 0,id,parent_id,post_id,body,created_utc,depth,score,ups,downs
3619,gl71z9j,l7iorh,l7iorh,"I think if we learned anything from this, OPEN...",1611895449.0,0,22,22,0
3620,gl724zv,l7iorh,l7iorh,I’m naming my future son (or daughter) deepfuc...,1611895528.0,0,22,22,0


In [19]:
# Write the collected comments out through the project's save_dataset helper.
save_dataset(df, 'new_comments_80.csv')

In [22]:
# Show the ids of the last two submissions in `cut`.
cut.id[-2:]

2470     l5c0nr
11710    l6er79
Name: id, dtype: object