In [49]:
import os
from datetime import datetime, timezone
from functools import reduce
from typing import Any, Iterable, List, Union

import seaborn as sns
from dotenv import load_dotenv
from praw import Reddit
from praw.models import Comment, Subreddit, Submission
from praw.models.comment_forest import CommentForest
from praw.models.listing.mixins.subreddit import CommentHelper

In [50]:
# define constants
# Declared first so the subreddit lookup below uses it instead of repeating the literal.
SUBR_NAME = "opiates"

# load local environment variables
env_path = "../.env"
load_dotenv(dotenv_path=env_path)

# establish reddit connection
# Every credential is read from the environment (populated by the .env file above),
# so no secret is stored in the notebook. os.getenv yields None for a missing variable.
conn = Reddit(
    client_id=os.getenv("RCLIENT_ID"),
    client_secret=os.getenv("RSECRET_KEY"),
    password=os.getenv("RPASSWORD"),
    username=os.getenv("RUSERNAME"),
    user_agent=os.getenv("RUSER_AGENT"),
)

# define subreddit
subr = conn.subreddit(SUBR_NAME)

In [51]:
def utc_to_dt(utc: float) -> datetime:
    """
    Convert a unix time to a naive python datetime expressed in UTC.

    :param utc: seconds since the unix epoch; any fractional part is truncated

    :returns: a timezone-naive datetime holding the UTC wall-clock time
    """
    # datetime.utcfromtimestamp is deprecated since Python 3.12. Build an aware
    # UTC datetime instead and strip the tzinfo, so the result is still naive and
    # stays comparable with the naive datetimes used elsewhere in this notebook.
    return datetime.fromtimestamp(int(utc), tz=timezone.utc).replace(tzinfo=None)

In [60]:
class Post():
    """
    Represents an intersection of attributes from Submission and Comment objects.

    Attributes set by ``__init__``:
        text       -- ``selftext`` of a Submission or ``body`` of a Comment
        parent_id  -- ``parent_id`` of a Comment; None for a Submission
        is_sub     -- True when built from a Submission, False when built from a Comment
        author     -- ``author`` of the wrapped object, copied unchanged
        time       -- ``created_utc`` of the wrapped object converted by ``utc_to_dt``
        id         -- ``id`` of the wrapped object
        score      -- ``score`` of the wrapped object

    Two Posts are equal when both ``id`` and ``is_sub`` match; the hash is built
    from the same pair, so Posts can be stored in sets and used as dict keys.
    """

    # Defines a type representing a Submission OR a Comment
    SubComm = Union[Submission, Comment]

    def __init__(self, subcomm: SubComm) -> None:
        """
        Initialize attributes of this object.

        :param subcomm: the Submission or Comment to copy attributes from

        :raises ValueError: if the given object is neither a Submission nor a Comment
        """
        # initialize attributes particular to Submissions and Comments
        if isinstance(subcomm, Submission):
            self.text = subcomm.selftext
            self.parent_id = None
            self.is_sub = True
        elif isinstance(subcomm, Comment):
            self.text = subcomm.body
            self.parent_id = subcomm.parent_id
            self.is_sub = False
        else:
            raise ValueError("The given object is not a Submission or Comment.")

        # initialize attributes common between Submissions and Comments
        self.author = subcomm.author
        self.time = utc_to_dt(subcomm.created_utc)
        self.id = subcomm.id
        self.score = subcomm.score

    def __eq__(self, obj: Any) -> bool:
        """Determine if the given object equals this object."""
        return isinstance(obj, Post) and (obj.id == self.id) and (obj.is_sub == self.is_sub)

    def __ne__(self, obj: Any) -> bool:
        """Determine if the given object does not equal this object."""
        return not obj == self

    def __hash__(self) -> int:
        """
        Hash this object on the same fields that ``__eq__`` compares.

        Defining ``__eq__`` alone sets ``__hash__`` to None and makes instances
        unhashable; this restores hashing in a way that agrees with equality.
        """
        return hash((self.id, self.is_sub))

In [64]:
def parse_comm_forest(root: CommentForest, start_time: datetime) -> List[Post]:
    """
    Parse all comments from the given comment forest as Post objects.

    The forest is walked depth-first: each qualifying comment is emitted before
    its replies. Every reply chain is followed even when the parent comment is
    older than ``start_time``, because a reply can fall inside the window while
    its parent does not.

    :param root: an iterable comment forest, e.g. ``Submission.comments`` or ``Comment.replies``
    :param start_time: a naive datetime; only comments strictly later than it are kept

    :returns posts: a list of all comments posted after the given time
    """
    posts: List[Post] = []
    for comment in root:
        # PRAW documents that an un-expanded forest may contain MoreComments
        # placeholders. They are not Comments and cannot become Posts, so skip
        # them. Call replace_more() on the forest beforehand to load them.
        if not isinstance(comment, Comment):
            continue
        if utc_to_dt(comment.created_utc) > start_time:
            posts.append(Post(comment))
        # Always descend: pruning on the parent's age would drop newer replies.
        # An empty reply forest simply contributes nothing.
        posts.extend(parse_comm_forest(comment.replies, start_time))
    return posts

In [65]:
def extract_comments(subr: Iterable[Submission], start_time: datetime) -> List[Post]:
    """
    Extract all submissions and comments that are after the given start time and store as Post objects.

    For every submission the result holds the submission itself (when it is
    later than ``start_time``) followed by the qualifying comments of its
    comment forest, in the order the submissions are yielded.

    :param subr: an iterable of praw Submissions, e.g. a listing such as ``subreddit.new(limit=10)``
    :param start_time: a naive datetime representing the time to start extracting posts

    :returns posts: a list of all submissions and comments posted after the given time
    """
    posts: List[Post] = []
    for sub in subr:
        sub_post = Post(sub)
        # Apply the same time window to the submission as to its comments.
        if sub_post.time > start_time:
            posts.append(sub_post)
        # Comments are collected even for an older submission, since they may
        # themselves be newer than start_time. extend() keeps this linear.
        posts.extend(parse_comm_forest(sub.comments, start_time))
    return posts

In [66]:
# Collect posts from the subreddit's `new` listing (capped at 10 submissions),
# keeping only activity later than 15 December 2019.
start_date = datetime(year=2019, month=12, day=15)
posts = extract_comments(subr=subr.new(limit=10), start_time=start_date)