In [1]:
import os
import datetime as dt
import json

import polars as pl
from praw import Reddit
from pydantic import BaseModel, TypeAdapter

# 1. Extract data from Reddit

We use [PRAW](https://pypi.org/project/praw/), the "Python Reddit API Wrapper".

Log in to Reddit and create an app at https://www.reddit.com/prefs/apps

![Reddit app](img/reddit-app.png)

(Image from https://www.jcchouinard.com/get-reddit-api-credentials-with-praw/)

In [17]:
# Credentials are read from environment variables rather than hardcoded;
# a missing variable raises KeyError right here.
reddit_client_id = os.environ["REDDIT_CLIENT_ID"]
reddit_client_secret = os.environ["REDDIT_SECRET"]
reddit_username = os.environ["REDDIT_USERNAME"]  # Just for assembling the user agent

In [3]:
# Read-only Reddit connection https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#read-only-reddit-instances
# Only the app credentials and a user agent are passed, no username/password.
reddit = Reddit(
    client_id=reddit_client_id,
    client_secret=reddit_client_secret,
    user_agent=f"social summarizer by u/{reddit_username}",
)
# Sanity check: display the logged-in user and the read-only flag
# (the recorded output is `(None, True)`).
reddit.user.me(), reddit.read_only

(None, True)

In [4]:
# Candidate subreddits, each spelled with Reddit's "r/" prefix.
selected_subreddit_names = [
    f"r/{bare_name}"
    for bare_name in (
        "jokes",
        "askscience",
        "explainlikeimfive",
        "LifeProTips",
        "GetMotivated",
        "AmItheAsshole",
    )
]
# The one subreddit that is actually queried further down.
target_subreddit_name = "r/AmItheAsshole"

In [5]:
from IPython.display import Markdown

In [6]:
# Strip the "r/" prefix before handing the name to the Reddit client.
subreddit = reddit.subreddit(target_subreddit_name.removeprefix("r/"))
print(subreddit.title)
# Render only the first 1,000 characters of the description as Markdown.
Markdown(f"{subreddit.description[:1_000]}...")

Am I the Asshole? 


#Welcome to r/AmITheAsshole!

A catharsis for the frustrated moral philosopher in all of us, and a place to finally find out if you were wrong in a real-world argument that's been bothering you. Tell us about any non-violent conflict you have experienced; give us both sides of the story, and find out if you're right, or you're the asshole.

This is the sub to lay out your  actions and conflicts and get impartial judgment rendered against you.  Were you the asshole in that situation or not? Post should reflect real situations, and abide by the rules below.

After 18 hours, your post will be given a flair representing the final judgment on your matter.  This flair is determined by the subscribers who have both rendered judgment and voted on which judgment is best.  ***The power of the crowd will judge you***.  If your top level comment has the highest number of upvotes in a thread, you will get a flair point. More details are listed in [our FAQ](https://www.reddit.com/r/AmItheAssho...

In [7]:
# Possible parameters https://old.reddit.com/dev/api#GET_new
def _submission_to_record(submission):
    """Flatten one Reddit submission into a JSON-serializable dict.

    Args:
        submission: Object exposing the submission attributes read below
            (``title``, ``author``, ``created_utc``, ...).

    Returns:
        A dict of plain Python values. ``creation_datetime`` is a naive
        ISO-8601 string expressed in UTC; ``author_name`` is ``"[deleted]"``
        when the submission has no author.
    """
    # `author` is None when the account no longer exists, so `.name` would
    # raise AttributeError. Use a string placeholder to keep the field a `str`.
    author = submission.author
    # `utcfromtimestamp` is deprecated since Python 3.12. Build an aware UTC
    # datetime, then drop tzinfo so the ISO string keeps its previous format.
    created = dt.datetime.fromtimestamp(submission.created_utc, tz=dt.timezone.utc)
    return {
        "title": submission.title,
        "author_name": author.name if author is not None else "[deleted]",
        "creation_datetime": created.replace(tzinfo=None).isoformat(),
        "subreddit_name": submission.subreddit_name_prefixed,
        "num_comments": submission.num_comments,
        "sfw": not submission.over_18,
        "score": submission.score,
        "upvote_ratio": submission.upvote_ratio,
        "is_self": submission.is_self,
        "permalink": submission.permalink,
        "selftext": submission.selftext,
        "flair_text": submission.link_flair_text,
    }


submissions = []
for submission in subreddit.new(limit=2):
    submissions.append(_submission_to_record(submission))

In [8]:
# Preview the first 1,000 characters of the JSON dump; this also shows that
# every collected value is JSON-serializable.
json.dumps(submissions)[:1_000] + "..."

'[{"title": "AITAH for not having the same sexual preferences now that I\\u2019m sober", "author_name": "Hairymatt", "creation_datetime": "2024-03-24T05:37:00", "subreddit_name": "r/AmItheAsshole", "num_comments": 2, "sfw": true, "score": 1, "upvote_ratio": 1.0, "is_self": true, "permalink": "/r/AmItheAsshole/comments/1bmddk3/aitah_for_not_having_the_same_sexual_preferences/", "selftext": "I got sober when my current partner and I got together.\\n Before this I was in a FWB situation with someone who was into various kinks. I explored these with them, reliving some experiences I hadn\\u2019t had in a while after having a couple drinks.\\n\\nThen I got sober and the desire to do those things dissipated. As well as most of my sex drive. \\n\\nMy current partner is upset about my sexual past given that it was very close to the beginning of our relationship. They\\u2019re feeling undesirable and think I find them disgusting because I don\\u2019t act like I did prior to the relationship.\\n

Notice the `creation_datetime` is `str`, for easier serialization!

In [9]:
# Timestamp of the last collected submission, still an ISO-8601 string here.
submissions[-1]["creation_datetime"]

'2024-03-24T05:34:21'

# 2. Model the data

We use Pydantic https://pypi.org/project/pydantic/, a popular Python library for data validation

In [10]:
class RedditSubmission(BaseModel):
    """Validated record for a single Reddit submission.

    Field names mirror the keys of the dicts collected in `submissions`.
    """

    title: str
    author_name: str
    # Supplied as an ISO-8601 string; Pydantic parses it into a datetime.
    creation_datetime: dt.datetime
    subreddit_name: str  # prefixed form, e.g. "r/AmItheAsshole"
    num_comments: int
    sfw: bool  # inverse of Reddit's `over_18` flag
    score: int
    upvote_ratio: float
    is_self: bool
    permalink: str
    # Nullable but declared without a default, so the key must still be present.
    selftext: str | None
    flair_text: str | None

In [11]:
# A TypeAdapter validates a whole list of submissions in a single call.
adapter = TypeAdapter(list[RedditSubmission])

In [12]:
# Validate the raw dicts; each one becomes a RedditSubmission instance.
objects = adapter.validate_python(submissions)
objects

[RedditSubmission(title='AITAH for not having the same sexual preferences now that I’m sober', author_name='Hairymatt', creation_datetime=datetime.datetime(2024, 3, 24, 5, 37), subreddit_name='r/AmItheAsshole', num_comments=2, sfw=True, score=1, upvote_ratio=1.0, is_self=True, permalink='/r/AmItheAsshole/comments/1bmddk3/aitah_for_not_having_the_same_sexual_preferences/', selftext='I got sober when my current partner and I got together.\n Before this I was in a FWB situation with someone who was into various kinks. I explored these with them, reliving some experiences I hadn’t had in a while after having a couple drinks.\n\nThen I got sober and the desire to do those things dissipated. As well as most of my sex drive. \n\nMy current partner is upset about my sexual past given that it was very close to the beginning of our relationship. They’re feeling undesirable and think I find them disgusting because I don’t act like I did prior to the relationship.\n\nAm I an asshole?', flair_text=

Pydantic automatically converted the `str` datetime to an actual `datetime.datetime` object, as specified in the model:

In [13]:
# Same submission as before, now read from the validated model instance.
objects[-1].creation_datetime

datetime.datetime(2024, 3, 24, 5, 34, 21)

# 3. Convert to a dataframe

We use Polars https://pypi.org/project/polars/, a nascent dataframe library with an expressive API and blazing fast performance

In [14]:
# `objects` holds Pydantic model instances, not plain dicts, even though the
# function is called `from_dicts`.
# NOTE(review): the all-null `flair_text` column still comes out as `str` in the
# recorded output, presumably because Polars takes dtypes from the model
# annotations — confirm before swapping in `model_dump()` dicts.
df = pl.from_dicts(objects)
df.head()

title,author_name,creation_datetime,subreddit_name,num_comments,sfw,score,upvote_ratio,is_self,permalink,selftext,flair_text
str,str,datetime[μs],str,i64,bool,i64,f64,bool,str,str,str
"""AITAH for not …","""Hairymatt""",2024-03-24 05:37:00,"""r/AmItheAsshol…",2,True,1,1.0,True,"""/r/AmItheAssho…","""I got sober wh…",
"""AITA for shout…","""Certain-Ad4543…",2024-03-24 05:34:21,"""r/AmItheAsshol…",2,True,1,1.0,True,"""/r/AmItheAssho…","""I (27) have be…",
