In [12]:
!pip install --upgrade --quiet  praw


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import praw
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
from extras.constants import *
from pprint import pprint


In [11]:
# Build the PRAW client shared by the helper functions below. The three
# REDDIT_* settings are expected to come from the `extras.constants` import.
reddit = praw.Reddit(
    user_agent=REDDIT_USER_AGENT,
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
)

class Comment(BaseModel):
    """A single Reddit comment reduced to the fields this notebook reports."""
    # Author name as a string; None when the comment has no author object
    # (get_top_comments stores None for a falsy `comment.author`).
    author: Optional[str] = None
    # Text of the comment, copied from `comment.body`.
    body: str
    # Comment score, copied from `comment.score`; comments are ranked by it.
    score: int

class PostInfo(BaseModel):
    """Summary of one search hit together with its selected comments."""
    # Post title, copied from `post.title`.
    title: str
    # Value of `post.url` exactly as the API object reports it.
    url: str
    # Display name of the subreddit the post belongs to.
    subreddit: str
    # Author name as a string; None when the post has no author object.
    posted_by: Optional[str] = None
    # Comments chosen by get_top_comments for this post.
    comments: List[Comment]

class QueryResult(BaseModel):
    """Container pairing a search query with the posts found for it."""
    # The search string that was passed to get_top_posts.
    query: str
    # One PostInfo per search result, in the order the search yielded them.
    posts: List[PostInfo]

def get_top_comments(post, limit=5):
    """Return the highest-scoring comments of ``post`` as ``Comment`` models.

    Args:
        post: Submission-like object exposing ``comments.replace_more`` and
            ``comments.list``.
        limit: Used twice: as the ``limit`` passed to ``replace_more`` and as
            the maximum number of comments returned.

    Returns:
        A list of ``Comment`` objects ordered by descending score.

    NOTE(review): ``comments.list()`` presumably yields the flattened comment
    forest (replies included), not only top-level comments -- confirm against
    the PRAW documentation.
    """
    forest = post.comments
    forest.replace_more(limit=limit)

    # Rank every retrieved comment by score, best first, then keep the head.
    ranked = sorted(forest.list(), key=lambda item: item.score, reverse=True)

    return [
        Comment(
            author=None if not entry.author else str(entry.author),
            body=entry.body,
            score=entry.score,
        )
        for entry in ranked[:limit]
    ]

def get_top_posts(query, limit=2, comment_limit=5, subreddit='all'):
    """Search Reddit for ``query`` and collect the hits with their comments.

    Args:
        query: Search string passed to the subreddit search.
        limit: Maximum number of posts requested from the search.
        comment_limit: Value forwarded as ``limit`` to ``get_top_comments``
            for every post. Defaults to 5, the value that helper previously
            received implicitly through its own default.
        subreddit: Name of the subreddit to search. Defaults to ``'all'``,
            which was previously hard-coded.

    Returns:
        A ``QueryResult`` holding ``query`` and one ``PostInfo`` per search
        result, in the order the search yielded them.
    """
    # Uses the module-level `reddit` client; results are ordered by relevance.
    search_results = reddit.subreddit(subreddit).search(
        query, sort='relevance', limit=limit
    )

    # NOTE(review): for link posts `post.url` presumably points at the linked
    # content rather than the Reddit thread -- confirm if the thread URL is
    # what downstream consumers expect.
    posts = [
        PostInfo(
            title=post.title,
            url=post.url,
            subreddit=post.subreddit.display_name,
            # A missing author object is stored as None.
            posted_by=str(post.author) if post.author else None,
            comments=get_top_comments(post, limit=comment_limit),
        )
        for post in search_results
    ]

    return QueryResult(query=query, posts=posts)


query = 'how to learn python'
result = get_top_posts(query)

# Convert the result model to a plain dict and print it.
# Pydantic v2 renamed `.dict()` to `.model_dump()` and deprecates the old
# name, so use the new method when the installed version provides it and
# fall back to `.dict()` on Pydantic v1.
result_dict = result.model_dump() if hasattr(result, 'model_dump') else result.dict()
print(result_dict)


{'query': 'how to learn python', 'posts': [{'title': 'How to learn python?', 'url': 'https://www.reddit.com/r/devops/comments/17s813q/how_to_learn_python/', 'subreddit': 'devops', 'posted_by': 'ak17hg', 'comments': [{'author': 'Bloodrose_GW2', 'body': 'My usual way is: pick a problem and start coding.', 'score': 37}, {'author': None, 'body': 'Read in order : \n\n- Learn Python 3 The Hard Way\n- Learn More Python 3 The Hard Way\n- Fluent Python \n- Effective Python', 'score': 13}, {'author': 'JetreL', 'body': 'it truly hurts me to say this but ... *this is the way!*', 'score': 11}, {'author': 'Leonjy92', 'body': 'Try out the Udemy course Automate The Boring Stuff in Python. It has great reviews and teaches you the fundamental while guiding you in creating projects that you might find useful in real life.', 'score': 10}, {'author': 'gingimli', 'body': 'CS50P is the Python one, the original CS50 is mostly C and then switches to Python towards the middle. I think the original CS50 is much 

In [5]:
# Show which top-level keys the dumped result contains.
pprint(result_dict.keys())

dict_keys(['query', 'posts'])


In [10]:
# Display the list of post dicts from the dumped result.
# NOTE(review): the saved output under this cell shows an r/IAmA post, which
# does not match the 'how to learn python' query cell above; the execution
# counts (In [10] here vs In [11] for the query cell) suggest it was captured
# from an earlier run. Re-run the notebook top to bottom to refresh it.
result_dict['posts']

[{'title': 'I’m the hacker that brought down North Korea’s Internet For Over A Week. AMA',
  'url': 'https://www.reddit.com/r/IAmA/comments/1divlp3/im_the_hacker_that_brought_down_north_koreas/',
  'subreddit': 'IAmA',
  'posted_by': 'dotslashpunk',
  'comments': [{'author': 'dotslashpunk',
    'body': 'It was. The actual attack - pretty simple and easy. The recon required to know WHAT to attack was the kind of creative part. I\'m not a super genius computer hacker like the people below are claiming I\'m trying to act like... I\'m actually a pretty normal dude. I\'m a decent hacker because I fucking love it and live for it, but that\'s all I can really say about me and my abilities.\n\nSo here\'s how it went down. At first yep, it was just your basic DoS attack. Not just DDoS, they had outdated nginx servers and I found some CVEs that I could write some n-days for for memory exhaustion. That was nifty. I also hit their web servers with slow polling attacks just for additional instabili