In [1]:
import praw
from dotenv import load_dotenv
import networkx as nx
import os

# Initializing calling the reddit API 

In [3]:
# Load environment variables from the .env file
load_dotenv()

# Retrieve tokens from environment variables
client_id = os.getenv('CLIENT_ID')
client_secret = os.getenv('CLIENT_SECRET')
user_agent = os.getenv('USER_AGENT')
# Initialize Reddit instance with credentials from the .env file
reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    user_agent=user_agent
)


Making a test call to the reddit API

In [4]:
# Test to see if connection and authentication are successful
try:
    subreddit = reddit.subreddit('python')
    print("Subreddit title:", subreddit.title)
    print("Subreddit description:", subreddit.public_description)
except Exception as e:
    print("An error occurred when accessing subreddit:", e)


Subreddit title: Python
Subreddit description: The official Python community for Reddit! Stay up to date with the latest news, packages, and meta information relating to the Python programming language. 
---

If you have questions or are new to Python use r/LearnPython


# Top subreddits

In [9]:
# Fetch the top 20 subreddits
top_subreddits = reddit.subreddits.popular(limit=100)

sorted_top = sorted(top_subreddits, key=lambda x: x.subscribers, reverse=True)

for sub in sorted_top:
    print(sub.display_name, sub.subscribers)

funny 64817697
AskReddit 49026513
gaming 44144797
worldnews 42229077
todayilearned 38465785
memes 34721183
movies 33874936
pics 31191605
news 28892315
videos 26824982
DIY 25569800
nottheonion 24986586
mildlyinteresting 24075813
explainlikeimfive 22950565
AmItheAsshole 21785825
personalfinance 19987347
OldSchoolCool 18603591
Damnthatsinteresting 17990315
technology 17366167
wallstreetbets 16989040
relationship_advice 14396826
nba 13583174
pcmasterrace 13533188
interestingasfuck 12858189
anime 11593883
Unexpected 11112404
nfl 10861398
MadeMeSmile 10515063
politics 8684475
mildlyinfuriating 8583019
BeAmazed 8369159
facepalm 8236975
Minecraft 7852045
ChatGPT 7822074
soccer 7767252
leagueoflegends 7731095
buildapc 7431023
WTF 7058967
AskMen 6378462
dankmemes 5927898
BlackPeopleTwitter 5922289
Overwatch 5899019
coolguides 5584095
NoStupidQuestions 5076113
MapPorn 4889070
OnePiece 4739005
PublicFreakout 4697953
formula1 4484715
TikTokCringe 4445908
unpopularopinion 4392539
Steam 4265434
popcu

# Get random subreddit

In [19]:
n_random_subreddits = 50
number_artices = 20

for _ in range(n_random_subreddits):
    random_subreddit = reddit.subreddit("random")
    print(random_subreddit.display_name, random_subreddit.subscribers)

assholedesign 3032327
limbuscompany 53557
malefashionadvice 5965361
im14andthisisdeep 1085447
UFObelievers 102026
projecteternity 68345
ExperiencedDevs 231651
AeroPress 55031
unpopularopinion 4392554
Seattle 604919


In [22]:
import json
# Get a random subreddit
random_subreddit = reddit.subreddit("random")
print(f"Selected Subreddit: {random_subreddit.display_name}")

# Get the top 10 posts of the month
top_posts = random_subreddit.top("month", limit=10)

# Prepare data structure to save posts and comments
subreddit_data = {
    "subreddit": random_subreddit.display_name,
    "posts": []
}

for post in top_posts:
    post_data = {
        "title": post.title,
        "score": post.score,
        "text": post.selftext,
        "url": post.url,
        "id": post.id,
        "comments": []
    }

    # Fetch the top-level comments for each post
    post.comments.replace_more(limit=0)  # Avoid fetching "MoreComments" objects
    for comment in post.comments.list()[:10]:  # Limit to 10 comments per post
        post_data["comments"].append({
            "comment_id": comment.id,
            "author": comment.author.name if comment.author else "deleted",
            "body": comment.body,
            "score": comment.score
        })
    
    # Add the post data to subreddit_data
    subreddit_data["posts"].append(post_data)

# Save the data to a JSON file
with open("random_subreddit_top_posts.json", "w", encoding="utf-8") as f:
    json.dump(subreddit_data, f, indent=4)

print("Data saved to random_subreddit_top_posts.json")


Selected Subreddit: NYGiants


Call this function with 'time_filter' as a keyword argument.
  top_posts = random_subreddit.top("month", limit=10)


Data saved to random_subreddit_top_posts.json
