In [None]:
import praw
import random
import time
import pandas as pd
from datetime import datetime
import os

# Authenticate.
# Credentials are taken from the environment so that secrets do not have to be
# pasted into (and accidentally committed with) the notebook. When a variable
# is unset the value falls back to the empty-string placeholder, so the client
# is still constructed and any authentication failure surfaces on the first
# request.
reddit = praw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID", ""),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET", ""),
    user_agent="script:fetcher:v1.0 (by u/YOUR_USERNAME)",
)

# Choose a subreddit
subreddit = reddit.subreddit("DamnThatsInteresting")

# Initialize counters
ama_count = 0   # posts whose flair text contains "AMA"
post_count = 0  # posts processed so far

# Create the data directory. `exist_ok=True` makes this a single call that is
# safe when the directory already exists (no check-then-create race).
data_dir = 'data'
os.makedirs(data_dir, exist_ok=True)

# Build a per-run Excel filename: subreddit name plus a local-time stamp, so
# repeated runs never overwrite each other's output.
run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
excel_filename = os.path.join(
    data_dir,
    f"reddit_metrics_{subreddit.display_name}_{run_stamp}.xlsx",
)

# Seed the workbook with a header-only sheet so that later writes can open the
# file in append mode and add data rows below the header.
with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
    columns = ['id', 'title', 'content', 'author', 'subreddit', 'subreddit_id', 'url',
               'permalink', 'created_utc', 'num_comments', 'score', 'upvotes', 'downvotes',
               'upvote_ratio', 'is_self', 'link_flair_text', 'has_link', 'removed_by_mods']
    pd.DataFrame(columns=columns).to_excel(writer, index=False)

# Fetch hot posts from the subreddit. Only the single top hot post is
# requested; raise `hot_post_limit` to collect more.
hot_post_limit = 1
for post in subreddit.hot(limit=hot_post_limit):
    # Pause a random 10-30 seconds per post to space out the processing.
    time.sleep(random.randint(10, 30))

    # `post.created_utc` is an epoch timestamp in seconds. Convert it without
    # applying the machine's local timezone so the stored value really is UTC
    # (datetime.fromtimestamp() would silently shift it to local time).
    # The result is timezone-naive, which is what the Excel writer requires.
    created_utc = pd.to_datetime(post.created_utc, unit="s")

    # Print post details
    print("-" * 40)
    print(f"ID: {post.id}")
    print(f"Title: {post.title}")
    print(f"Content: {post.selftext}")
    print(f"Author: {post.author}")
    print(f"Subreddit: {post.subreddit}")
    print(f"Subreddit ID: {post.subreddit_id}")
    print(f"URL: {post.url}")
    print(f"Permalink: {post.permalink}")
    print(f"Created UTC (raw): {post.created_utc}")
    print(f"Created UTC (formatted): {created_utc}")
    print(f"Number of Comments: {post.num_comments}")
    print(f"Score: {post.score}")
    print(f"Upvotes: {post.ups}")
    print(f"Downvotes: {post.downs}")
    print(f"Upvote Ratio: {post.upvote_ratio}")
    print(f"Is Self Post: {post.is_self}")
    print(f"Link Flair Text: {post.link_flair_text}")
    print("-" * 40)

    # One-row DataFrame for this post; key order matches the header row that
    # was written when the workbook was created.
    post_df = pd.DataFrame([{
        'id': post.id,
        'title': post.title,
        'content': post.selftext,
        'author': str(post.author),        # stringified so Excel gets plain text
        'subreddit': str(post.subreddit),  # stringified so Excel gets plain text
        'subreddit_id': post.subreddit_id,
        'url': post.url,
        'permalink': post.permalink,
        'created_utc': created_utc,  # naive UTC timestamp, not the raw epoch float
        'num_comments': post.num_comments,
        'score': post.score,
        'upvotes': post.ups,
        'downvotes': post.downs,
        'upvote_ratio': post.upvote_ratio,
        'is_self': post.is_self,
        'link_flair_text': post.link_flair_text,
        # A post "has a link" when it carries a URL and is not a self/text post.
        'has_link': bool(post.url) and not post.is_self,
        'removed_by_mods': post.removed_by_category == "moderator",
    }])

    # Append to the existing workbook immediately, so rows already collected
    # are on disk even if a later iteration fails. Row 0 is the header, hence
    # the `+ 1` offset.
    with pd.ExcelWriter(excel_filename, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
        post_df.to_excel(writer, index=False, header=False, startrow=post_count + 1)

    post_count += 1

    # Count AMA posts (flair may be None, so test truthiness first)
    if post.link_flair_text and 'AMA' in post.link_flair_text.upper():
        ama_count += 1

# Status icons are written as escape sequences so they survive any
# re-encoding of this source file (they previously appeared as mojibake).
# \u2705 = check mark, \U0001F4C8 = chart, \U0001F4C5 = calendar.
print(f"\n\u2705 Data saved to {excel_filename}")

# Read the final Excel file back so the statistics reflect what was actually
# persisted to disk.
df = pd.read_excel(excel_filename)

# Print summary statistics
print("\n\U0001F4C8 Summary Statistics:")
print(f"Total Posts Analyzed: {post_count}")
print(f"Average Upvotes: {df['upvotes'].mean():.2f}")
print(f"Average Comments: {df['num_comments'].mean():.2f}")
print(f"Posts with Links: {df['has_link'].sum()}")
print(f"Posts Removed by Mods: {df['removed_by_mods'].sum()}")
print(f"AMA Posts: {ama_count}")

# Print unique timestamps to verify that creation times were stored per post
print("\n\U0001F4C5 Timestamp Analysis:")
print(f"Number of unique timestamps: {df['created_utc'].nunique()}")
print("\nFirst few timestamps:")
print(df['created_utc'].head())


----------------------------------------
ID: 1hbthof
Title: r/Damnthatsinteresting is looking for new mods!
Content: Requirements:

* 1+ Year Old account
* Cannot already moderate a high traffic (1M+ subscribers) subreddit

Hi all! We're looking for new mods for damnthatsinteresting. We're currently a very small team and are looking to bring on 1 - 2 new mods  to help out. Leave a comment below with your timezone, potential hourly commitment, and a little about yourself to be considered.
Author: esoterix_luke
Subreddit: Damnthatsinteresting
Subreddit ID: t5_2xxyj
URL: https://www.reddit.com/r/Damnthatsinteresting/comments/1hbthof/rdamnthatsinteresting_is_looking_for_new_mods/
Permalink: /r/Damnthatsinteresting/comments/1hbthof/rdamnthatsinteresting_is_looking_for_new_mods/
Created UTC (raw): 1733923825.0
Created UTC (formatted): 2024-12-11 08:30:25
Number of Comments: 373
Score: 322
Upvotes: 322
Downvotes: 0
Upvote Ratio: 0.91
Is Self Post: True
Link Flair Text: Mod Applications Are Op