In [None]:
!pip install praw openai python-dotenv


In [11]:
import os

# Template configuration: every 'YOUR_...' value below is a placeholder.
# Do not commit real secrets into the notebook.
os.environ.update({
    # Reddit API application credentials and user agent
    'REDDIT_CLIENT_ID': 'YOUR_REDDIT_CLIENT_ID',
    'REDDIT_CLIENT_SECRET': 'YOUR_REDDIT_CLIENT_SECRET',
    'REDDIT_USER_AGENT': 'colab-script/1.0',
    # OpenAI API key
    'OPENAI_API_KEY': 'YOUR_OPENAI_API_KEY',
})

In [12]:
import os
import re
import praw
import openai

def extract_username_from_url(url: str) -> str:
    """Return the username embedded in a Reddit profile URL.

    Both the long ``/user/<name>`` and the short ``/u/<name>`` profile paths
    are accepted, on ``reddit.com`` itself or behind a single subdomain label
    (``www.``, ``old.``, ``new.`` ...). Leading/trailing whitespace is ignored
    and the scheme, host and path prefix are matched case-insensitively.
    Anything after the username (trailing slash, sub-pages, query string) is
    ignored.

    Args:
        url: Profile URL, e.g. ``https://www.reddit.com/user/kn0thing/``.

    Returns:
        The username exactly as it appears in the URL.

    Raises:
        ValueError: If ``url`` is not an http(s) Reddit profile URL.
    """
    m = re.match(
        r"https?://(?:[\w\-]+\.)?reddit\.com/u(?:ser)?/([\w\-]+)",
        url.strip(),  # tolerate copy/paste whitespace around the URL
        re.IGNORECASE,
    )
    if m is None:
        raise ValueError("Invalid Reddit user URL.")
    return m.group(1)

def get_reddit_instance():
    """Build a ``praw.Reddit`` client from credentials held in the environment.

    Reads ``REDDIT_CLIENT_ID``, ``REDDIT_CLIENT_SECRET`` and
    ``REDDIT_USER_AGENT`` from ``os.environ``; a missing variable raises
    ``KeyError``.

    Returns:
        The object returned by ``praw.Reddit(...)``.
    """
    env = os.environ
    credentials = {
        "client_id": env['REDDIT_CLIENT_ID'],
        "client_secret": env['REDDIT_CLIENT_SECRET'],
        "user_agent": env['REDDIT_USER_AGENT'],
    }
    return praw.Reddit(**credentials)

def fetch_user_data(username: str, post_limit=20, comment_limit=50):
    """Collect a redditor's submissions and comments as plain dictionaries.

    Items are read from the ``new`` listings of the redditor's submissions
    and comments, submissions first.

    Args:
        username: Reddit account name to look up.
        post_limit: ``limit`` passed to ``submissions.new``.
        comment_limit: ``limit`` passed to ``comments.new``.

    Returns:
        ``{"posts": [...], "comments": [...]}``. Each post dict has the keys
        ``id``, ``title``, ``text``, ``subreddit`` and ``url``; each comment
        dict has ``id``, ``body``, ``subreddit`` and ``url``.
    """
    account = get_reddit_instance().redditor(username)

    posts = [
        {
            "id": submission.id,
            "title": submission.title,
            "text": submission.selftext,
            "subreddit": str(submission.subreddit),
            # permalink is site-relative, so prefix the host
            "url": f"https://www.reddit.com{submission.permalink}",
        }
        for submission in account.submissions.new(limit=post_limit)
    ]

    comments = [
        {
            "id": reply.id,
            "body": reply.body,
            "subreddit": str(reply.subreddit),
            "url": f"https://www.reddit.com{reply.permalink}",
        }
        for reply in account.comments.new(limit=comment_limit)
    ]

    return {"posts": posts, "comments": comments}

def compile_llm_prompt(username, posts, comments):
    """Assemble the persona-generation prompt from a user's posts and comments.

    Args:
        username: Reddit account name, shown at the top of the content section.
        posts: Iterable of dicts with ``subreddit``, ``title``, ``text`` and
            ``url`` keys.
        comments: Iterable of dicts with ``subreddit``, ``body`` and ``url``
            keys.

    Returns:
        The full prompt text: fixed instructions, then the rendered posts and
        comments, then an end-of-data marker.
    """
    post_entries = [
        f"[{p['subreddit']}] Title: {p['title']}\nText: {p['text']}\nURL: {p['url']}\n"
        for p in posts
    ]
    comment_entries = [
        f"[{c['subreddit']}] Comment: {c['body']}\nURL: {c['url']}\n"
        for c in comments
    ]
    # Header, posts section, comments section -- joined one entry per line.
    sections = [
        f"Reddit user: {username}\n",
        "\n### Recent Posts:",
        *post_entries,
        "\n### Recent Comments:",
        *comment_entries,
    ]
    content_chunk = "\n".join(sections)
    return f"""
You are an expert in digital anthropology. Given the following Reddit user's posts and comments, create a detailed user persona for this individual. For each characteristic you give in the persona (such as interests, personality, tone, occupation guesses, etc.), **cite** the post or comment (with its text or relevant excerpt and the provided URL) that supports your inference.

Be specific and comprehensive, including:
- User display name (if any)
- Interests, Hobbies, Favorite topics/subreddits
- Style of communication
- Evidence of occupation or demographic
- Values, beliefs, notable personality traits
- Any patterns in posting time or content
- Anything else you can reasonably infer

Format each attribute like:
Trait: Explanation (cite: "comment/post excerpt..." —[URL])

Here is the Reddit user's content:

{content_chunk}

--- End of user data ---
"""

def generate_persona(prompt):
    """Send ``prompt`` to the OpenAI chat completion API and return the reply.

    Works with both generations of the ``openai`` package: when the installed
    package exposes the ``openai.OpenAI`` client class (openai>=1.0) that
    client is used; otherwise the legacy module-level
    ``openai.ChatCompletion.create`` call is used. The legacy entry point is
    not usable in openai>=1.0, so calling it unconditionally fails on a
    fresh, unpinned install.

    Args:
        prompt: Full prompt text, sent as a single user message.

    Returns:
        The text content of the first returned choice.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    api_key = os.environ['OPENAI_API_KEY']
    request = {
        "model": "gpt-3.5-turbo",  # or "gpt-4" if you have access
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1200,
        "temperature": 0.3,
    }

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: explicit client object, attribute-style response.
        client = openai.OpenAI(api_key=api_key)
        response = client.chat.completions.create(**request)
        return response.choices[0].message.content

    # openai<1.0: module-level key, dict-style response.
    openai.api_key = api_key
    response = openai.ChatCompletion.create(**request)
    return response["choices"][0]["message"]["content"]

def save_persona(username, persona_text):
    """Write the persona text to ``<username>_persona.txt`` and report the path.

    The file is created in (or overwritten within) the current working
    directory, encoded as UTF-8.

    Args:
        username: Used to build the output file name.
        persona_text: Text written to the file as-is.
    """
    filename = f"{username}_persona.txt"
    with open(filename, "w", encoding="utf-8") as out_file:
        out_file.write(persona_text)
    print(f"\nPersona saved to {filename}")


In [16]:
import os
from getpass import getpass

# SECURITY: a Reddit client id/secret used to be hardcoded in this cell.
# Anything stored in a notebook must be treated as leaked -- revoke those
# credentials in the Reddit app settings and generate new ones.
#
# Secrets are requested interactively instead, so they are neither echoed nor
# saved with the notebook. A value that is already present in the environment
# is kept; values starting with 'YOUR_' are template placeholders and are
# treated as missing.
for _secret_name in ('REDDIT_CLIENT_ID', 'REDDIT_CLIENT_SECRET', 'OPENAI_API_KEY'):
    _existing = os.environ.get(_secret_name, '')
    if not _existing or _existing.startswith('YOUR_'):
        os.environ[_secret_name] = getpass(f"{_secret_name}: ")

# The user agent identifies the script to Reddit; it is not a secret.
os.environ['REDDIT_USER_AGENT'] = 'chaitanya-script/0.1 by YogurtclosetOwn2910'

In [None]:
# Profile to analyse
user_url = "https://www.reddit.com/user/kn0thing/"
username = extract_username_from_url(user_url)

# Step 1: pull the user's recent posts and comments from Reddit
print("Fetching Reddit data...")
data = fetch_user_data(username)
post_count, comment_count = len(data['posts']), len(data['comments'])
print(f"Fetched {post_count} posts and {comment_count} comments.")

# Step 2: turn the fetched content into a prompt and ask the model for a persona
prompt = compile_llm_prompt(username, data['posts'], data['comments'])
print("Generating persona (this can take a minute)...")
persona = generate_persona(prompt)

# Step 3: display the persona and write it to <username>_persona.txt
print(persona)
save_persona(username, persona)
