In [16]:
from openai import OpenAI
import os

In [17]:
# Build the API client from the environment so the key never appears in the notebook.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [18]:
# Smoke test: send a single trivial request and print the text the model returns.
response = client.responses.create(
    model="gpt-5",
    input="Write a one-sentence bedtime story about a unicorn.",
)

print(response.output_text)

Under a velvet moon, a shy unicorn with a silver mane tiptoed through the whispering meadow, leaving trails of starlight that curled into sweet dreams for every sleeping child.


In [34]:
from openai import OpenAI
from pydantic import BaseModel
import pandas as pd
import os
import requests
from bs4 import BeautifulSoup
import time
import random

# API client; the key is read from the environment rather than hardcoded.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# System prompt sent with every batch by filter_posts(). The four fields listed at
# the end (keep / reason / category / field) mirror PostFilteringResponse.
prompt = """
I'm writing a literature review of the field of AI alignment for 2025. My goal is to find the most interesting posts from the past year that represent advancements in the field.

You should go through the list of posts and identify posts that seem like an advancement in the field of AI alignment.

Here are categories of posts you should keep:
- Posts that link to a paper or introduce an advancement in the field of AI alignment
- Posts that have high karma
- Posts that are high-quality critiques of a specific research agenda
- High-quality literature reviews
- High quality position posts

For each batch of 10 posts, you should typically keep 1-3 of the best posts in the group.

If there are multiple exceptionally high-quality posts, you may keep up to 5 posts.

For each post, you should output a JSON object with the following fields:
- keep: true if the post should be kept, false otherwise
- reason: a short reason for keeping or not keeping the post
- category: the category of the post (e.g. "critique", "review", "position", "advancement")
- field: the field of AI alignment that the post is about (e.g. "interpretability", "RL safety", "agent foundations" etc.). Note that you may discover new fields of AI alignment that are not already in the list.
"""


# Verdict for a single post, as requested from the model in `prompt`.
# Documented with `#` comments rather than a docstring: this class is part of the
# schema passed as `text_format` in filter_posts(), and a class docstring may be
# surfaced in that schema (NOTE(review): confirm against the pydantic version in use).
class PostFilteringResponse(BaseModel):
    # True if the post should be kept, False otherwise.
    keep: bool
    # Short justification for keeping or dropping the post.
    reason: str
    # Kind of post; the prompt gives "critique", "review", "position", "advancement" as examples.
    category: str
    # Sub-field of AI alignment the post is about (free-form; the prompt allows new fields).
    field: str


# Container for the verdicts of one batch; used as `text_format` in filter_posts().
# Kept docstring-free so the schema derived from this class is left exactly as it was.
class BatchPostFilteringResponse(BaseModel):
    # One verdict per post. filter_posts() matches entries to posts by list position.
    posts: list[PostFilteringResponse]


def get_post_content(url: str, max_words: int = 500) -> str:
    """
    Download a post page and return the leading ``max_words`` words of its body text.

    The body element is the first match among, in order:
    ``<div class="PostsPage-postContent">``, ``<article>``, ``<div class="post-body">``.

    Args:
        url: Address of the post page to download.
        max_words: Maximum number of whitespace-separated words to return.

    Returns:
        The truncated body text. If no body element is found, the literal string
        "Could not extract post content". If any exception is raised while
        fetching or parsing, the error is printed and a string starting with
        "Error fetching content: " is returned instead of raising.
    """
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # (tag name, extra keyword arguments for soup.find), tried in this order.
    body_selectors = (
        ("div", {"class_": "PostsPage-postContent"}),
        ("article", {}),
        ("div", {"class_": "post-body"}),
    )

    try:
        page = requests.get(url, timeout=10, headers=request_headers)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, "html.parser")

        # Stop at the first selector that matches anything.
        body = None
        for tag_name, find_kwargs in body_selectors:
            body = soup.find(tag_name, **find_kwargs)
            if body:
                break

        if not body:
            return "Could not extract post content"

        # get_text + split collapses all runs of whitespace before truncating.
        body_words = body.get_text(separator=" ", strip=True).split()
        return " ".join(body_words[:max_words])

    except Exception as err:
        print(f"Error fetching content: {err}")
        return f"Error fetching content: {str(err)}"


def _make_result_row(row, keep, reason, category, field):
    """Build one output record: the post's original metadata plus its filter verdict."""
    return {
        "title": row["title"],
        "link": row["link"],
        "karma": row["karma"],
        "date": row["date"],
        "keep": keep,
        "reason": reason,
        "category": category,
        "field": field,
    }


def filter_posts(posts: pd.DataFrame, batch_size: int = 10) -> pd.DataFrame:
    """
    Classify posts in batches with the model and return one verdict row per post.

    For each batch, the content of every post is fetched with get_post_content(),
    the batch is sent to the model together with the system `prompt`, and the
    parsed verdicts are matched to the posts by position.

    Args:
        posts: DataFrame with at least the columns "title", "link", "karma" and "date".
        batch_size: Number of posts sent to the model per request. Must be >= 1.

    Returns:
        DataFrame with the columns title, link, karma, date, keep, reason,
        category, field, in input order. The columns are present even when
        `posts` is empty. Posts whose batch failed, or for which the model
        returned no verdict, get keep=False and category="error".

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    # A negative step would make range() yield nothing and silently drop every post.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    result_columns = [
        "title",
        "link",
        "karma",
        "date",
        "keep",
        "reason",
        "category",
        "field",
    ]
    # Shared by the fetch call and the label shown to the model so they cannot drift.
    content_words = 500

    results = []
    total_posts = len(posts)

    # Process posts in batches
    for batch_start in range(0, total_posts, batch_size):
        batch_end = min(batch_start + batch_size, total_posts)
        # Materialise the rows once; they are needed for fetching, prompting and results.
        batch_rows = [row for _, row in posts.iloc[batch_start:batch_end].iterrows()]

        print(
            f"Processing batch: posts {batch_start + 1} to {batch_end} of {total_posts}"
        )

        # Fetch content for all posts in the batch
        batch_contents = []
        for position, row in enumerate(batch_rows, 1):
            print(f"  Fetching content for post {batch_start + position}...")
            batch_contents.append(
                get_post_content(row["link"], max_words=content_words)
            )
            # Longer delay between requests (2-4 seconds with randomization)
            time.sleep(random.uniform(2, 4))

        # NOTE(review): this sentence asks for roughly 1 in 10 posts, while the system
        # prompt asks for 1-3 per batch of 10 (up to 5). Confirm which one is intended.
        sections = [
            "Please analyze these posts and return a JSON array with one object for each post. Remember to keep approximately 1 in 10 posts.\n\n"
        ]
        for position, (row, content) in enumerate(zip(batch_rows, batch_contents), 1):
            sections.append(
                f"Post {position}:\n"
                f"Title: {row['title']}\n"
                f"Karma: {row['karma']}\n"
                f"Date: {row['date']}\n"
                f"Link: {row['link']}\n"
                "\n"
                f"Content (first {content_words} words):\n"
                f"{content}\n"
                "\n"
                "---\n"
                "\n"
            )
        batch_info = "".join(sections)

        # Collect this batch's rows separately so a failure part-way through can
        # never leave a half-recorded batch followed by a full set of error rows.
        try:
            response = client.responses.parse(
                model="gpt-5-mini",
                input=[
                    {"role": "system", "content": prompt},
                    {
                        "role": "user",
                        "content": batch_info,
                    },
                ],
                reasoning={
                    "effort": "minimal",
                },
                text_format=BatchPostFilteringResponse,
            )

            parsed_response = response.output_parsed
            if parsed_response is None:
                raise ValueError("Model returned no parsed output")
            verdicts = parsed_response.posts

            if len(verdicts) > len(batch_rows):
                print(
                    f"Warning: received {len(verdicts)} responses for "
                    f"{len(batch_rows)} posts; ignoring the extras"
                )

            batch_results = []
            # Verdicts are matched to posts purely by position in the returned list.
            for offset, row in enumerate(batch_rows):
                if offset < len(verdicts):
                    verdict = verdicts[offset]
                    batch_results.append(
                        _make_result_row(
                            row,
                            verdict.keep,
                            verdict.reason,
                            verdict.category,
                            verdict.field,
                        )
                    )
                else:
                    # Handle case where we got fewer responses than expected
                    print(
                        f"Warning: Missing response for post {batch_start + offset + 1}"
                    )
                    batch_results.append(
                        _make_result_row(
                            row, False, "Missing response from API", "error", "unknown"
                        )
                    )

        except Exception as e:
            print(f"Error processing batch starting at post {batch_start + 1}: {e}")
            # Record every post of the failed batch as an error so no input row is lost.
            batch_results = [
                _make_result_row(row, False, f"Error: {str(e)}", "error", "unknown")
                for row in batch_rows
            ]

        results.extend(batch_results)

    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(results, columns=result_columns)

In [35]:
import pandas as pd

# Usage example: classify every post in the input CSV and save all verdicts.
input_df = pd.read_csv("all-alignment-forum-posts-2025.csv")
output_df = filter_posts(input_df)
output_df.to_csv("filtered-alignment-forum-posts-2025-3.csv", index=False)

# Show summary
total_count = len(output_df)
if total_count:
    kept_posts = output_df[output_df["keep"] == True]
else:
    # With no rows the "keep" column may be missing, so skip the column lookup.
    kept_posts = output_df.copy()
# Avoid dividing by zero when the input CSV had no rows.
kept_percentage = len(kept_posts) / total_count * 100 if total_count else 0.0
print(f"\nTotal posts: {total_count}")
print(f"Posts to keep: {len(kept_posts)}")
print(f"Percentage kept: {kept_percentage:.1f}%")

Processing batch: posts 1 to 10 of 438
  Fetching content for post 1...
  Fetching content for post 2...
  Fetching content for post 3...
  Fetching content for post 4...
  Fetching content for post 5...
  Fetching content for post 6...
  Fetching content for post 7...
  Fetching content for post 8...
  Fetching content for post 9...
  Fetching content for post 10...
Processing batch: posts 11 to 20 of 438
  Fetching content for post 11...
  Fetching content for post 12...
  Fetching content for post 13...
  Fetching content for post 14...
  Fetching content for post 15...
  Fetching content for post 16...
  Fetching content for post 17...
  Fetching content for post 18...
  Fetching content for post 19...
  Fetching content for post 20...
Processing batch: posts 21 to 30 of 438
  Fetching content for post 21...
  Fetching content for post 22...
  Fetching content for post 23...
  Fetching content for post 24...
  Fetching content for post 25...
  Fetching content for post 26...
  Fetc

In [38]:
# Save only the posts the model marked as keep (kept_posts comes from the summary cell).
kept_posts.to_csv(
    path_or_buf="alignment-forum-posts-2025-ai-filtered.csv",
    index=False,
)