# Skeleton-of-Thought (SoT) Prompting

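A prompting technique that gained traction in 2024–2025: first ask the model for a short skeleton outline, then expand each outline point with its own request. Because the expansions are independent of one another, they can run in parallel, which gives a large speed boost for long answers.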

Well suited to long, sectioned outputs: blog posts, reports, stories, code docs, and research summaries.

In [None]:
!pip install openai python-dotenv --quiet

In [None]:
import getpass
import os
import re
import time
from concurrent.futures import ThreadPoolExecutor

import openai
from dotenv import load_dotenv
# Pull settings from a local .env file (if one exists) into the environment.
load_dotenv()
# Prefer the key from the environment and only prompt as a fallback.
# getpass is used instead of input() so the pasted secret is not echoed into
# the notebook output (which is saved with the notebook); strip() removes the
# stray whitespace/newline that often comes along with a pasted key.
openai.api_key = (os.getenv("OPENAI_API_KEY") or getpass.getpass("Paste OpenAI key: ")).strip()
print("Ready!")

In [None]:
def skeleton_of_thought(query, *, max_workers=4):
    """Answer ``query`` using Skeleton-of-Thought prompting.

    The model is first asked for a numbered outline ("skeleton"). Each outline
    point is then expanded by its own chat-completion request; those requests
    are independent, so they are issued concurrently from a thread pool.
    Progress is reported with ``print``.

    Args:
        query: The question or topic to write about.
        max_workers: Upper bound on concurrent expansion requests. Lower it
            if the account runs into rate limits; ``1`` expands the points
            one after another.

    Returns:
        A string starting with ``"# <query>"`` followed by every outline
        point and its expanded text, in outline order, separated by blank
        lines.

    Any exception raised by the OpenAI client propagates to the caller.
    """
    print("Generating skeleton...")
    outline_reply = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": f"Create a detailed numbered skeleton outline (8–15 points) for answering this:\n{query}\nOnly the outline, no full text."}],
        temperature=0.3
    )
    # The API may return a message without text content; treat it as empty
    # instead of failing on the string concatenation below.
    skeleton = outline_reply.choices[0].message.content or ""

    print("\nSkeleton:\n" + skeleton + "\n")
    print("Filling sections in parallel...\n")

    outline_lines = [line.strip() for line in skeleton.splitlines() if line.strip()]
    # Keep only lines that *start* with a number marker ("1.", "2)", "**3.",
    # "## 4:") so that sub-bullets which merely mention a digit somewhere
    # (a year, a count) are not expanded as separate sections.
    numbered = re.compile(r"^\W*\d+\s*[.):\-]")
    points = [line for line in outline_lines if numbered.match(line)]
    if not points:
        # The model ignored the "numbered" instruction: use every outline line.
        points = outline_lines

    def expand(point):
        """Request the full text for one outline point and format the section."""
        reply = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": f"Expand this outline point into full, detailed content:\n{point}\nTopic: {query}"}],
            temperature=0.7,
            max_tokens=600
        )
        body = (reply.choices[0].message.content or "").strip()
        return f"{point}\n{body}"

    sections = []
    if points:  # ThreadPoolExecutor rejects max_workers=0, so skip when empty
        pool_size = max(1, min(max_workers, len(points)))
        with ThreadPoolExecutor(max_workers=pool_size) as pool:
            pending = [pool.submit(expand, point) for point in points]
            # Collect in submission order so the sections follow the outline,
            # whichever request happens to finish first.
            for i, job in enumerate(pending, 1):
                print(f"Filling {i}/{len(points)}...")
                sections.append(job.result())

    final = "# " + query + "\n\n" + "\n\n".join(sections)
    print("\nCOMPLETE!\n" + "="*60)
    return final

In [None]:
# Ask for a topic, generate the article, and show it.
query = input("What should I write about? ").strip()
if query:
    result = skeleton_of_thought(query)
    print(result)
else:
    # A blank topic would still trigger the outline request plus one request
    # per outline point, so skip generation entirely.
    result = ""
    print("No topic entered; nothing to generate.")