### Synthetic Dataset Generator with LLMs (Anthropic API)

Everything runs with your Anthropic API key — no model downloads.

In [None]:
# Imports and API setup

import os
import json
import requests
import gradio as gr
from dotenv import load_dotenv

In [None]:
# Pull settings from a local .env file into the process environment.
load_dotenv()

# The Anthropic key is read from the "API_KEY" environment variable. A missing
# or empty value stops the notebook here instead of failing on the first request.
API_KEY = os.environ.get("API_KEY")

if API_KEY:
    print("API key loaded successfully!")
else:
    raise ValueError(" API_KEY not found. Check your .env file")


API key loaded successfully!


In [None]:
# Anthropic Messages endpoint; generate_dataset posts to this URL.
API_URL = "https://api.anthropic.com/v1/messages"

# List the models this API key can access, so the model IDs used later in the
# notebook can be checked against what the account actually offers.
r = requests.get(
    "https://api.anthropic.com/v1/models",
    headers={
        "x-api-key": API_KEY,
        "anthropic-version": "2023-06-01"
    },
    # requests applies no timeout by default; without one a stalled connection
    # would hang this cell indefinitely.
    timeout=30,
)
# Show the parsed listing on success, otherwise the raw error body.
print(r.json() if r.ok else r.text)


{'data': [{'type': 'model', 'id': 'claude-haiku-4-5-20251001', 'display_name': 'Claude Haiku 4.5', 'created_at': '2025-10-15T00:00:00Z'}, {'type': 'model', 'id': 'claude-sonnet-4-5-20250929', 'display_name': 'Claude Sonnet 4.5', 'created_at': '2025-09-29T00:00:00Z'}, {'type': 'model', 'id': 'claude-opus-4-1-20250805', 'display_name': 'Claude Opus 4.1', 'created_at': '2025-08-05T00:00:00Z'}, {'type': 'model', 'id': 'claude-opus-4-20250514', 'display_name': 'Claude Opus 4', 'created_at': '2025-05-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-sonnet-4-20250514', 'display_name': 'Claude Sonnet 4', 'created_at': '2025-05-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-3-7-sonnet-20250219', 'display_name': 'Claude Sonnet 3.7', 'created_at': '2025-02-24T00:00:00Z'}, {'type': 'model', 'id': 'claude-3-5-haiku-20241022', 'display_name': 'Claude Haiku 3.5', 'created_at': '2024-10-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-3-haiku-20240307', 'display_name': 'Claude Haiku 3', 'created_at': '

In [None]:
# Models to compare: display label -> Anthropic model ID.
# The labels populate the UI dropdown in this (insertion) order.
# Tier notes are a rough guide only: Haiku = fast & cheap, Sonnet = balanced,
# Opus = powerful (slowest).
MODELS = {
    "Claude 3 Haiku": "claude-3-haiku-20240307",          # Haiku: fast & cheap
    "Claude Haiku 4.5": "claude-haiku-4-5-20251001",      # Haiku: fast & cheap
    "Claude Sonnet 4.5": "claude-sonnet-4-5-20250929",    # Sonnet: balanced
    "Claude Opus 4.1": "claude-opus-4-1-20250805",        # Opus: powerful (slowest)
    "Claude Opus 4": "claude-opus-4-20250514",            # Opus: powerful (slowest)
    "Claude Sonnet 4": "claude-sonnet-4-20250514",        # Sonnet: balanced
    "Claude Sonnet 3.7": "claude-3-7-sonnet-20250219",    # Sonnet: balanced
}


#### Synthetic Dataset Generation Function

In [None]:
# Dataset generator

def generate_dataset(topic, n_records, model_choice, max_tokens=500, temperature=0.7, timeout=60):
    """Ask a Claude model for a synthetic dataset and return the raw reply text.

    Sends a single user message to the Anthropic Messages endpoint (``API_URL``)
    asking for ``n_records`` JSON records about ``topic``.

    Args:
        topic: Subject of the dataset; interpolated into the prompt as-is.
        n_records: Number of records to request; interpolated into the prompt.
        model_choice: Anthropic model ID sent as the ``model`` field.
        max_tokens: Upper bound on reply length sent to the API. A reply cut
            off at this limit will not be complete JSON, so raise it when
            asking for more or larger records.
        temperature: Sampling temperature sent to the API.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        str: The text of the first text block in the reply. The prompt asks
        for a JSON array, but the text is returned unparsed and unvalidated.
        On any failure (network error, non-JSON response body, API error
        payload, reply without text) a string starting with ``"Error: "``.
    """
    prompt = f"""
You are a data generator creating synthetic datasets.
Generate {n_records} records about {topic}.
Output only a valid JSON array (no explanations or markdown).
Each record should have 4–6 fields and look realistic but fake.
"""

    headers = {
        "x-api-key": API_KEY,
        "content-type": "application/json",
        "anthropic-version": "2023-06-01",
    }

    payload = {
        "model": model_choice,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "messages": [{"role": "user", "content": prompt}],
    }

    try:
        # A timeout is required: requests waits forever by default.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # Connection problems, timeouts and non-JSON bodies are reported with
        # the same "Error: ..." convention used for API-level errors below.
        return f"Error: {exc}"

    blocks = result.get("content") if isinstance(result, dict) else None
    if blocks:
        # Return the first block that actually carries text, rather than
        # assuming the first block is a text block.
        for block in blocks:
            if isinstance(block, dict) and "text" in block:
                return block["text"]

    # No usable text: surface the whole payload (e.g. the API's error object).
    return f"Error: {result}"


#### Gradio UI

In [None]:
#  simple Gradio UI for dataset generation

def ui_generate(topic, n_records, model_label):
    """Gradio callback: validate the form values and request a dataset.

    Args:
        topic: Dataset topic text from the form.
        n_records: Requested record count from the number field. It may be a
            float, or None when the field is empty.
        model_label: A key of ``MODELS`` chosen in the dropdown.

    Returns:
        str: Whatever ``generate_dataset`` returns, or an ``"Error: ..."``
        string when ``model_label`` is not a known model.
    """
    max_records = 5  # limit for demo purposes (matches the form label)

    model_id = MODELS.get(model_label)
    if model_id is None:
        # A cleared dropdown or stale label would otherwise raise KeyError.
        return f"Error: unknown model {model_label!r}"

    try:
        requested = int(n_records)
    except (TypeError, ValueError, OverflowError):
        # Empty field (None), NaN or infinity: fall back to the smallest request.
        requested = 1

    # Clamp on both sides so zero or negative counts never reach the prompt.
    n_records = max(1, min(requested, max_records))
    return generate_dataset(topic, n_records, model_id)

# Build the Gradio interface. The CSS caps the page at a 600px-wide, centered column.
with gr.Blocks(css=".gradio-container {max-width: 600px !important; margin: auto;}") as demo:
    gr.Markdown("## Synthetic Dataset Generator using LLM APIs (Claude)")

    # Topic and record count share one row.
    with gr.Row():
        topic = gr.Textbox(label="Dataset Topic", value="Employee Records")
        # The cap named in the label is applied inside ui_generate, not by this widget.
        n_records = gr.Number(label="Number of Records (Max 5 for demo purposes)", value=3)

    # The dropdown lists the MODELS labels; ui_generate maps the chosen label to a model ID.
    model_choice = gr.Dropdown(
        label="Choose Model",
        choices=list(MODELS.keys()),
        value="Claude 3 Haiku"
    )

    btn = gr.Button("🚀 Generate")

    # Read-only code viewer for the generated text, highlighted as JSON.
    output = gr.Code(label="Generated JSON Dataset", language="json", lines=15, interactive=False)

    # On click, call ui_generate with the three form values and show its return value in `output`.
    btn.click(ui_generate, inputs=[topic, n_records, model_choice], outputs=[output])

# Start the app; the recorded run served it on a local URL.
demo.launch()


* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




#### Save Output to File

In [None]:
def save_dataset_to_file(data, filename="synthetic_dataset.json"):
    """Write generated dataset text to ``filename``.

    If ``data`` parses as JSON it is re-serialized with a 2-space indent.
    Otherwise the text is written unchanged, so a malformed reply is still
    kept for inspection.

    Args:
        data: Text to save, normally the string returned by the generator.
        filename: Destination path; an existing file is overwritten.

    Returns:
        None. A one-line status message is printed in both cases.

    Raises:
        TypeError: If ``data`` is not something ``json.loads`` accepts
            (e.g. None). Raised before the file is opened.
    """
    try:
        parsed = json.loads(data)
    except json.JSONDecodeError:
        # Only a parse failure selects the plain-text fallback. A bare
        # `except:` would also swallow KeyboardInterrupt and unrelated bugs.
        print("Not valid JSON, saving as plain text instead.")
        with open(filename, "w", encoding="utf-8") as f:
            f.write(data)
        return

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(parsed, f, indent=2)
    print(f"Dataset saved as {filename}")

