# Synthetic Resume Generator using Multiple LLMs

## Objective
Generate synthetic resumes (Junior, Mid-Level, Senior) using:

1. OpenRouter API (2 models)
2. Hugging Face Transformers Pipeline (2 models)

We compare:
- JSON validity
- Structure consistency
- Experience differentiation
- Diversity

Author: [Your Name]

In [1]:
import json
import os
from dotenv import load_dotenv
from openai import OpenAI
from transformers import pipeline as hf_pipeline
import gradio as gr

In [None]:
#!uv add accelerate

Resolved 277 packages in 9.53s
Prepared 1 package in 3.27s
Installed 1 package in 3.67s
 + accelerate==1.12.0


In [2]:
# Read credentials from the environment (after loading a .env file, if any).
load_dotenv(override=True)
openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
hf_token = os.getenv('HF_TOKEN')

# Report on the OpenRouter key: present and plausibly shaped, present but
# oddly shaped, or missing altogether.
if openrouter_api_key and openrouter_api_key.startswith("sk"):
    print("API key found and looks good so far!")
elif openrouter_api_key:
    print("An API key was found, but it doesn't start with sk; please check you're using the right key")
else:
    print("No API key was found")

# Same three-way report for the Hugging Face token.
if hf_token and hf_token.startswith("hf_"):
    print("HuggingFace token found and looks good!")
elif hf_token:
    print("HF token found but doesn't start with hf_; please check")
else:
    print("No HuggingFace token found")

# OpenAI-compatible client pointed at the OpenRouter endpoint.
openrouter = OpenAI(
    api_key=openrouter_api_key,
    base_url='https://openrouter.ai/api/v1',
)

API key found and looks good so far!
HuggingFace token found and looks good!


In [3]:
# JSON shape the models are asked to reproduce: an array of resume objects.
# The text is embedded verbatim at the end of every prompt.
RESUME_SCHEMA = """
[
  {
    "name": "string",
    "experience_level": "Junior | Mid | Senior",
    "education": [
      {
        "degree": "string",
        "field": "string",
        "institution": "string",
        "year": "string"
      }
    ],
    "skills": ["string"],
    "work_experience": [
      {
        "job_title": "string",
        "company": "string",
        "years": "string",
        "responsibilities": ["string"],
        "achievements": ["string"]
      }
    ],
    "certifications": ["string"],
    "projects": ["string"]
  }
]
"""


def build_prompt(experience_level, role):
    """Return the instruction prompt for one synthetic resume.

    Args:
        experience_level: Seniority label interpolated into the request
            (the requirements list describes "Junior", "Mid" and "Senior").
        role: Job title interpolated right after the seniority label.

    Returns:
        A multi-line string that starts and ends with a newline and embeds
        ``RESUME_SCHEMA`` after the list of requirements.
    """
    prompt_lines = [
        "",
        "You are a synthetic resume generator.",
        "",
        f"Generate 1 fictional resume for a {experience_level} {role}.",
        "",
        "Requirements:",
        "- Ensure experience matches level.",
        "- Senior: 8+ years, leadership, measurable impact.",
        "- Mid: 3-7 years, strong contributions.",
        "- Junior: 0-2 years, internships/projects.",
        "- Make names diverse.",
        "- Do NOT use real people.",
        "- Return ONLY valid JSON (no markdown, no backticks, no explanation).",
        "- Follow this schema exactly:",
        "",
        RESUME_SCHEMA,
        "",
    ]
    return "\n".join(prompt_lines)

In [4]:
def _as_list(value):
    """Return *value* when it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def format_resume_md(json_text, model_name):
    """Convert raw JSON resume text into nicely formatted markdown.

    The text comes straight from a language model, so it is treated as
    untrusted: a surrounding code fence is stripped, and JSON that parses
    but does not match the expected shape (scalar top-level value, ``null``
    where a list is expected, non-object entries) is reported or skipped
    instead of raising.

    Args:
        json_text: Raw model output, expected to hold one resume object or
            an array of resume objects.
        model_name: Label shown in the heading of the rendered panel.

    Returns:
        Markdown for the resume(s), or a markdown error panel containing
        the parse error and the first 600 characters of ``json_text``.
    """
    clean = json_text.strip()
    if clean.startswith("```"):
        # Drop the opening fence line (e.g. "```json") and the closing fence.
        clean = clean.split("\n", 1)[-1]
        clean = clean.rsplit("```", 1)[0].strip()

    try:
        resumes = json.loads(clean)
        if isinstance(resumes, dict):
            resumes = [resumes]
        if not isinstance(resumes, list):
            # Valid JSON, but a bare string/number/etc. cannot be rendered.
            raise ValueError("expected a JSON object or an array of objects")
    except (ValueError, RecursionError) as e:
        return f"### ‚ö†Ô∏è {model_name}\n\n**JSON parse error:** `{e}`\n\n```\n{json_text[:600]}\n```"

    parts = [f"## ü§ñ {model_name}\n\n"]
    for r in resumes:
        if not isinstance(r, dict):
            # Ignore stray non-object entries rather than crash on .get().
            continue

        parts.append(f"---\n### üë§ {r.get('name', 'N/A')}\n")
        parts.append(f"**Experience Level:** {r.get('experience_level', 'N/A')}\n\n")

        parts.append("#### üéì Education\n")
        for edu in _as_list(r.get('education')):
            if not isinstance(edu, dict):
                continue
            parts.append(
                f"- {edu.get('degree')} in {edu.get('field')} ‚Äî {edu.get('institution')} ({edu.get('year')})\n"
            )

        skills = _as_list(r.get('skills'))
        if skills:
            parts.append("\n#### üõ†Ô∏è Skills\n")
            parts.append(", ".join(f"`{s}`" for s in skills) + "\n")

        parts.append("\n#### üíº Work Experience\n")
        for job in _as_list(r.get('work_experience')):
            if not isinstance(job, dict):
                continue
            parts.append(f"**{job.get('job_title')}** @ {job.get('company')} _{job.get('years')}_\n")
            for resp in _as_list(job.get('responsibilities')):
                parts.append(f"  - {resp}\n")
            for ach in _as_list(job.get('achievements')):
                parts.append(f"  - ‚úÖ {ach}\n")
            parts.append("\n")

        certs = _as_list(r.get('certifications'))
        if certs:
            parts.append("#### üìú Certifications\n")
            parts.extend(f"- {c}\n" for c in certs)

        projects = _as_list(r.get('projects'))
        if projects:
            parts.append("\n#### üöÄ Projects\n")
            parts.extend(f"- {p}\n" for p in projects)

        parts.append("\n")
    return "".join(parts)

In [5]:
def stream_openrouter(model, prompt, temperature):
    """Stream a chat completion from OpenRouter, yielding the text so far.

    Args:
        model: Model identifier passed to the chat completions endpoint.
        prompt: User message content.
        temperature: Sampling temperature forwarded to the API.

    Yields:
        The accumulated response text after each received chunk (the whole
        text so far, not just the newest fragment).
    """
    messages = [
        {"role": "system", "content": "You generate structured JSON resumes. Return ONLY raw JSON, no markdown."},
        {"role": "user", "content": prompt}
    ]
    stream = openrouter.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
        temperature=temperature
    )
    collected = ""
    for chunk in stream:
        # A streamed chunk can arrive with an empty `choices` list (for
        # example a usage-only chunk); indexing [0] on it would raise
        # IndexError and abort the whole generation.
        if chunk.choices:
            collected += chunk.choices[0].delta.content or ""
        yield collected


def run_hf(model_name, prompt, temperature):
    """Run a HuggingFace model and return the generated text (no streaming).

    Args:
        model_name: Hub model id handed to the text-generation pipeline.
        prompt: Prompt text to continue.
        temperature: Sampling temperature. A value of 0 or below switches
            to greedy decoding, because sampling needs a strictly positive
            temperature and the UI slider allows 0.0.

    Returns:
        The generated continuation with the prompt prefix removed and
        surrounding whitespace stripped.
    """
    generator = hf_pipeline(
        "text-generation",
        model=model_name,
        device_map="auto",
        token=hf_token
    )

    gen_kwargs = {"max_new_tokens": 1200}
    if temperature is not None and temperature <= 0:
        # Sampling with temperature 0 is rejected by transformers; greedy
        # decoding is the deterministic equivalent.
        gen_kwargs["do_sample"] = False
    else:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = temperature

    output = generator(prompt, **gen_kwargs)
    full = output[0]["generated_text"]
    # The pipeline returns prompt + continuation; only cut the prefix when
    # it is really there, so a differing output is never truncated blindly.
    if full.startswith(prompt):
        full = full[len(prompt):]
    return full.strip()

In [6]:
def generate_resumes(or_model_1, or_model_2, hf_model_1, hf_model_2,
                     experience_level, role, temperature):
    """
    Produce resumes from all four models, one after another.

    This is a generator: every yield is a 4-tuple holding the current
    markdown for panels 1-4, so the caller can refresh all panels each
    time one of them changes. The two OpenRouter models stream their raw
    text first; the two HuggingFace models then show a loading notice
    followed by their formatted result.
    """
    prompt = build_prompt(experience_level, role)
    panels = ["", "", "", ""]

    # --- OpenRouter models (streamed), panels 1 and 2 ---
    for slot, model in enumerate((or_model_1, or_model_2)):
        raw = ""
        for raw in stream_openrouter(model, prompt, temperature):
            panels[slot] = f"## ü§ñ {model}\n\n‚è≥ _Generating..._\n\n```json\n{raw}\n```"
            yield tuple(panels)
        # Stream finished: swap the raw preview for the rendered resume.
        panels[slot] = format_resume_md(raw, model)
        yield tuple(panels)

    # --- HuggingFace models (blocking), panels 3 and 4 ---
    for slot, model in enumerate((hf_model_1, hf_model_2), start=2):
        panels[slot] = f"## ü§ó {model}\n\n‚è≥ _Loading model ‚Äî this may take a moment..._"
        yield tuple(panels)
        generated = run_hf(model, prompt, temperature)
        panels[slot] = format_resume_md(generated, model)
        yield tuple(panels)

In [None]:
# Model identifiers offered in the two OpenRouter dropdowns below; the first
# entry is the default for dropdown 1 and the second for dropdown 2.
OPENROUTER_MODEL_CHOICES = [
    "openai/gpt-oss-120b",
    "x-ai/grok-4",
]

# Model identifiers offered in the two HuggingFace dropdowns below; the
# selected values are passed through generate_resumes to run_hf.
HF_MODEL_CHOICES = [
    "meta-llama/Llama-3.1-8B",
    "google/gemma-7b-it",
]

# Build the Gradio UI: controls on top, model pickers, a generate button,
# and a 2x2 grid of markdown panels (one per model).
with gr.Blocks(theme=gr.themes.Soft(), title="Synthetic Resume Generator") as demo:

    gr.Markdown(
        """
        # üìÑ Synthetic Resume Generator
        Compare resume generation across **2 OpenRouter models** and **2 open-source HuggingFace models** side by side.
        """
    )

    # ---- Controls ----
    # Role, experience level and temperature feed build_prompt / the model
    # calls via generate_resumes.
    with gr.Row():
        with gr.Column(scale=2):
            role_input = gr.Textbox(
                label="üßë‚Äçüíº Role / Job Title",
                placeholder="e.g. Backend Engineer, Data Scientist, DevOps Engineer",
                value="Backend Engineer"
            )
        with gr.Column(scale=1):
            experience_input = gr.Radio(
                choices=["Junior", "Mid", "Senior"],
                value="Senior",
                label="üìä Experience Level"
            )
        with gr.Column(scale=1):
            temperature_input = gr.Slider(
                minimum=0.0, maximum=1.0, step=0.05, value=0.4,
                label="üé® Variety / Creativity"
            )

    # ---- Model pickers ----
    # Left column: OpenRouter models; right column: HuggingFace models.
    with gr.Row():
        with gr.Column():
            gr.Markdown("#### üåê OpenRouter Models")
            or_model_1 = gr.Dropdown(
                choices=OPENROUTER_MODEL_CHOICES,
                value=OPENROUTER_MODEL_CHOICES[0],
                label="OpenRouter Model 1"
            )
            or_model_2 = gr.Dropdown(
                choices=OPENROUTER_MODEL_CHOICES,
                value=OPENROUTER_MODEL_CHOICES[1],
                label="OpenRouter Model 2"
            )
        with gr.Column():
            gr.Markdown("#### ü§ó HuggingFace Models")
            hf_model_1 = gr.Dropdown(
                choices=HF_MODEL_CHOICES,
                value=HF_MODEL_CHOICES[0],
                label="HuggingFace Model 1"
            )
            hf_model_2 = gr.Dropdown(
                choices=HF_MODEL_CHOICES,
                value=HF_MODEL_CHOICES[1],
                label="HuggingFace Model 2"
            )

    generate_btn = gr.Button("üöÄ Generate Resumes", variant="primary", size="lg")

    gr.Markdown("---")

    # ---- Output panels ----
    # out1/out2 show the OpenRouter results, out3/out4 the HuggingFace ones.
    with gr.Row():
        out1 = gr.Markdown(value="*Output will appear here once generated...*")
        out2 = gr.Markdown(value="*Output will appear here once generated...*")

    with gr.Row():
        out3 = gr.Markdown(value="*Output will appear here once generated...*")
        out4 = gr.Markdown(value="*Output will appear here once generated...*")

    # The input order matches generate_resumes' parameter order, and each
    # 4-tuple it yields maps onto the four output panels in order.
    generate_btn.click(
        fn=generate_resumes,
        inputs=[or_model_1, or_model_2, hf_model_1, hf_model_2,
                experience_input, role_input, temperature_input],
        outputs=[out1, out2, out3, out4]
    )

# Start the local Gradio server for the app defined above.
demo.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]