# Generating Synthetic Entities with `Outlines`


Plan: Given a domain/industry, we need to generate synthetic entities in two steps:

1. First, given a domain/industry name and a description of that domain, generate a list of N possible job titles (each with a job description):

IndustryJobs: Industry Name, Industry Description, Job Titles

2. Then, for each generated job title/description, generate a job entity:

Job Entity: Job Title (str), Job Description (str), Associated Job Postings/Positions (List[str]), Job Skills (List[str])


In [1]:
import os
from enum import Enum
from getpass import getpass
from dataclasses import dataclass

from pydantic import BaseModel, conlist, constr

import outlines

In [2]:
# Ask for the OpenAI API key only when it is not already configured, so that
# re-running this cell (or starting the notebook with the key exported in the
# shell) neither re-prompts nor overwrites an existing value. `getpass` reads
# the key without echoing it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API Key: ")

In [3]:
# A single job within an industry: a title paired with its description.
# Documented with comments rather than a class docstring so the model
# definition itself stays exactly as written.
class Job(BaseModel):
    # Name of the role.
    job_title: str
    # Free-text description of the role.
    job_description: str


# Target schema for the generation step: an industry (name and description)
# together with the jobs generated for it.
class IndustryJobs(BaseModel):
    # Name of the industry the jobs belong to.
    industry_name: str
    # Free-text description of the industry.
    industry_description: str
    # Between 5 and 10 Job entries (inclusive), enforced by `conlist`. The
    # `type: ignore` silences type checkers, which do not accept a call
    # expression as an annotation.
    industry_jobs: conlist(Job, min_length=5, max_length=10)  # type: ignore

In [4]:
# NOTE: the docstring below is not documentation -- it is the prompt template.
# `outlines.prompt` renders it on each call, substituting the `name` and
# `description` arguments for the `{{ ... }}` placeholders, so any edit to the
# text changes what is sent to the model. Calling the decorated function
# returns the rendered prompt text, hence the `str` return annotation.
@outlines.prompt
def industry_jobs_prompt(name: str, description: str) -> str:
    """
    You are an expert human resources professional with broad, deep knowledge of talent profiles across every industry.
    Your job is to generate a list of diverse and popular job titles and corresponding descriptions that cover a range
    of functions, from foundational roles to innovative and emerging positions based on a provided industry name and description.

    Here is the new industry you need to generate jobs for:
    Industry Name: {{ name }}
    Industry Description: {{ description }}
    Jobs List:
    """

In [5]:
from outlines import models

# Create the model handle used for generation, backed by OpenAI's "gpt-4".
# NOTE(review): presumably authenticates via the OPENAI_API_KEY environment
# variable set earlier in this notebook -- confirm against the outlines docs.
model = models.openai("gpt-4")

In [6]:
# Render the prompt template for a sample industry. The description is split
# across adjacent string literals purely for readability; the pieces
# concatenate into a single string.
prompt = industry_jobs_prompt(
    name="Software Development",
    description=(
        "Software development is the process of conceiving, specifying, "
        "designing, programming, documenting, testing, and bug fixing "
        "involved in creating and maintaining applications, frameworks, "
        "or other software components."
    ),
)

In [9]:
# Request a JSON generator whose output follows the IndustryJobs schema.
# NOTE(review): with the outlines version used here this call raises
# NotImplementedError for OpenAI models (see the cell output below), so no
# generator is produced. The result is also not bound to a name, and `prompt`
# is never passed to it. Presumably a model backend that supports
# schema-constrained generation is required -- confirm against the outlines
# documentation before relying on this cell.
outlines.generate.json(model, IndustryJobs)

NotImplementedError: Cannot use JSON Schema-structure generation with an OpenAI model due to the limitations of the OpenAI API