#### 1 - Importing necessary libraries

In [9]:
from typing import List
from langchain.chains.llm import LLMChain
from langchain_core.prompts import PromptTemplate

from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
import json
from langchain_openai import ChatOpenAI

In [10]:
from dotenv import load_dotenv

load_dotenv("../.env")

True

#### 2 - Defining the data structures and the parser

In [12]:
# Base data structure
class PromptObject(BaseModel):
    prompt: str = Field(description="prompt provided by the user")
    cluster: str = Field(description="Cluster to which the prompt belongs to")
    sub_classification: str = Field(
        description="Sub classification of the prompt within the cluster"
    )


# Data structure storing the objects
class PromptObjectList(BaseModel):
    prompt_objects: List[PromptObject] = Field(
        description="List of PromptObject objects"
    )


parser = PydanticOutputParser(pydantic_object=PromptObjectList)

#### 3 - Prompt Template

In [19]:
template = """
Generate a total of 15 prompts after considering the following information:

Prompt topic: {topic}
Prompt sub-topic: {sub_topic}

Each prompt should be unique and should be related to the topic and sub-topic provided. The prompt should be realistic and should resemble an actual user's prompt.

Make sure you generate 15 different prompts and that none of them are identical. Please 
follow the instructions given below on how the output should be structured:

{format_instructions}

"""

prompt_template = PromptTemplate(
    input_variables=["topic", "sub_topic"],
    template=template,
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [20]:
print(prompt_template)

input_variables=['sub_topic', 'topic'] partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"prompt_objects": {"title": "Prompt Objects", "description": "List of PromptObject objects", "type": "array", "items": {"$ref": "#/definitions/PromptObject"}}}, "required": ["prompt_objects"], "definitions": {"PromptObject": {"title": "PromptObject", "type": "object", "properties": {"prompt": {"title": "Prompt", "description": "prompt provided by the user", "type": "string"}, "cluster": {"title": "Cluster", "description": "Cluster to which 

#### 4 - Initialzing the LLM and Chain

In [21]:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.4)

chain = prompt_template | llm | parser

#### 5 - Testing the Prompt Generation

In [15]:
topic = "Business and Productivity"
sub_topic = "Email Generation"

In [16]:
result = chain.invoke({"topic": topic, "sub_topic": sub_topic})

In [18]:
result.prompt_objects

[PromptObject(prompt="Can you help me draft a follow-up email to a client who hasn't responded to my last message about our proposal?", cluster='Business and Productivity', sub_classification='Email Generation'),
 PromptObject(prompt='I need to write a professional email to my team announcing the changes in our project deadlines. What should I include?', cluster='Business and Productivity', sub_classification='Email Generation'),
 PromptObject(prompt='What is the best way to compose a thank-you email to a colleague who helped me with a recent project?', cluster='Business and Productivity', sub_classification='Email Generation')]

In [56]:
# Invoke the chain and print raw output
try:
    raw_output = chain.invoke({"topic": topic, "sub_topic": sub_topic})
    print("Raw Output:", raw_output)

    # Store result in a variable
    result = chain.invoke({"topic": topic, "sub_topic": sub_topic})
    print("Parsed Result:", result)
except Exception as e:
    print("Error:", e)

Error: Failed to parse PromptObjectList from completion [{"prompt": "Can you help me draft a follow-up email to a client who hasn't responded to my last message regarding the project proposal?", "cluster": "Client Communication", "sub_classification": "Follow-up Emails"}, {"prompt": "I need to write a thank-you email to my team for their hard work on the recent project. Can you suggest a positive and motivating tone?", "cluster": "Team Management", "sub_classification": "Appreciation Emails"}, {"prompt": "What should I include in an introductory email to potential partners for a new business venture? I want to make a strong first impression.", "cluster": "Networking", "sub_classification": "Introductory Emails"}]. Got: 1 validation error for PromptObjectList
__root__
  PromptObjectList expected dict not list (type=type_error)


#### 6 - Creating the dataset

In [23]:
clusters = [
    "Communication",
    "Music and Audio",
    "Programming and Development",
    "Business and Productivity",
]

sub_classifications = [
    ["Chatbots and Virtual Assistants", "Conversation", "Mental Health"],
    ["Music Creation", "Speech Generation", "Podcast Content Creation"],
    ["Coding and Programming Assistance", "APi Integration"],
    ["Presentation Creation", "Email Generation"],
]

for cluster_index in range(len(clusters)):
    cluster = clusters[cluster_index]
    sub_classes = sub_classifications[cluster_index]
    for sub_class in sub_classes:
        print(f"Generating prompts for {cluster} - {sub_class}")
        try:
            prompts = chain.invoke({"topic": cluster, "sub_topic": sub_class})

            # Store result in a variable
            result = chain.invoke(
                {
                    "topic": clusters[cluster],
                    "sub_topic": sub_classifications[cluster][sub_class],
                }
            )
            print("Parsed Result:", result)
        except Exception as e:
            print("Error:", e)

Generating prompts for Communication - Chatbots and Virtual Assistants
Generating prompts for Communication - Conversation
Generating prompts for Communication - Mental Health
Generating prompts for Music and Audio - Music Creation
Generating prompts for Music and Audio - Speech Generation
Generating prompts for Music and Audio - Podcast Content Creation
Generating prompts for Programming and Development - Coding and Programming Assistance
Generating prompts for Programming and Development - APi Integration
Generating prompts for Business and Productivity - Presentation Creation
Generating prompts for Business and Productivity - Email Generation
