In [1]:
# load env with api keys https://stackoverflow.com/a/54028874
%load_ext dotenv
%dotenv ../etc/config.env

import sys
sys.path.append("../")

import os
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.chat_models import ChatOpenAI

from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List

from desci_sense.parsers.base_parser import BaseParser
from desci_sense.postprocessing.parser_utils import fix_json_string_with_backslashes
from desci_sense.configs import init_config


In [2]:
# Define a new Pydantic model with field descriptions and tailored for Twitter.
class TwitterUser(BaseModel):
    name: str = Field(description="Full name of the user.")
    handle: str = Field(description="Twitter handle of the user, without the '@'.")
    age: int = Field(description="Age of the user.")
    hobbies: List[str] = Field(description="List of hobbies of the user.")
    email: str = Field(description="Email address of the user.")
    bio: str = Field(description="Bio or short description about the user.")
    location: str = Field(description="Location or region where the user resides.")
    is_blue_badge: bool = Field(
        description="Boolean indicating if the user has a verified blue badge."
    )
    joined: str = Field(description="Date the user joined Twitter.")
    gender: str = Field(description="Gender of the user.")
    appearance: str = Field(description="Physical description of the user.")
    avatar_prompt: str = Field(
        description="Prompt for generating a photorealistic avatar image. The image should capture the essence of the user's appearance description, ideally in a setting that aligns with their interests or bio. Use professional equipment to ensure high quality and fine details."
    )
    banner_prompt: str = Field(
        description="Prompt for generating a banner image. This image should represent the user's hobbies, interests, or the essence of their bio. It should be high-resolution and captivating, suitable for a Twitter profile banner."
    )

In [3]:
# Instantiate the parser with the new model.
parser = PydanticOutputParser(pydantic_object=TwitterUser)

# Update the prompt to match the new query and desired format.
prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template(
            "answer the users question as best as possible.\n{format_instructions}\n{question}"
        )
    ],
    input_variables=["question"],
    partial_variables={
        "format_instructions": parser.get_format_instructions(),
    },
)

In [4]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"title": "Name", "description": "Full name of the user.", "type": "string"}, "handle": {"title": "Handle", "description": "Twitter handle of the user, without the \'@\'.", "type": "string"}, "age": {"title": "Age", "description": "Age of the user.", "type": "integer"}, "hobbies": {"title": "Hobbies", "description": "List of hobbies of the user.", "type": "array", "items": {"type": "string"}}, "email": {"title": "Email", "description": "Email address of the user.", "type": "string"}, "bio": {"title": "Bio", "des

In [5]:
# Generate the input using the updated prompt.
user_query = (
    "Generate a detailed Twitter profile of a random realistic user with a diverse background, "
    "from any country in the world, original name, including prompts for images. Come up with "
    "real name, never use most popular placeholders like john smith and john doe."
)
_input = prompt.format_prompt(question=user_query)
print(_input.to_messages())

[HumanMessage(content='answer the users question as best as possible.\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"title": "Name", "description": "Full name of the user.", "type": "string"}, "handle": {"title": "Handle", "description": "Twitter handle of the user, without the \'@\'.", "type": "string"}, "age": {"title": "Age", "description": "Age of the user.", "type": "integer"}, "hobbies": {"title": "Hobbies", "description": "List of hobbies of the user.", "type": "array", "items": {"type": "string"}}, "email": {"title": "Email", "description": "Email 

In [6]:



config = init_config()
nano_parser = BaseParser(config=config)


                    headers was transferred to model_kwargs.
                    Please confirm that headers is what you intended.


In [7]:
chat_model = nano_parser.model

In [8]:
output = chat_model(_input.to_messages())
fixed_content = fix_json_string_with_backslashes(output.content)
parsed = parser.parse(fixed_content)
print(output.content)
print(parsed)

AttributeError: 'AIMessage' object has no attribute 'con'

In [14]:
from langchain.output_parsers import OutputFixingParser
from langchain.schema import OutputParserException

try:
    parsed = parser.parse(output.content)
except OutputParserException as e:
    new_nano_parser = BaseParser(config=config)
    new_parser = OutputFixingParser.from_llm(
        parser=parser,
        llm=new_nano_parser.model
    )
    parsed = new_parser.parse(output.content)

                    headers was transferred to model_kwargs.
                    Please confirm that headers is what you intended.


OutputParserException: Failed to parse TwitterUser from completion {
"name": "Ava Rodriguez",
"handle": "avarodriguez",
"age": 27,
"hobbies": ["traveling", "photography", "cooking", "reading"],
"email": "avarodriguez@email.com",
"bio": "A wanderlust-filled, food-loving, bookworm from Spain. Always up for new adventures and exploring different cultures!",
"location": "Barcelona, Spain",
"is\_blue\_badge": true,
"joined": "June 2010",
"gender": "Female",
"appearance": "Curly brown hair, dark eyes, freckles and a slim build, with a warm smile and a friendly demeanor.",
"avatar\_prompt": "A photorealistic portrait of Ava, captured in a serene beach setting, with a vibrant and colorful background that reflects her adventurous spirit and love for photography.",
"banner\_prompt": "A banner image that showcases Ava's passion for cooking and exploring different cuisines, with a beautiful kitchen setup and colorful ingredients in the background, surrounded by a cozy and inviting atmosphere."
}. Got: Invalid \escape: line 9 column 4 (char 325)