# Customize output format in prompt

In [9]:
import sys

# Directory added to the import search path so that local helper modules can be
# imported from this notebook (the `utils` import further down presumably
# resolves from here -- confirm). The path is relative to the working directory.
MODULES_DIR = '../../modules'

# Only append when missing: an unconditional append would add one more
# duplicate entry to sys.path every time this cell is re-run.
if MODULES_DIR not in sys.path:
    sys.path.append(MODULES_DIR)

## Load model

In [1]:
from langchain_openai import ChatOpenAI

# Identifier of the OpenAI chat model that the chain below sends prompts to.
MODEL_NAME = "gpt-3.5-turbo-0125"
model = ChatOpenAI(model=MODEL_NAME)

## Load prompt

In [2]:
from langchain_core.prompts import load_prompt

# Serialized prompt definition, addressed relative to the notebook's working
# directory.
path = '../prompts/ES.json'
# Deserialize it into a prompt object; the echoed repr shows a PromptTemplate
# whose only input variable is `TABLA`.
prompt = load_prompt(path)
# Bare expression so the loaded prompt is displayed as the cell output.
prompt

PromptTemplate(input_variables=['TABLA'], template='La siguiente TABLA representa el retorno acumulado anual de activos financieros.\n\nTABLA:\n\n{TABLA}\n\nPor favor, analize y explique el por qué de los valores más significativos, tanto positivos como negativos, en base a noticias.\n\nProporcione el enlace a las noticias que respaldan su análisis.')

## Custom Output Parser

### Create Pydantic model

In [3]:
from langchain_core.pydantic_v1 import BaseModel, Field

# Schema of the structured answer requested from the model: a single news item
# that explains one notable return value for one stock.
# NOTE(review): documented with `#` comments on purpose -- pydantic is
# understood to copy a class docstring into the generated JSON schema, which
# would change the format instructions sent to the model. Confirm before
# adding a docstring here.
class News(BaseModel):
    # Every `description` below ends up in the JSON schema that the output
    # parser injects into the prompt, so these strings are prompt text, not
    # just developer documentation.
    stock: str = Field(description="stock symbol")
    # Kept as a plain string; no date parsing or validation is applied here.
    date: str = Field(description="date of the news")
    # NOTE(review): "acumulated" is a misspelling of "accumulated"; left as-is
    # because the string is part of what the model receives.
    value: float = Field(description="significant acumulated return value of the stock")
    title: str = Field(description="title of the news")
    # NOTE(review): the URL is whatever the model returns; nothing here checks
    # that it is well-formed or reachable.
    url: str = Field(description="url of the news")
    source: str = Field(description="source of the news")
    explanation: str = Field(description="explanation of the news relevance to the stock")

### Associate Pydantic model with parser

In [4]:
from langchain_core.output_parsers import PydanticOutputParser

# Output parser bound to the `News` schema. It is used twice below: to produce
# the format instructions added to the prompt, and as the last step of the
# chain, where it turns the model reply into a `News` object.
parser = PydanticOutputParser(pydantic_object=News)

### Add parser to prompt

In [5]:
# Placeholder appended to the loaded template; the following cell binds the
# parser's format instructions to it through `partial_variables`.
FORMAT_INSTRUCTIONS_SUFFIX = '\n\n{format_instructions}\n'

# Append the placeholder only when it is not already there: an unguarded
# `template + suffix` adds another copy on every re-run of this cell, because
# `prompt` is mutated in place.
if not prompt.template.endswith(FORMAT_INSTRUCTIONS_SUFFIX):
    prompt.template = prompt.template + FORMAT_INSTRUCTIONS_SUFFIX

# Display the resulting template text as the cell output.
prompt.template

'La siguiente TABLA representa el retorno acumulado anual de activos financieros.\n\nTABLA:\n\n{TABLA}\n\nPor favor, analize y explique el por qué de los valores más significativos, tanto positivos como negativos, en base a noticias.\n\nProporcione el enlace a las noticias que respaldan su análisis.\n\n{format_instructions}\n'

In [6]:
# Pre-fill the `{format_instructions}` placeholder with the text generated by
# the parser, so that `TABLA` remains the only variable supplied at call time.
format_instructions = parser.get_format_instructions()
prompt.partial_variables["format_instructions"] = format_instructions

# Display the updated prompt object as the cell output.
prompt

PromptTemplate(input_variables=['TABLA'], partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"stock": {"title": "Stock", "description": "stock symbol", "type": "string"}, "date": {"title": "Date", "description": "date of the news", "type": "string"}, "value": {"title": "Value", "description": "significant acumulated return value of the stock", "type": "number"}, "title": {"title": "Title", "description": "title of the news", "type": "string"}, "url": {"title": "Url", "description": "url of the news", "type": "string"}, "source":

## Chain

### Define chain

In [7]:
# Compose the pipeline left to right: the prompt is filled in, the result is
# sent to the chat model, and the model's reply is handed to the parser, which
# yields a `News` object.
chain = prompt | model | parser

### Preprocess input data

In [10]:
import utils

In [11]:
# Ticker symbols whose yearly returns are computed and later passed to the
# chain as the `TABLA` input.
tickers = ['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'TSLA']
df = utils.calculate_yearly_returns(stocks=tickers)

[*********************100%%**********************]  5 of 5 completed
  .groupby(df.index.year).pct_change().add(1)


### Invoke chain

In [12]:
# `TABLA` is the prompt's only remaining input variable; the returns table is
# passed as-is (presumably rendered through its string form when the template
# is formatted -- confirm).
chain_inputs = {"TABLA": df}
output = chain.invoke(chain_inputs)

In [13]:
# Show the parsed result as a plain dictionary (one key per `News` field).
output.dict()

{'stock': 'TSLA',
 'date': '2013-12-31',
 'value': 325.424311,
 'title': "Tesla's Record Deliveries",
 'url': 'https://www.cnbc.com/2014/01/14/tesla-deliveries-top-6900-in-q4.html',
 'source': 'CNBC',
 'explanation': "Tesla reported record deliveries for Q4 2013, exceeding market expectations. This positive news led to a significant increase in the stock's acumulated return for the year."}