# Using Pydantic to output multiple observations

In [1]:
import sys

# Folder (relative to the notebook's working directory) that holds the local
# helper modules such as `models.py`.
modules_dir = '../../modules'

# Only register the folder once: re-running this cell must not keep stacking
# duplicate entries onto `sys.path`.
if modules_dir not in sys.path:
    sys.path.append(modules_dir)

## Load model

In [2]:
from langchain_openai import ChatOpenAI

# Chat model used as the middle step of the chain defined further down.
# temperature=0 makes sampling as deterministic as the API allows, so a
# Restart & Run All yields comparable results and the reply is more likely to
# follow the JSON format that the output parser expects.
model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

## Load prompt

In [3]:
from langchain_core.prompts import load_prompt

# Location of the serialized prompt, relative to the notebook's working directory.
path = '../prompts/ES.json'
# Deserialize the prompt; it exposes a single input variable, `TABLA`.
prompt = load_prompt(path)
# Last expression of the cell: display the loaded prompt object.
prompt

PromptTemplate(input_variables=['TABLA'], template='La siguiente TABLA representa el retorno acumulado anual de activos financieros.\n\nTABLA:\n\n{TABLA}\n\nPor favor, analize y explique el por qué de los valores más significativos, tanto positivos como negativos, en base a noticias.\n\nProporcione el enlace a las noticias que respaldan su análisis.')

## Custom Output Parser

Create a module `models.py` to store the Pydantic models (see [module](../../modules/models.py)).

### Import models

In [4]:
import models

### Associate Pydantic model with parser

In [5]:
from langchain_core.output_parsers import PydanticOutputParser
# Bind the parser to the `NewsSet` model from the local `models` module. The
# parser is used below twice: to generate the format instructions injected into
# the prompt, and as the final step of the chain.
parser = PydanticOutputParser(pydantic_object=models.NewsSet)

### Add the parser's format instructions to the prompt

In [6]:
# Append the `{format_instructions}` placeholder only if it is not there yet,
# so re-running this cell does not stack duplicate placeholders in the template.
if '{format_instructions}' not in prompt.template:
    prompt.template = prompt.template + '\n\n{format_instructions}\n'
# Last expression of the cell: display the resulting template string.
prompt.template

'La siguiente TABLA representa el retorno acumulado anual de activos financieros.\n\nTABLA:\n\n{TABLA}\n\nPor favor, analize y explique el por qué de los valores más significativos, tanto positivos como negativos, en base a noticias.\n\nProporcione el enlace a las noticias que respaldan su análisis.\n\n{format_instructions}\n'

In [7]:
# Pre-fill the `{format_instructions}` placeholder with the text generated by
# the parser, leaving `TABLA` as the only variable supplied at invoke time.
prompt.partial_variables["format_instructions"] = parser.get_format_instructions()
# Show the prompt so the injected instructions can be inspected.
prompt

PromptTemplate(input_variables=['TABLA'], partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"news": {"title": "News", "description": "list of news", "type": "array", "items": {"$ref": "#/definitions/News"}}}, "required": ["news"], "definitions": {"News": {"title": "News", "type": "object", "properties": {"stock": {"title": "Stock", "description": "stock symbol", "type": "string"}, "date": {"title": "Date", "description": "date of the news", "type": "string"}, "value": {"title": "Value", "description": "significant acumulated re

## Chain

### Define chain

In [8]:
# Compose the pipeline: the formatted prompt is sent to the model, and the
# model's reply is handed to the Pydantic output parser.
chain = prompt | model | parser

### Preprocess input data

In [9]:
import utils

In [10]:
# Build the table that is later passed to the prompt as `TABLA`.
# NOTE(review): presumably downloads price data for these tickers and returns a
# pandas DataFrame of yearly returns — confirm against `utils.calculate_yearly_returns`.
df = utils.calculate_yearly_returns(
    stocks=['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'TSLA'],
)

[*********************100%%**********************]  5 of 5 completed
  .groupby(df.index.year).pct_change().add(1)


### Invoke chain

In [11]:
# Run the chain with the table filling the prompt's `TABLA` variable; `output`
# is whatever the parser returns (converted with `.dict()` in the next step).
output = chain.invoke({"TABLA": df})

## Postprocess output

### Format to dictionary

In [12]:
# Convert the parsed result into plain Python containers.
data = output.dict()
# Keep only the list of news entries (one dict per entry).
news = data['news']
# Last expression of the cell: display the list.
news

[{'stock': 'TSLA',
  'date': '2013-12-31',
  'value': 325.424311,
  'title': 'Tesla Model S named Car of the Year',
  'url': 'https://www.cnbc.com/2013/12/03/tesla-model-s-named-car-of-the-year.html',
  'source': 'CNBC',
  'explanation': "The significant increase in Tesla's return can be attributed to the positive reception and award received by the Tesla Model S, which boosted investor confidence in the company's future success."},
 {'stock': 'AAPL',
  'date': '2019-12-31',
  'value': 88.742469,
  'title': 'Apple reports record-breaking iPhone sales',
  'url': 'https://www.apple.com/newsroom/2019/01/apple-reports-first-quarter-results/',
  'source': 'Apple Newsroom',
  'explanation': "Apple's impressive return in 2019 was driven by the company's announcement of record-breaking iPhone sales, indicating strong consumer demand for their products."},
 {'stock': 'AMZN',
  'date': '2015-12-31',
  'value': 119.074933,
  'title': 'Amazon Prime Day sales surpass expectations',
  'url': 'https:

### Transform into DataFrame

In [13]:
import pandas as pd

# Turn the list of news dicts into a table: one row per entry, one column per key.
# NOTE(review): this rebinds `df`, which earlier held the returns table passed
# to the chain; re-running the invoke cell after this one would send the news
# table instead. Consider a distinct name (and update the export cell with it).
df = pd.DataFrame.from_records(news)
df

Unnamed: 0,stock,date,value,title,url,source,explanation
0,TSLA,2013-12-31,325.424311,Tesla Model S named Car of the Year,https://www.cnbc.com/2013/12/03/tesla-model-s-...,CNBC,The significant increase in Tesla's return can...
1,AAPL,2019-12-31,88.742469,Apple reports record-breaking iPhone sales,https://www.apple.com/newsroom/2019/01/apple-r...,Apple Newsroom,Apple's impressive return in 2019 was driven b...
2,AMZN,2015-12-31,119.074933,Amazon Prime Day sales surpass expectations,https://www.cnbc.com/2015/07/16/amazon-prime-d...,CNBC,The sharp increase in Amazon's return can be l...
3,MSFT,2019-12-31,58.259237,Microsoft Azure revenue grows by 62%,https://news.microsoft.com/2019/10/23/microsof...,Microsoft News,Microsoft's substantial return in 2019 can be ...
4,GOOGL,2013-12-31,54.954727,Google acquires Nest Labs,https://www.forbes.com/sites/connieguglielmo/2...,Forbes,The significant increase in Google's return ca...
5,TSLA,2020-12-31,707.395544,Tesla joins S&P 500 index,https://www.reuters.com/article/us-tesla-stock...,Reuters,Tesla's exceptional return in 2020 can be attr...


### Export to Excel

In [16]:
from pathlib import Path

# Destination workbook, relative to the notebook's working directory.
report_path = Path('reports/news.xlsx')
# `to_excel` does not create missing folders, so make sure `reports/` exists
# before writing; otherwise the export fails on a fresh checkout.
report_path.parent.mkdir(parents=True, exist_ok=True)
# index=False leaves the DataFrame's row index out of the sheet.
df.to_excel(report_path, index=False)