In [None]:
!pip install python-dotenv
!pip -q install langchain openai google-search-results tiktoken
!pip -q install kor markdownify

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# The value displayed as this cell's output is the call's return value.
load_dotenv()

True

In [None]:
from dotenv import dotenv_values

# Parse the local ".env" file into `config`; a later cell looks up
# 'MY_API_KEY' in it to configure the OpenAI client.
config = dotenv_values(".env")

In [None]:
import os

# Export the OpenAI key under the environment variable name OPENAI_API_KEY.
# `config.get` returns None when 'MY_API_KEY' is absent, and assigning None to
# os.environ fails with an opaque "str expected, not NoneType" TypeError, so
# fail early with a message that names the missing setting instead.
openai_api_key = config.get('MY_API_KEY')
if not openai_api_key:
    raise ValueError(
        "MY_API_KEY is missing or empty in .env; cannot set OPENAI_API_KEY"
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [None]:
from typing import List, Optional

from langchain.callbacks import get_openai_callback
from langchain.chat_models import ChatOpenAI

from kor.extraction import create_extraction_chain
from kor.nodes import Object, Text, Number

import pandas as pd
from pydantic import BaseModel, Field, validator
from kor import extract_from_documents, from_pydantic, create_extraction_chain


from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [None]:
# Chat model used by the extraction chain; temperature=0 is passed so the
# sampling temperature is at its minimum.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [None]:
# Attribute: the model's name, with two (input sentence, expected value) examples.
model_name_attribute = Text(
    id="model_name",
    description="The name of the investment model",
    examples=[
        (
            "The HCM ALP Aggresive model is designed for growth oriented investor",
            "HCM ALP Aggresive model",
        ),
        (
            "The State street tactical allocation model ETF is designed to generate current income",
            "State street tactical allocation model ETF",
        ),
    ],
)

# Attribute: the minimum investment amount, declared as a Number node.
minimum_amount_attribute = Number(
    id="minimum_amount",
    description="minimum required invesment amount",
    examples=[("minimum required amount is $25,000", "25000")],
)

# Object-level example: one passage that mentions two models, paired with the
# two records expected from it.
two_model_example = (
    "Vanguard total stock market index fund follows total stock market. Minimum required investment amount is $200,000. Other growth model is Vanguard Small cap growth index in which atleast $30,000 is required",
    [
        {"model_name": "Vanguard total stock market index fund", "minimum_amount": 200000},
        {"model_name": "Vanguard Small cap growth index", "minimum_amount": 30000},
    ],
)

# Schema for the extraction; many=True is passed so several model records can be
# described by a single input text.
model_portfolio_signal_schema = Object(
    id="model_info",
    description="Information about a given investment model.",
    attributes=[model_name_attribute, minimum_amount_attribute],
    examples=[two_model_example],
    many=True,
)

# Build the extraction chain from the chat model and the schema above.
chain = create_extraction_chain(llm, model_portfolio_signal_schema)

In [None]:
# Render the chain's prompt with a placeholder input and print it for inspection.
rendered_prompt = chain.prompt.format_prompt(text="[user input]")
print(rendered_prompt.to_string())

Your goal is to extract structured information from the user's input that matches the form described below. When extracting information please make sure it matches the type information exactly. Do not add any attributes that do not appear in the schema shown below.

```TypeScript

model_info: Array<{ // Information about a given investment model.
 model_name: string // The name of the investment model
 minimum_amount: number // minimum required invesment amount
}>
```


Please output the extracted information in CSV format in Excel dialect. Please use a | as the delimiter. 
 Do NOT add any clarifying information. Output MUST follow the schema above. Do NOT add any additional columns that do not appear in the schema.



Input: Vanguard total stock market index fund follows total stock market. Minimum required investment amount is $200,000. Other growth model is Vanguard Small cap growth index in which atleast $30,000 is required
Output: model_name|minimum_amount
Vanguard total stock mar

In [None]:
# Run the extraction chain on a sample sentence that mentions two models and
# display the parsed records stored under the "data" key of the result.
sample_text = "I can build a portfolio with 2 models Vanguard michell ETF which has $500,000 minimum limit and manny bonds ETF with $100,000 required amount "
extraction_result = chain.predict_and_parse(text=sample_text)
extraction_result["data"]



{'model_info': [{'model_name': 'Vanguard michell ETF',
 'minimum_amount': '500000'},
 {'model_name': 'manny bonds ETF', 'minimum_amount': '100000'}]}