In [1]:
# !pip install python-dotenv
# !pip install openai

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0
Collecting openai
  Using cached openai-0.27.8-py3-none-any.whl (73 kB)
Collecting requests>=2.20 (from openai)
  Using cached requests-2.31.0-py3-none-any.whl (62 kB)
Collecting tqdm (from openai)
  Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)
Collecting aiohttp (from openai)
  Using cached aiohttp-3.8.4-cp38-cp38-macosx_11_0_arm64.whl (337 kB)
Collecting charset-normalizer<4,>=2 (from requests>=2.20->openai)
  Downloading charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl (122 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.0/123.0 kB[0m [31m541.2 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting idna<4,>=2.5 (from requests>=2.20->openai)
  Using cached idna-3.4-py3-none-any.whl (61 kB)
Collecting urllib3<3,>=1.21.1 (from requests>=2.20->openai)
  Using cached urllib

In [2]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
openai.api_key = os.environ['OPENAI_API_KEY']

In [6]:
# !pip install --upgrade langchain

In [7]:
from langchain.chat_models import ChatOpenAI

# To control the randomness and creativity of the generated
# text by an LLM, use temperature = 0.0
chat = ChatOpenAI(temperature=0.0)

In [53]:
template_string = """
    {final_text}

    1. What are the major diagnoses in the above admission. 

    {format_instructions}
    """

In [54]:
from langchain.prompts import ChatPromptTemplate

prompt_template = ChatPromptTemplate.from_template(template_string)

In [55]:
print(prompt_template.messages[0].prompt)
print(prompt_template.messages[0].prompt.input_variables)

input_variables=['final_text', 'format_instructions'] output_parser=None partial_variables={} template='\n    {final_text}\n\n    1. What are the major diagnoses in the above admission. \n\n    {format_instructions}\n    ' template_format='f-string' validate_template=True
['final_text', 'format_instructions']


In [77]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

diagnosis = ResponseSchema(name="Diagnosis",
                             description="Diagnosis extracted from the patient note")
icd10 = ResponseSchema(name="ICD10",
                                      description="ICD10 code of the diagnosis")
primary_diagnosis = ResponseSchema(name="Primary Diagnosis",
                                    description="Whether the given diagnosis is the primary diagnosis")

response_schemas = [diagnosis, 
                    icd10,
                    primary_diagnosis]

In [85]:
STRUCTURED_FORMAT_INSTRUCTIONS = """The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
[{{
{format}
}}]
```"""

line_template = '\t"{name}": {type}  // {description}'

def _get_sub_string(schema: ResponseSchema) -> str:
    return line_template.format(
        name=schema.name, description=schema.description, type=schema.type
    )

class CustomOutputParser(StructuredOutputParser):

    def get_format_instructions(self) -> str:
        schema_str = "\n".join(
            [_get_sub_string(schema) for schema in self.response_schemas]
        )
        return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str)

In [87]:
output_parser = CustomOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

In [90]:
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
[{
	"Diagnosis": string  // Diagnosis extracted from the patient note
	"ICD10": string  // ICD10 code of the diagnosis
	"Primary Diagnosis": string  // Whether the given diagnosis is the primary diagnosis
}]
```


In [91]:
prompt = ChatPromptTemplate.from_template(template=template_string)
messages = prompt.format_messages(final_text='Patient admitted with CHF and UTI', 
                                format_instructions=format_instructions)

In [30]:
import pandas as pd
dx_json = [{
    'Diagnosis':'CHF',
    'ICD10':'I50.9',
    'Primary Diagnosis':True
}]
pd.DataFrame(dx_json)

Unnamed: 0,Diagnosis,ICD10,Primary Diagnosis
0,CHF,I50.9,True


In [92]:
response = chat(messages)

In [93]:
output_dict = output_parser.parse(response.content)

OutputParserException: Got invalid return object. Expected key `Diagnosis` to be present, but got [{'Diagnosis': 'CHF', 'ICD10': 'I50.9', 'Primary Diagnosis': 'Yes'}, {'Diagnosis': 'UTI', 'ICD10': 'N39.0', 'Primary Diagnosis': 'No'}]

In [72]:
print(response.content)

```json
{
	"Diagnosis": "CHF",
	"ICD10": "I50.9",
	"Primary Diagnosis": "Yes"
},
{
	"Diagnosis": "UTI",
	"ICD10": "N39.0",
	"Primary Diagnosis": "No"
}
```


In [74]:
print(messages[0].content)


    Patient admitted with CHF and UTI

    1. What are the major diagnoses in the above admission. 

    The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"Diagnosis": string  // Diagnosis extracted from the patient note
	"ICD10": string  // ICD10 code of the diagnosis
	"Primary Diagnosis": string  // Whether the given diagnosis is the primary diagnosis
}
```
    
