In [1]:
from pydantic.v1 import BaseModel, Field
from langchain.agents import tool
import requests, json
import numpy as np
import random

class DoctorSearch(BaseModel):
    zip_code: str = Field(..., description="Zip code in which to search for doctors")
        
@tool(args_schema=DoctorSearch)
def get_doctors(zip_code: str) -> list:
    """Get doctors for the given zip code"""
    url = "https://fhir.cigna.com/ProviderDirectory/v1/Location?address-postalcode=" + zip_code
    jsonString = requests.get(url)
    addresses = []
    data = json.loads(jsonString.content)
    data = data['entry']
    for entry in data:
        address = entry['resource']['address']['line']
        addresses = addresses + address

    return addresses

In [2]:
from typing import Optional
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from datetime import date

class Claim(BaseModel):
    """Information about a doctor visit."""
    hospital_name: str = Field(description="Hospital name")
    visit_date: date = Field(description="date of visit")
        
extraction_function = [convert_pydantic_to_openai_function(Claim)]
#extraction_model = model.bind(functions=extraction_function, function_call={"name": "Information"})

  warn_deprecated(


In [3]:
from langchain_community.document_loaders.image import UnstructuredImageLoader

class FileClaim(BaseModel):
    file_name: str = Field(..., description="File to get claim information from")

@tool(args_schema=FileClaim)
def process_file(file_name: str) -> dict:
    """Upload file and extract text"""
    with open(file_name) as f:
        loader = UnstructuredImageLoader(file_name)
        data = loader.load()
        model = ChatOpenAI(temperature=0)
        extraction_model = model.bind(functions=extraction_function, function_call={"name": "Claim"})
        prompt = ChatPromptTemplate.from_messages([
            ("system", "Extract the relevant information, if not explicitly provided do not guess. Extract partial info"),
            ("human", "{input}")
        ])
        extraction_chain = prompt | extraction_model
        response = extraction_chain.invoke({"input": data[0]})
        return response
    return None
        

In [4]:
@tool
def get_claim_history():
    """
    Get past claims raised
    """
    num_claims = np.random.randint(2, 6)
    print(num_claims)
    claims = []
    for _ in range(num_claims):
        claim_id = np.random.randint(1000000, 2000000)
        claim_type = random.choice(["Hospitalization", "Out Patient", "Pharmacy"])
        claim_status = random.choice(["Received", "Processed - Approved", "Processed - Denied"])
        claim_amount = np.random.randint(200, 500)
        claims.append((claim_id, claim_type, claim_status, claim_amount))
    return claims

In [5]:
import os
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.chains.query_constructor.base import AttributeInfo
from langchain_community.chat_models import ChatOpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel

In [6]:
class RAG:
    def __init__(self):
        os.environ['PINECONE_API_KEY'] = "d21b8a7f-aa94-484e-8ca7-48eb3d1f0b11"
        index_name = "insuranceplans"
        embeddings = OpenAIEmbeddings()
        vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)

        metadata_field_info = [
            AttributeInfo(
                name="filename",
                description="The file the chunk is from. Is also the plan name",
                type="string",
            ),
        ]

        document_content_description = "Cigna Plan Information"
        model = ChatOpenAI(model="gpt-4o", temperature=0)


        retriever = SelfQueryRetriever.from_llm(
            llm=model,
            document_contents=document_content_description,
            metadata_field_info=metadata_field_info,
            vectorstore=vectorstore,
            verbose=False
        )

        # Prompt template
        template = """Answer the question based only on the following context, 
        which can include text and tables: {context}
        use the Question: {query}
        """
        prompt = ChatPromptTemplate.from_template(template)

        self.chain = RunnableParallel(
            {
                'context': lambda x: retriever.get_relevant_documents(x["query"]),
                'query': lambda x: x["query"],
                #'plan_name': lambda x: x["plan_name"]
            }) | prompt | model | StrOutputParser()

    def get_chain(self):
        return self.chain

class PlanInformation(BaseModel):
    question: str = Field(..., description="The query to answer on the given plan_name")
        
@tool(args_schema=PlanInformation)
def get_plan_information(question: str) -> str:
    """
    Will take the plan_name and the question from the user and answer the question using
    RAG chain created on plan documents
    """
    chain = RAG().get_chain()
    print(type(chain))
    return chain.invoke({"query": question})

In [7]:
from langchain.tools.render import format_tool_to_openai_function
from langchain.agents.format_scratchpad import format_to_openai_functions

functions = [
    format_tool_to_openai_function(f) for f in [
        get_plan_information, get_doctors, get_claim_history, process_file
    ]
] + extraction_function
model = ChatOpenAI(temperature=0).bind(functions=functions)
response = model.invoke("./EavRM2VU8AEIZsP.jpeg")

  warn_deprecated(
  warn_deprecated(


In [8]:
response

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"file_name":"./EavRM2VU8AEIZsP.jpeg"}', 'name': 'process_file'}}, response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 250, 'total_tokens': 277}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'function_call', 'logprobs': None}, id='run-b842949a-7aa7-4a4a-b0b0-4fb3a38d816b-0')

In [9]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.agent import AgentFinish
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are helpful but sassy assistant"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

chain = prompt | model | OpenAIFunctionsAgentOutputParser()

agent_chain = RunnablePassthrough.assign(
    agent_scratchpad= lambda x: format_to_openai_functions(x["intermediate_steps"])
) | chain

"""def run_agent(user_input):
    intermediate_steps = []
    while True:
        result = agent_chain.invoke({
            "query": user_input, 
            "intermediate_steps": intermediate_steps
        })
        if isinstance(result, AgentFinish):
            return result
        tool = {
            "get_doctors": get_doctors, 
            "get_plan_information": get_plan_information,
        }[result.tool]
        observation = tool.run(result.tool_input)
        intermediate_steps.append((result, observation))"""

'def run_agent(user_input):\n    intermediate_steps = []\n    while True:\n        result = agent_chain.invoke({\n            "query": user_input, \n            "intermediate_steps": intermediate_steps\n        })\n        if isinstance(result, AgentFinish):\n            return result\n        tool = {\n            "get_doctors": get_doctors, \n            "get_plan_information": get_plan_information,\n        }[result.tool]\n        observation = tool.run(result.tool_input)\n        intermediate_steps.append((result, observation))'

In [10]:
from langchain.agents import AgentExecutor
tools = [get_doctors, get_plan_information, get_claim_history, process_file]

from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(return_messages=True,memory_key="chat_history")

agent_executor = AgentExecutor(agent=agent_chain, tools=tools, verbose=True, memory=memory)

### Use langchain to read text from images and pydantic to extract the data we need

In [14]:
response = agent_executor.invoke({"input": "./EavRM2VU8AEIZsP.jpeg"}, return_only_outputs=True)
print(response.get('output'))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `process_file` with `{'file_name': './EavRM2VU8AEIZsP.jpeg'}`


[0m[36;1m[1;3mcontent='' additional_kwargs={'function_call': {'arguments': '{"hospital_name":"Alta Bates Emergency","visit_date":"6/6/2019"}', 'name': 'Claim'}} response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 605, 'total_tokens': 624}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-208be274-516f-45b8-8e27-5105240284b9-0'[0m[32;1m[1;3mI have extracted the following information from the file:
- Hospital Name: Alta Bates Emergency
- Visit Date: 6/6/2019

Is there anything else you would like to know or do with this information?[0m

[1m> Finished chain.[0m
I have extracted the following information from the file:
- Hospital Name: Alta Bates Emergency
- Visit Date: 6/6/2019

Is there anything else you would like to know or do with this information?


### We have a pinecone vector database in the background. get_plan_information tool invokes uses the pinecone database to get relevent information from documents and then Passes to OpenAI to create response

In [15]:
response = agent_executor.invoke({"input": "Tell me about Connect Silver 1500 Indiv Med Deductible"}, return_only_outputs=True)
print(response.get('output'))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_plan_information` with `{'question': 'Tell me about Connect Silver 1500 Indiv Med Deductible'}`


[0m<class 'langchain_core.runnables.base.RunnableSequence'>
[33;1m[1;3mThe Connect Silver 1500 Indiv Med Deductible is a health plan offered by Cigna HealthCare of North Carolina, Inc. for the coverage period from January 1, 2024, to December 31, 2024. It is available for both individuals and families and is classified as an HMO (Health Maintenance Organization) plan. 

The Summary of Benefits and Coverage (SBC) document for this plan provides details on how costs for covered health care services are shared between the plan and the insured. However, information about the cost of the plan, referred to as the premium, is provided separately. 

For more detailed information about the coverage or to obtain a complete copy of the terms of coverage, you can call 1-866-494-2111 or visit the website at https://www.cign

In [17]:
response = agent_executor.invoke({"input": "What is the overall deductible Connect Silver 1500 Indiv Med Deductible"}, return_only_outputs=True)
print(response.get('output'))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_plan_information` with `{'question': 'What is the overall deductible Connect Silver 1500 Indiv Med Deductible'}`


[0m<class 'langchain_core.runnables.base.RunnableSequence'>
[33;1m[1;3mThe overall deductible for Connect Silver 1500 Indiv Med Deductible is $1,500.[0m[32;1m[1;3mThe overall deductible for Connect Silver 1500 Indiv Med Deductible is $1,500.[0m

[1m> Finished chain.[0m
The overall deductible for Connect Silver 1500 Indiv Med Deductible is $1,500.


### Uses Cigna Location API to get the address of doctors in the area

In [18]:
response = agent_executor.invoke({"input": "Get doctors in 28027"}, return_only_outputs=True)
print(response.get('output'))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_doctors` with `{'zip_code': '28027'}`


[0m[36;1m[1;3m['10030 EDISON SQUARE DR NW # 100A', '5325 VINNING ST NW # 101', '8815 CHRISTENBURY PKWY # 30', '8230 POPLAR TENT RD # 103', '10030 EDISON SQUARE DR NW # 100-A', '10030 EDISON SQUARE DR NW # 100-A', '10030 EDISON SQUARE DR NW # 100A', '10030 EDISON SQUARE DR NW # 100-A', '10030 EDISON SQUARE DR NW # 100-A', '7752 GATEWAY LN NW # 100', '8815 CHRISTENBURY PKWY # 30', '5641 POPLAR TENT RD # 101', '10030 EDISON SQUARE DR NW # 100', '5325 VINNING ST NW # 101', '5310 LANGFORD AVE NW', '8815 CHRISTENBURY PKWY # 30', '280 CONCORD PKWY S', '9955 POPLAR TENT RD', '7752 GATEWAY LN NW # 100', '369 CONCORD PKWY N'][0m[32;1m[1;3mHere are some doctors in the 28027 area:
1. 10030 EDISON SQUARE DR NW # 100A
2. 5325 VINNING ST NW # 101
3. 8815 CHRISTENBURY PKWY # 30
4. 8230 POPLAR TENT RD # 103
5. 10030 EDISON SQUARE DR NW # 100-A
6. 7752 GATEWAY LN NW # 100
7. 5641 PO

### Uses a dummary tool (get_claim_history) that creates randown claim history for LLM to retrieve

In [19]:
response = agent_executor.invoke({"input": "What claims have I raised this year?"}, return_only_outputs=True)
print(response.get('output'))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_claim_history` with `{}`


[0m4
[38;5;200m[1;3m[(1759320, 'Out Patient', 'Processed - Denied', 410), (1805351, 'Out Patient', 'Processed - Approved', 439), (1867415, 'Out Patient', 'Processed - Denied', 309), (1695292, 'Pharmacy', 'Processed - Approved', 322)][0m[32;1m[1;3mYou have raised the following claims this year:
1. Claim ID: 1759320, Type: Out Patient, Status: Processed - Denied, Amount: $410
2. Claim ID: 1805351, Type: Out Patient, Status: Processed - Approved, Amount: $439
3. Claim ID: 1867415, Type: Out Patient, Status: Processed - Denied, Amount: $309
4. Claim ID: 1695292, Type: Pharmacy, Status: Processed - Approved, Amount: $322[0m

[1m> Finished chain.[0m
You have raised the following claims this year:
1. Claim ID: 1759320, Type: Out Patient, Status: Processed - Denied, Amount: $410
2. Claim ID: 1805351, Type: Out Patient, Status: Processed - Approved, Amount: $439
3. Claim ID: 1867415,