In [2]:
from llm.chatbot import chat_model
from langchain.schema.messages import HumanMessage, SystemMessage

In [4]:
# Hand-built chat history: a system message instructing the assistant to stay
# on healthcare topics, followed by the user's question.
system_text = """You're an assistant knowledgeable about
         healthcare. Only answer healthcare-related questions."""
user_text = "What is Medicaid managed care?"

messages = [SystemMessage(content=system_text), HumanMessage(content=user_text)]

# The bare last expression is what the notebook shows as the cell output.
reply = chat_model.invoke(messages)
reply.content

"Medicaid managed care is a system where state Medicaid programs contract with managed care organizations (MCOs) to provide healthcare services to Medicaid enrollees. In this model, the MCOs assume the risk and responsibility for providing a defined set of healthcare services to enrolled Medicaid beneficiaries, often in exchange for a fixed monthly payment per member.\n\nUnder Medicaid managed care, enrollees typically choose a primary care provider (PCP) from a network of participating healthcare providers who coordinate their care. The MCOs manage the delivery of healthcare services, including authorizing and paying for medically necessary services, coordinating specialty care, and emphasizing preventive care.\n\nMedicaid managed care plans can vary in their specific designs and may include models such as:\n\n1. Health Maintenance Organizations (HMOs): Enrollees typically need to seek care from providers within the HMO's network, except in emergencies.\n\n2. Preferred Provider Organi

In [11]:
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

# System prompt text; the patient reviews are injected in place of {context}.
review_system_template_str = """Your job is to use patient
reviews to answer questions about their experience at a
hospital. Use the following context to answer questions.
Be as detailed as possible, but don't make up any information
that's not from the context. If you don't know an answer, say
you don't know.

{context}
"""

system_template = PromptTemplate(
    template=review_system_template_str, input_variables=["context"]
)
review_system_prompt = SystemMessagePromptTemplate(prompt=system_template)

# Human prompt: the template is just the {question} placeholder.
human_template = PromptTemplate(
    template="{question}", input_variables=["question"]
)
review_human_prompt = HumanMessagePromptTemplate(prompt=human_template)

# Combine both message templates into a single chat prompt.
messages = [review_system_prompt, review_human_prompt]
review_prompt_template = ChatPromptTemplate(
    messages=messages,
    input_variables=["context", "question"],
)

# Quick check: fill both placeholders and display the formatted messages.
context = "I had a great stay!"
question = "Did anyone have a positive experience?"

review_prompt_template.format_messages(question=question, context=context)

[SystemMessage(content="Your job is to use patient\nreviews to answer questions about their experience at a\nhospital. Use the following context to answer questions.\nBe as detailed as possible, but don't make up any information\nthat's not from the context. If you don't know an answer, say\nyou don't know.\n\nI had a great stay!\n"),
 HumanMessage(content='Did anyone have a positive experience?')]

In [17]:
import os
import pandas as pd
from dotenv import load_dotenv, find_dotenv


# Load variables from the .env file located by find_dotenv().
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Direct indexing (not .get) so a missing key raises KeyError here.
COHERE_API_KEY = os.environ["COHERE_API_KEY"]

# Paths for the reviews CSV and the Chroma data directory.
REVIEWS_CSV_PATH, REVIEWS_CHROMA_PATH = "data/reviews.csv", "llm/chroma_data"


# loader = CSVLoader(file_path=REVIEWS_CSV_PATH, source_column="review")
# reviews = loader.load()

# reviews_vector_db = Chroma.from_documents(
#     reviews, CohereEmbeddings(), persist_directory=REVIEWS_CHROMA_PATH
# )

## Truncate each DataFrame to its first 20 rows

In [20]:
import pandas as pd

# Read the full visits table and print its column summary
# (non-null counts and dtypes).
visits_csv_path = "data/visits.csv"
visits = pd.read_csv(visits_csv_path)
visits.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9998 entries, 0 to 9997
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   patient_id             9998 non-null   int64  
 1   date_of_admission      9998 non-null   object 
 2   billing_amount         9998 non-null   float64
 3   room_number            9998 non-null   int64  
 4   admission_type         9998 non-null   object 
 5   discharge_date         9498 non-null   object 
 6   test_results           9998 non-null   object 
 7   visit_id               9998 non-null   int64  
 8   physician_id           9998 non-null   int64  
 9   payer_id               9998 non-null   int64  
 10  hospital_id            9998 non-null   int64  
 11  chief_complaint        3000 non-null   object 
 12  treatment_description  3000 non-null   object 
 13  primary_diagnosis      3000 non-null   object 
 14  visit_status           9998 non-null   object 
dtypes: f

In [21]:
# Keep only the first 20 visits (positional slice) and save them as a
# separate CSV, without the index column.
visits = visits.iloc[:20]
visits.to_csv("data/visits_20.csv", index=False)

In [22]:
# Read the patients table, keep its first 20 rows, and save them as a
# separate CSV, without the index column.
patient = pd.read_csv("data/patients.csv").iloc[:20]
patient.to_csv("data/patients_20.csv", index=False)

In [24]:
# Read the physicians table, keep its first 20 rows, and save them as a
# separate CSV, without the index column.
# Note the output file name is singular ("physician_20.csv").
physician = pd.read_csv("data/physicians.csv").iloc[:20]
physician.to_csv("data/physician_20.csv", index=False)

We will use the visits data to link every dataset through its ID columns: `visit_id`, `physician_id`, `payer_id`, `hospital_id`, and `patient_id`, plus `review_id` from `reviews.csv`.

## MultiVector

In [48]:
from langchain.storage import InMemoryByteStore
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_cohere import CohereEmbeddings
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [49]:
# (CSV path, source_column) for every table that gets loaded.
csv_sources = (
    ("data/summary/hospital_summary.csv", "summary"),
    ("data/summary/patient_summary.csv", "summary"),
    ("data/summary/payer_summary.csv", "summary"),
    ("data/summary/physician_summary.csv", "summary"),
    ("data/summary/visit_summary.csv", "summary"),
    ("data/reviews.csv", "review"),
)
loaders = [
    CSVLoader(file_path=csv_path, source_column=column)
    for csv_path, column in csv_sources
]

# Load every CSV into one flat list of documents.
docs = []
for loader in loaders:
    docs += loader.load()

# Split with the splitter's default settings; `docs` now holds the chunks.
text_splitter = RecursiveCharacterTextSplitter()
docs = text_splitter.split_documents(docs)

In [51]:
import uuid

# Vector store that indexes the child chunks, embedded with Cohere.
embeddings = CohereEmbeddings(cohere_api_key=COHERE_API_KEY)
vectorstore = Chroma(
    collection_name="full_documents",
    embedding_function=embeddings,
)

# In-memory storage layer for the parent documents.
store = InMemoryByteStore()

# Metadata key name handed to the retriever.
id_key = "doc_id"

# The retriever starts out empty; documents are added in a later cell.
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    byte_store=store,
    id_key=id_key,
)

# One random UUID string per entry in `docs`.
doc_ids = [str(uuid.uuid4()) for _ in range(len(docs))]

In [56]:
# `doc_ids` and `id_key` are created in the previous cell but were never used.
# Without the ID in each chunk's metadata and a matching docstore entry, the
# MultiVectorRetriever cannot map a vector hit back to a stored document.
assert len(doc_ids) == len(docs), "doc_ids is stale; rerun the retriever setup cell"

# Tag every chunk with its generated ID under the retriever's id_key.
for doc, doc_id in zip(docs, doc_ids):
    doc.metadata[id_key] = doc_id

# Register the documents in the retriever's docstore under the same IDs.
retriever.docstore.mset(list(zip(doc_ids, docs)))

# Index the tagged chunks; kept as the last expression so its return value
# remains the cell output.
retriever.vectorstore.add_documents(docs)

In [None]:
# Querying the vector store directly returns the indexed chunks themselves.
# NOTE(review): the query text looks unrelated to the hospital data loaded
# above — confirm it is the intended probe.
top_chunks = retriever.vectorstore.similarity_search("justice breyer")
top_chunks[0]

## Test Review Chain

In [3]:
import time

from dotenv import load_dotenv, find_dotenv
from llm_api.src.chains.review_chain import setup_vector_chain

# Load environment variables from the .env file located by find_dotenv().
load_dotenv(find_dotenv())

query = """give me the best patient review."""
vector_chain = setup_vector_chain()

# Time only the invoke() call; chain setup above is excluded.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
response = vector_chain.invoke(query)
end_time = time.perf_counter()
response_time = end_time - start_time

# .get() yields None rather than raising if the "result" key is absent.
res = response.get("result")
print(f"\nResponse time: {response_time} seconds")
print("Response :", res)

2024-05-29 19:39:36 [INFO] Setting up vector chain...
2024-05-29 19:39:36 [INFO] Load CohereEmbedding...
2024-05-29 19:39:37 [INFO] Attempting to set up Neo4j vector index from existing index...
2024-05-29 19:39:43 [INFO] HTTP Request: POST https://api.cohere.ai/v1/chat "HTTP/1.1 200 OK"



Response time: 3.231100559234619 seconds
Response : The patient expresses gratitude for the compassionate and thorough medical care received at the hospital. The medical team is praised for their attentive and supportive nature, ensuring a comfortable and smooth recovery. The facilities are described as top-notch, modern, and well-equipped, contributing to a positive overall experience. This review highlights the excellent standard of care and the patient's satisfaction with their stay.


## Test Cypher Chain

In [14]:
import time

from dotenv import load_dotenv, find_dotenv
from llm_api.src.chains.cyper_chain import create_cypher_qa_chain

# Load environment variables from the .env file located by find_dotenv().
load_dotenv(find_dotenv())

# Alternative questions kept for manual experimentation; uncomment one to try it.
# question = """I need information about the visit on 2022-03-08 such as chief complaint, treatment and payment status."""
# question ="""
#     I was looking for a patient but forgot his name,
#     I only know his date of birth is 1989-01-26 and his blood type is AB+.
#     And is he still being treated and in which room?
# """
# question = """Which state had the largest percent increase
#            in Medicaid visits from 2022 to 2023?"""
question = """
    I want to know how many admission type emegencies there are and what their status is.
"""
# question = "how many patients did Jason Williams treat while working at the hospital"

cypher_qa_chain = create_cypher_qa_chain()

# Time only the invoke() call; chain creation above is excluded.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
response = cypher_qa_chain.invoke(question)
end_time = time.perf_counter()
response_time = end_time - start_time

# .get() yields None rather than raising if the "result" key is absent.
res = response.get("result")
print(f"\nResponse time: {response_time} seconds")
print("Response :", res)



[1m> Entering new GraphCypherQAChain chain...[0m


2024-05-29 20:09:55 [INFO] HTTP Request: POST https://api.cohere.ai/v1/chat "HTTP/1.1 200 OK"


Generated Cypher:
[32;1m[1;3mcypher
MATCH (v:Visit)
WHERE v.admission_type = 'Emergency'
RETURN v.admission_type, v.status, COUNT(*) AS emergency_count;
[0m
Full Context:
[32;1m[1;3m[{'v.admission_type': 'Emergency', 'v.status': 'OPEN', 'emergency_count': 174}, {'v.admission_type': 'Emergency', 'v.status': 'DISCHARGED', 'emergency_count': 3193}][0m


2024-05-29 20:09:58 [INFO] HTTP Request: POST https://api.cohere.ai/v1/chat "HTTP/1.1 200 OK"



[1m> Finished chain.[0m

Response time: 11.51573657989502 seconds
Response : There are a total of 3367 admission-type emergencies. Of these, 174 are currently open, while 3193 have been discharged.


## Test RAG agent
**Note:** Sometimes the cell needs to be rerun to get the best results.

In [23]:
import time

from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file located by find_dotenv().
load_dotenv(find_dotenv())

# NOTE(review): this import is kept after load_dotenv(), as in the original —
# presumably the agent module needs the environment loaded first; confirm.
from llm_api.src.agents.rag_agent import agent_executor

# Alternative questions kept for manual experimentation; uncomment one to try it.
# question = {"input": "What is the wait time at Wallace-Hamilton?"}
# question = {
#          "input": (
#              "What have patients said about their "
#              "quality of rest during their stay?"
#          )
#      }

question = {
    "input": (
        "Which physician has treated the "
        "most patients covered by Cigna?"
    )
}

agent = agent_executor()

# Time only the invoke() call; agent construction above is excluded.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
response = agent.invoke(question)
end_time = time.perf_counter()
response_time = end_time - start_time

# .get() yields None rather than raising if the "output" key is absent.
res = response.get("output")
print(f"\nResponse time: {response_time} seconds")
print("Response :", res)



[1m> Entering new AgentExecutor chain...[0m


2024-05-29 21:22:22 [INFO] HTTP Request: POST https://api.cohere.ai/v1/chat "HTTP/1.1 200 OK"


[32;1m[1;3m
I will search for the physician who has treated the most patients covered by Cigna.
{'tool_name': 'Graph', 'parameters': {'tool_input': 'Which physician has treated the most patients covered by Cigna?'}}
[0m

[1m> Entering new GraphCypherQAChain chain...[0m


2024-05-29 21:22:28 [INFO] HTTP Request: POST https://api.cohere.ai/v1/chat "HTTP/1.1 200 OK"


Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Payer)<--(:Visit)<-[t:TREATS]-(phy:Physician)
WHERE p.name = 'Cigna'
RETURN phy.name AS physician_name, COUNT(DISTINCT t) AS num_patients_treated
ORDER BY num_patients_treated DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'physician_name': 'Amy Daniels', 'num_patients_treated': 13}][0m


2024-05-29 21:22:36 [INFO] HTTP Request: POST https://api.cohere.ai/v1/chat "HTTP/1.1 200 OK"



[1m> Finished chain.[0m
[33;1m[1;3m{'query': 'Which physician has treated the most patients covered by Cigna?', 'result': 'Amy Daniels has treated 13 patients covered by Cigna, making her the physician with the most Cigna patients treated.'}[0m

2024-05-29 21:22:37 [INFO] HTTP Request: POST https://api.cohere.ai/v1/chat "HTTP/1.1 200 OK"


[32;1m[1;3mRelevant Documents: 0
Cited Documents: 0
Answer: Amy Daniels has treated 13 patients covered by Cigna, making her the physician with the most Cigna patients treated.
Grounded answer: <co: 0>Amy Daniels</co: 0> has treated <co: 0>13 patients covered by Cigna</co: 0>, making her the physician with the most Cigna patients treated.[0m

[1m> Finished chain.[0m

Response time: 19.148507356643677 seconds
Response : Amy Daniels has treated 13 patients covered by Cigna, making her the physician with the most Cigna patients treated.
