In [2]:
import os
from langchain.graphs import Neo4jGraph
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import GraphCypherQAChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from dotenv import load_dotenv
load_dotenv("../rag_api/.env")

True

In [3]:
# TODO: visit id look up function

#### Connect to Neo4j graph and add data

In [32]:
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD")
)

In [3]:
graph.query(
    "MATCH p=()-[:COVERED_BY]->() RETURN p LIMIT 25;"
)

[{'p': [{'admission_date': '2022-11-17',
    'discharge_date': '2022-12-01',
    'admission_type': 'Elective',
    'room_number': 146,
    'id': 0,
    'test_results': 'Inconclusive',
    'status': 'DISCHARGED'},
   'COVERED_BY',
   {'name': 'UnitedHealthcare', 'id': 1}]},
 {'p': [{'admission_date': '2023-06-01',
    'admission_type': 'Emergency',
    'room_number': 404,
    'id': 1,
    'test_results': 'Normal',
    'status': 'OPEN'},
   'COVERED_BY',
   {'name': 'Blue Cross', 'id': 4}]},
 {'p': [{'admission_date': '2019-01-09',
    'discharge_date': '2019-02-08',
    'admission_type': 'Emergency',
    'room_number': 292,
    'id': 2,
    'test_results': 'Normal',
    'status': 'DISCHARGED'},
   'COVERED_BY',
   {'name': 'Aetna', 'id': 2}]},
 {'p': [{'admission_date': '2020-05-02',
    'discharge_date': '2020-05-03',
    'admission_type': 'Urgent',
    'room_number': 480,
    'id': 3,
    'test_results': 'Abnormal',
    'status': 'DISCHARGED'},
   'COVERED_BY',
   {'name': 'Blue Cross

#### Create a Neo4j vector index 
- This vector index embedds specified text properties of a node to enable semantic search

In [6]:
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
    index_name='reviews',
    node_label="Review",
    text_node_properties=[
        'physician_name',
        'patient_name',
        'text',
        'hospital_name'
    ],
    embedding_node_property='embedding'
)

In [7]:
response = vector_index.similarity_search(
    "Are patients complaining about noise?", k = 10
)

for resp in response:
    
    print(resp.page_content)


physician_name: Jennifer Evans
patient_name: Sarah Love
text: I appreciate the excellent medical care I received. Still, the noise levels in the hallway outside my room were disruptive and affected my rest.
hospital_name: Castaneda-Hardy

physician_name: Victoria Chavez
patient_name: David Kim
text: The hospital staff was accommodating to my needs, but the noise levels in the hallway were disruptive and affected my ability to rest.
hospital_name: Wallace-Hamilton

physician_name: Timothy Gibson
patient_name: Natalie Rojas
text: The medical staff was attentive and caring, providing quality care. Unfortunately, the noise from construction outside my room was disruptive and affected my rest.
hospital_name: Garcia Ltd

physician_name: Joseph Roman
patient_name: Caleb Coleman
text: The hospital's medical team was competent, but the noise level in the ward was disruptive, affecting my ability to rest.
hospital_name: Richardson-Powell

physician_name: Ryan Jarvis
patient_name: Douglas Hickma

In [16]:
response = vector_index.similarity_search(
    "What are patinets saying about the nursing staff at Castaneda-Hardy?", k = 10
)

for resp in response:
    
    print(resp.page_content)


physician_name: Colleen Ward
patient_name: Sarah Thomas
text: The hospital's nursing staff were a mixed bag. While some were compassionate and attentive, others seemed disinterested and inattentive. Consistent training and expectations are necessary for quality patient care.
hospital_name: Castaneda-Hardy

physician_name: Denise Perez
patient_name: Tara Knight
text: The hospital provided exceptional care, and the nursing staff was incredibly supportive. However, the administrative processes were a bit convoluted, causing some confusion.
hospital_name: Castaneda-Hardy

physician_name: Jennifer Quinn
patient_name: Victoria Arroyo
text: I received excellent medical care during my stay, and the nursing staff was attentive. Unfortunately, the lack of clear communication about the discharge process caused some anxiety.
hospital_name: Castaneda-Hardy

physician_name: Nicole Keith
patient_name: Stacey Cohen
text: I received excellent care at the hospital. The staff was knowledgeable and carin

#### Create a RAG QA chain from the Neo4j vector index

In [7]:
review_template = """ 
Your job is to answer questions about hospital reviews from patients. Use
the following context to answer questions. Be as detailed as possible, but 
don't make up any information that's not from the context. If you don't know
an answer, say you don't know. 

{context}
"""

review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(input_variables=['context'],
                          template=review_template)
)

review_human_prompt = HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))
messages = [review_system_prompt, review_human_prompt]

review_prompt = ChatPromptTemplate(input_variables=['context', 'question'],
                                   messages=messages)

vector_qa = RetrievalQA.from_chain_type(
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=vector_index.as_retriever(k=12)
)
vector_qa.combine_documents_chain.llm_chain.prompt = review_prompt

In [22]:
answer_stream = vector_qa.stream(
     {"query": "Are any patients upset about cleanliness?"}
)

for chunk in vector_qa.stream({"query": "Are any patients upset about cleanliness?"}):
     
     print(chunk)

{'query': 'Are any patients upset about cleanliness?', 'result': 'Yes, two patients are upset about the cleanliness of the hospital. Kara Gilbert was disappointed with the cleanliness of the hospital and mentioned that the lack of hygiene was a concern for her. Stacy Lindsey also mentioned that her stay at the hospital was marred by a lack of cleanliness in her room and that the facilities were not properly maintained.'}


In [115]:
vector_qa.run(
     "Are there reviews that mention noise?"
)

'Yes, there are multiple reviews that mention noise. William Foster, Robert Mcguire, Tyler Sanders DVM, and Timothy Avila all mentioned the noise levels in their respective hospital stays.'

#### Create a Cypher RAG chain
- Instead of performing semantic search, this chain converts natual language propmpts into Cypher queries

In [38]:
graph.refresh_schema()

from langchain.llms import Together

cypher_generation_template = """
Task:
Generate Cypher query for a Neo4j graph database.

Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.

Schema:
{schema}

Note: 
Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement. Make sure the direction of the relationship is 
correct in your queries. Make sure you alias both entities and relationships properly.

Examples:
# Who is the oldest patient and how old are they?
MATCH (p:Patient)
RETURN p.name AS oldest_patient, 
       duration.between(date(p.dob), date()).years AS age
ORDER BY age DESC
LIMIT 1

# Which physician has billed the least to Cigna
MATCH (p:Payer)<-[c:COVERED_BY]-(v:Visit)-[t:TREATS]-(phy:Physician)
WHERE p.name = 'Cigna'
RETURN phy.name AS physician_name, SUM(c.billing_amount) AS total_billed
ORDER BY total_billed 
LIMIT 1

String category values:
Test results are one of: 'Inconclusive', 'Normal', 'Abnormal'
Visit statuses are one of: 'OPEN', 'DISCHARGED'
Admission Types are one of: 'Elective', 'Emergency', 'Urgent'
Payer names are one of: 'Cigna', 'Blue Cross', 'UnitedHealthcare', 'Medicare', 'Aetna'

A visit is considered open if it's status is 'OPEN' and the discharge date is missing

Make sure to use IS NULL or IS NOT NULL when analyzing missing properties.
Never return embedding properties in your queries. You must never include the statement "GROUP BY" in your query.

The question is:
{question}
"""

cypher_generation_prompt = PromptTemplate(
    input_variables=["schema", "question"], template=cypher_generation_template
)

qa_generation_template = """
You are an assistant that takes the results from a Neo4j Cypher query and forms
a nice human-understandable response. The information section contains the results
of a Cypher query that was generated based on a users natural language question. 
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. 

Information:
{context}

Question:
{question}

If the provided information is empty, say that you don't know the answer.
Empty information looks like this: []

If the information is not empty, you must provide an answer.

Helpful Answer:
"""

qa_generation_prompt = PromptTemplate(
    input_variables=["context", "question"], template=qa_generation_template
)

cypher_llm = Together(
    model="togethercomputer/llama-2-70b-chat",
    temperature=0.0,
    top_k=1,
    together_api_key=os.getenv("TOGETHER_AI_API_KEY")
)

qa_llm = Together(
    model="togethercomputer/llama-2-70b-chat",
    temperature=0.0,
    top_k=1,
    together_api_key=os.getenv("TOGETHER_AI_API_KEY")
)

cypher_chain = GraphCypherQAChain.from_llm(
    cypher_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    qa_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    graph=graph,
    verbose=True,
    qa_prompt=qa_generation_prompt,
    cypher_prompt=cypher_generation_prompt,
    validate_cypher=True
)

In [60]:
len(cypher_generation_template.split(" ")) + len(qa_generation_template.split(" "))

294

In [37]:
cypher_chain.run(
    "What are the most common diagnoses when patients complaint about pain?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
Please provide the Cypher query.[0m


ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'Please': expected
  "ALTER"
  "CALL"
  "CREATE"
  "DEALLOCATE"
  "DELETE"
  "DENY"
  "DETACH"
  "DROP"
  "DRYRUN"
  "ENABLE"
  "FOREACH"
  "GRANT"
  "LOAD"
  "MATCH"
  "MERGE"
  "NODETACH"
  "OPTIONAL"
  "REALLOCATE"
  "REMOVE"
  "RENAME"
  "RETURN"
  "REVOKE"
  "SET"
  "SHOW"
  "START"
  "STOP"
  "TERMINATE"
  "UNWIND"
  "USE"
  "USING"
  "WITH" (line 2, column 1 (offset: 1))
"Please provide the Cypher query."
 ^}

In [7]:
cypher_chain.run(
    "Which physician had the largest percent decrease in visits from 2022 to 2023?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (ph:Physician)-[:TREATS]->(v:Visit)
WHERE v.admission_date STARTS WITH '2022' OR v.admission_date STARTS WITH '2023'
WITH ph, 
     SUM(CASE WHEN v.admission_date STARTS WITH '2022' THEN 1 ELSE 0 END) AS visits_2022,
     SUM(CASE WHEN v.admission_date STARTS WITH '2023' THEN 1 ELSE 0 END) AS visits_2023
WITH ph, visits_2022, visits_2023, 
     CASE WHEN visits_2022 = 0 THEN 0 
          ELSE (visits_2022 - visits_2023) * 1.0 / visits_2022 
     END AS percent_decrease
ORDER BY percent_decrease DESC
RETURN ph.name AS physician, percent_decrease
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'physician': 'Joseph Roman', 'percent_decrease': 1.0}][0m

[1m> Finished chain.[0m


'Dr. Joseph Roman experienced the largest percent decrease in visits, with a decrease of 1.0 percent from 2022 to 2023.'

In [12]:
cypher_chain.run(
    "How many patients complained of hearing problems in 2023?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Patient)-[:HAS]->(v:Visit)
WHERE v.chief_complaint CONTAINS 'hearing' AND v.admission_date >= '2023-01-01' AND v.admission_date <= '2023-12-31'
RETURN COUNT(DISTINCT p) AS patients_with_hearing_problems
[0m
Full Context:
[32;1m[1;3m[{'patients_with_hearing_problems': 1}][0m

[1m> Finished chain.[0m


'In 2023, there was one patient who complained of hearing problems.'

In [14]:
cypher_chain.run(
    "What is the average age of patients with complaints related to hearing"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Patient)-[:HAS]->(v:Visit)
WHERE v.chief_complaint CONTAINS "hearing"
RETURN AVG(duration.between(date(p.dob), date()).years) AS average_age
[0m
Full Context:
[32;1m[1;3m[{'average_age': 66.20000000000002}][0m

[1m> Finished chain.[0m


'The average age of patients with complaints related to hearing is approximately 66.2 years.'

In [107]:
# Fixed with prompting on 3.5, broken on 4, works on 4-1160
cypher_chain.run(
    "What is the total billing amount charged to a provider for 2023?"
)



[1m> Entering new GraphCypherQAChain chain...[0m


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Generated Cypher:
[32;1m[1;3mcypher
MATCH (:Hospital)-[b:BILLS]->(p:Provider)
WHERE b.billing_date >= datetime('2023-01-01T00:00:00') AND b.billing_date < datetime('2024-01-01T00:00:00')
RETURN p.name AS provider_name, sum(b.amount) AS total_billing_amount
[0m
Full Context:
[32;1m[1;3m[{'provider_name': 'Medicare', 'total_billing_amount': 173956.73228460923}, {'provider_name': 'UnitedHealthcare', 'total_billing_amount': 89229.8576933241}, {'provider_name': 'Aetna', 'total_billing_amount': 105026.32623650707}, {'provider_name': 'Cigna', 'total_billing_amount': 330029.83443260036}, {'provider_name': 'Blue Cross', 'total_billing_amount': 120020.53350910246}][0m


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m> Finished chain.[0m


'The total billing amounts charged to various providers for 2023 are as follows:\n\n- Medicare was billed a total of $173,956.73.\n- UnitedHealthcare was billed a total of $89,229.86.\n- Aetna was billed a total of $105,026.33.\n- Cigna was billed a total of $330,029.83.\n- Blue Cross was billed a total of $120,020.53.\n\nThese figures represent the billing amounts for each provider based on the data provided.'

In [108]:
# No context for a simple ass question on 3.5, broken on 4, works on 4-1160
cypher_chain.run(
    "What is the total billing amount for 2023?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (:Hospital)-[b:BILLS]->(:Provider)
WHERE b.billing_date >= datetime('2023-01-01T00:00:00') AND b.billing_date < datetime('2024-01-01T00:00:00')
RETURN sum(b.amount) AS total_billing_amount
[0m
Full Context:
[32;1m[1;3m[{'total_billing_amount': 818263.2841561434}][0m

[1m> Finished chain.[0m


'The total billing amount for 2023 is $818,263.28.'

In [109]:
# Works on 4, broken on 3.5, works on 4-1160
cypher_chain.run(
    "What is the average billing amount to medicare providers?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (:Hospital)-[b:BILLS]->(p:Provider {name: 'Medicare'})
RETURN AVG(b.amount) AS average_billing_amount
[0m
Full Context:
[32;1m[1;3m[{'average_billing_amount': 23051.684156172825}][0m

[1m> Finished chain.[0m


'The average billing amount to Medicare providers is $23,051.68.'

In [110]:
# Looks good: need to check for all
cypher_chain.run(
    "How many patients has Denise Chavez treated?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Physician {name: 'Denise Chavez'})-[:TREATS]->(v:Visit)<-[:HAS]-(patient:Patient)
RETURN COUNT(DISTINCT patient) AS patients_treated
[0m
Full Context:
[32;1m[1;3m[{'patients_treated': 16}][0m

[1m> Finished chain.[0m


'Denise Chavez has treated 16 patients.'

In [111]:
# Fixed with prompt example - works for all
cypher_chain.run(
    "Who is the youngest physician, how old are they, and how much money do they make?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Physician)
RETURN p.name AS youngest_physician, 
       duration.between(date(p.dob), date()).years AS age, 
       p.salary AS salary
ORDER BY age ASC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'youngest_physician': 'Erica George', 'age': 29, 'salary': 285571.61618225725}][0m

[1m> Finished chain.[0m


'The youngest physician is Erica George, who is 29 years old. They make a salary of approximately $285,571.62.'

In [112]:
# Fixed with prompting works for all
cypher_chain.run(
    "Give me all available information about the most recently treated patient."
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Patient)-[:HAS]->(v:Visit)-[:AT]->(h:Hospital)
WHERE v.discharge_date IS NOT NULL
RETURN p.id AS patient_id, 
       p.name AS patient_name, 
       p.dob AS patient_dob, 
       p.sex AS patient_sex, 
       p.bloodtype AS patient_bloodtype, 
       v.id AS visit_id, 
       v.room_number AS room_number, 
       v.admission_date AS admission_date, 
       v.test_results AS test_results, 
       v.discharge_date AS discharge_date, 
       v.chief_complaints AS chief_complaints, 
       v.treatment AS treatment, 
       v.diagnosis AS diagnosis, 
       h.id AS hospital_id, 
       h.name AS hospital_name
ORDER BY v.discharge_date DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'patient_id': 9721, 'patient_name': 'Angela Thomas', 'patient_dob': neo4j.time.DateTime(1985, 1, 16, 0, 0, 0, 0, tzinfo=<UTC>), 'patient_sex': 'Male', 'patient_bloodtype': 'AB+', 'visit_id': 9721, 'room_number': 405

'The most recently treated patient is Angela Thomas, with the patient ID 9721. Angela was born on January 16, 1985, and is registered as male with a blood type of AB+. Angela was admitted to room number 405 at Schultz-Powers Hospital, which is identified by hospital ID 24, on October 28, 2023. The test results from the visit were inconclusive. Unfortunately, there is no information available regarding the chief complaints, treatment, or diagnosis. Angela was discharged on November 27, 2023.'

In [113]:
# Outdated on 3.5 works on 4 and 4-1160
cypher_chain.run(
    "How many visits are missing chief complaints?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (v:Visit)
WHERE v.chief_complaints IS NULL
RETURN count(v) AS missing_chief_complaints
[0m
Full Context:
[32;1m[1;3m[{'missing_chief_complaints': 6998}][0m

[1m> Finished chain.[0m


'There are 6,998 visits missing chief complaints.'

In [114]:
# Works on 4 and 4-1160
cypher_chain.run(
    "Give me all available information about the most recently treated patient who doesn't have missing treatment information"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Patient)-[:HAS]->(v:Visit)<-[:TREATS]-(ph:Physician)
WHERE v.treatment IS NOT NULL
RETURN p.id AS patient_id, 
       p.name AS patient_name, 
       p.dob AS patient_dob, 
       p.sex AS patient_sex, 
       p.bloodtype AS patient_bloodtype, 
       v.id AS visit_id, 
       v.room_number AS room_number, 
       v.admission_date AS admission_date, 
       v.test_results AS test_results, 
       v.discharge_date AS discharge_date, 
       v.chief_complaints AS chief_complaints, 
       v.treatment AS treatment, 
       v.diagnosis AS diagnosis, 
       ph.id AS physician_id, 
       ph.name AS physician_name, 
       ph.dob AS physician_dob, 
       ph.school AS physician_school, 
       ph.salary AS physician_salary
ORDER BY v.admission_date DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'patient_id': 5742, 'patient_name': 'Krystal Nguyen DDS', 'patient_dob': neo4j.time.DateTime(1969, 

"The most recently treated patient with complete treatment information is Krystal Nguyen DDS. Here are the details:\n\n- Patient ID: 5742\n- Name: Krystal Nguyen DDS\n- Date of Birth: September 14, 1969\n- Sex: Male\n- Blood Type: A-\n- Visit ID: 5742\n- Room Number: 232\n- Admission Date: October 30, 2023\n- Test Results: Abnormal\n- Discharge Date: October 31, 2023\n- Chief Complaints: Frequent bleeding from the nose\n- Treatment: Nasal cauterization, humidifiers, and addressing underlying causes such as nasal polyps or blood clotting disorders.\n- Diagnosis: R19.8 - Other specified symptoms and signs involving the digestive system and abdomen\n- Physician ID: 272\n- Physician Name: Lance Gonzalez\n- Physician Date of Birth: October 18, 1981\n- Physician School: Mayo Clinic Alix School of Medicine\n- Physician Salary: $348,731.46\n\nThis information should provide a comprehensive overview of the patient's recent treatment and the attending physician's details."

In [137]:
# Fixed by prompting on filter value on 4-1160
cypher_chain.run(
    "What percentage of visits have abnormal test results?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (v:Visit)
WHERE v.test_results = 'Abnormal'
WITH COUNT(v) AS abnormal_visits
MATCH (v2:Visit)
WITH abnormal_visits, COUNT(v2) AS total_visits
RETURN (abnormal_visits * 1.0 / total_visits) * 100 AS percentage_abnormal
[0m
Full Context:
[32;1m[1;3m[{'percentage_abnormal': 34.56691338267653}][0m

[1m> Finished chain.[0m


'34.57% of visits have abnormal test results.'

In [138]:
cypher_chain.run(
    "What percentage of visits had abnormal test results in 2022?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (v:Visit)
WHERE v.admission_date >= datetime('2022-01-01T00:00:00') AND v.admission_date < datetime('2023-01-01T00:00:00') AND v.test_results = 'Abnormal'
WITH COUNT(v) AS abnormalVisits
MATCH (v2:Visit)
WHERE v2.admission_date >= datetime('2022-01-01T00:00:00') AND v2.admission_date < datetime('2023-01-01T00:00:00')
WITH abnormalVisits, COUNT(v2) AS totalVisits
RETURN (toFloat(abnormalVisits) / totalVisits) * 100 AS percentageAbnormalResults
[0m
Full Context:
[32;1m[1;3m[{'percentageAbnormalResults': 34.55}][0m

[1m> Finished chain.[0m


'In 2022, 34.55% of visits had abnormal test results.'

In [139]:
# Works on 4-turbo
cypher_chain.run(
    "What is the average duration of a visit in days?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (v:Visit)
WHERE v.admission_date IS NOT NULL AND v.discharge_date IS NOT NULL
RETURN avg(duration.inDays(date(v.admission_date), date(v.discharge_date)).days) AS average_duration_days
[0m
Full Context:
[32;1m[1;3m[{'average_duration_days': 15.567088607594895}][0m

[1m> Finished chain.[0m


'The average duration of a visit is approximately 15.57 days.'

In [140]:
# Works on 4-turbo
cypher_chain.run(
    "Which physician has the lowest average visit duration in days?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (ph:Physician)-[:TREATS]->(v:Visit)-[:AT]->(h:Hospital)
WHERE v.discharge_date IS NOT NULL AND v.admission_date IS NOT NULL
WITH ph, AVG(duration.inDays(date(v.admission_date), date(v.discharge_date)).days) AS avg_duration
RETURN ph.name AS physician, avg_duration
ORDER BY avg_duration ASC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'physician': 'Kristin Phillips', 'avg_duration': 8.857142857142856}][0m

[1m> Finished chain.[0m


'Dr. Kristin Phillips has the lowest average visit duration, with an average of approximately 8.86 days per visit.'

In [150]:
# Works on 4-turbo
cypher_chain.run(
    "What percentage of patients have used more than one provider?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Patient)-[:HAS]->(:Visit)-[:COVERED_BY]->(prov:Provider)
WITH p, count(DISTINCT prov) AS providerCount
WHERE providerCount > 1
WITH count(p) AS patientsWithMultipleProviders
MATCH (p:Patient)
WITH count(p) AS totalPatients, patientsWithMultipleProviders
RETURN (toFloat(patientsWithMultipleProviders) / toFloat(totalPatients)) * 100 AS percentage
[0m
Full Context:
[32;1m[1;3m[{'percentage': 4.4149652813763085}][0m

[1m> Finished chain.[0m


'4.41% of patients have used more than one provider.'

In [162]:
cypher_chain.run(
    "How many visits are open and what is their average duration in days?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (v:Visit)
WHERE v.status = 'OPEN' AND v.discharge_date IS NULL
RETURN COUNT(v) AS open_visits, 
       AVG(duration.inDays(date(v.admission_date), date()).days) AS average_duration
[0m
Full Context:
[32;1m[1;3m[{'open_visits': 500, 'average_duration': 183.41599999999983}][0m

[1m> Finished chain.[0m


'Currently, there are 500 open visits, and the average duration of these visits is approximately 183.42 days.'

In [166]:
cypher_chain.run(
    "What is the total billing amount for open visits?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"Based on the information provided, I don't know the answer to the total billing amount for open visits."

In [167]:
cypher_chain.run(
    "What percentage of visits were an emergency in 2022?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (v:Visit)
WHERE v.admission_date >= datetime('2022-01-01T00:00:00') AND v.admission_date < datetime('2023-01-01T00:00:00') AND v.chief_complaints = 'Emergency'
WITH COUNT(v) AS emergency_visits
MATCH (v2:Visit)
WHERE v2.admission_date >= datetime('2022-01-01T00:00:00') AND v2.admission_date < datetime('2023-01-01T00:00:00')
WITH emergency_visits, COUNT(v2) AS total_visits
RETURN (emergency_visits * 1.0 / total_visits) * 100 AS emergency_percentage
[0m
Full Context:
[32;1m[1;3m[{'emergency_percentage': 0.0}][0m

[1m> Finished chain.[0m


'In 2022, the percentage of visits that were an emergency was 0.0%.'

#### Knowledge graph agent
Use an agent to combine the unstrucuted text RAG with Cypher RAG dynamically

In [58]:
from typing import Any
import asyncio
import sys
import numpy as np
from langchain.callbacks.streaming_aiter_final_only import AsyncFinalIteratorCallbackHandler
from langchain.callbacks.streaming_stdout_final_only import FinalStreamingStdOutCallbackHandler
from langchain.schema import LLMResult


def get_current_wait_times(hospital: str) -> str | float:
    
    """ TODO """
    
    graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"))
    
    current_hospitals = graph.query("""
                                    MATCH (h:Hospital)
                                    RETURN h.name AS hospital_name 
                                    """)
    
    current_hospitals = [d["hospital_name"].lower() for d in current_hospitals]
    
    if hospital.lower() not in current_hospitals:
        
        return f"Hospital '{hospital}' does not exist"
    
    return np.random.randint(low=0, high=600)
            
tools = [
    Tool(
        name="Reviews",
        func=vector_qa.run,
        description="""
        Useful when you need to answer questions about patient reviews or experiences. 
        Not useful for answering objective questions that involve counting or aggregation.
        Use full question as input.
        """,
    ),
    Tool(
        name="Graph",
        func=cypher_chain.run,
        description="""
        Useful for answering questions about patients, physiscians, hospitals, insurance 
        payers, and hospital visit details. Use full question as input.
        """,
    ),
     Tool(
        name="Waits",
        func=get_current_wait_times,
        description="""
        Use when asked about current wait times at a hospital. This tool can only get
        the current wait time at a hospital and does not have any information about
        aggregate or historical wait times. This tool returns wait times in minutes.
        Only pass the hospital name as input.
        """,
    )
]

hospital_rag_agent = initialize_agent(
    tools,
    ChatOpenAI(model="gpt-3.5-turbo-1106", streaming=True, temperature=0, callbacks=[FinalStreamingStdOutCallbackHandler()]),
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True
)

In [40]:
hospital_rag_agent.run("How long is the wait at wallace-hamilton in hours?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Waits` with `wallace-hamilton`


[0m[38;5;200m[1;3m342[0m[32;1m[1;3mThe current wait time at Wallace-Hamilton is 342 minutes, which is approximately 5.7 hours.[0m

[1m> Finished chain.[0m


'The current wait time at Wallace-Hamilton is 342 minutes, which is approximately 5.7 hours.'

In [41]:
hospital_rag_agent.run("What are patinets saying about the nursing staff at Castaneda-Hardy?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Reviews` with `What are patients saying about the nursing staff at Castaneda-Hardy?`


[0m[36;1m[1;3mPatients have mentioned that the nursing staff at Castaneda-Hardy is attentive and provides excellent medical care.[0m[32;1m[1;3mPatients have mentioned that the nursing staff at Castaneda-Hardy is attentive and provides excellent medical care.[0m

[1m> Finished chain.[0m


'Patients have mentioned that the nursing staff at Castaneda-Hardy is attentive and provides excellent medical care.'

In [42]:
hospital_rag_agent.run("What is the total billing amount charged to each payer?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `total billing amount charged to each payer`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Payer)<-[c:COVERED_BY]-(v:Visit)
RETURN p.name AS payer_name, SUM(c.billing_amount) AS total_billing_amount
ORDER BY total_billing_amount DESC[0m
Full Context:
[32;1m[1;3m[{'payer_name': 'UnitedHealthcare', 'total_billing_amount': 52221646.735484034}, {'payer_name': 'Cigna', 'total_billing_amount': 52161684.466568835}, {'payer_name': 'Blue Cross', 'total_billing_amount': 51440502.95502345}, {'payer_name': 'Medicare', 'total_billing_amount': 50464204.49895314}, {'payer_name': 'Aetna', 'total_billing_amount': 48806964.28556202}][0m

[1m> Finished chain.[0m
[33;1m[1;3mThe total billing amount charged to each payer is as follows:
- UnitedHealthcare: $52,221,646.74
- Cigna: $52,161,684.47
- Blue Cross: $51,440,502.96
- Medicare: $50,464,204.50
- Aetna: $48,8

'The total billing amount charged to each payer is as follows:\n- UnitedHealthcare: $52,221,646.74\n- Cigna: $52,161,684.47\n- Blue Cross: $51,440,502.96\n- Medicare: $50,464,204.50\n- Aetna: $48,806,964.29'

In [43]:
hospital_rag_agent.run("What is the total billing amount charged to each payer for 2023?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `total billing amount charged to each payer for 2023`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Payer)<-[c:COVERED_BY]-(v:Visit)
WHERE c.service_date STARTS WITH '2023'
RETURN p.name AS payer_name, SUM(c.billing_amount) AS total_billing_amount[0m
Full Context:
[32;1m[1;3m[{'payer_name': 'Medicare', 'total_billing_amount': 5902495.725205273}, {'payer_name': 'UnitedHealthcare', 'total_billing_amount': 6359067.333282183}, {'payer_name': 'Aetna', 'total_billing_amount': 5577131.580994591}, {'payer_name': 'Cigna', 'total_billing_amount': 6999482.788463744}, {'payer_name': 'Blue Cross', 'total_billing_amount': 5249587.508591171}][0m

[1m> Finished chain.[0m
[33;1m[1;3mIn 2023, the total billing amount charged to each payer is as follows:
- Medicare: $5,902,495.73
- UnitedHealthcare: $6,359,067.33
- Aetna: $5,577,131.58
- Cigna: $6,999,482.79
- 

'In 2023, the total billing amount charged to each payer is as follows:\n- Medicare: $5,902,495.73\n- UnitedHealthcare: $6,359,067.33\n- Aetna: $5,577,131.58\n- Cigna: $6,999,482.79\n- Blue Cross: $5,249,587.51'

In [44]:
hospital_rag_agent.run("What is the average billing amount to medicare payers?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `average billing amount to medicare payers`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Payer{name:'Medicare'})<-[c:COVERED_BY]-(v:Visit)
RETURN AVG(c.billing_amount) AS average_billing_amount[0m
Full Context:
[32;1m[1;3m[{'average_billing_amount': 25244.724611782458}][0m

[1m> Finished chain.[0m
[33;1m[1;3mThe average billing amount to Medicare payers is $25,244.72.[0m[32;1m[1;3mThe average billing amount to Medicare payers is $25,244.72.[0m

[1m> Finished chain.[0m


'The average billing amount to Medicare payers is $25,244.72.'

In [45]:
hospital_rag_agent.run("How many patients has doctor Ryan Brown treated?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `How many patients has doctor Ryan Brown treated?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Physician {name: 'Ryan Brown'})-[:TREATS]->(v:Visit)<-[:HAS]-(pt:Patient)
RETURN COUNT(DISTINCT pt) AS number_of_patients_treated[0m
Full Context:
[32;1m[1;3m[{'number_of_patients_treated': 21}][0m

[1m> Finished chain.[0m
[33;1m[1;3mDoctor Ryan Brown has treated 21 patients.[0m[32;1m[1;3mDoctor Ryan Brown has treated 21 patients.[0m

[1m> Finished chain.[0m


'Doctor Ryan Brown has treated 21 patients.'

In [46]:
hospital_rag_agent.run("Which physician has the lowest average visit duration in days?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `Which physician has the lowest average visit duration in days?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (phy:Physician)-[:TREATS]->(v:Visit)
WHERE v.discharge_date IS NOT NULL
WITH phy, duration.between(date(v.admission_date), date(v.discharge_date)).days AS visit_duration
RETURN phy.name AS physician_name, AVG(visit_duration) AS average_visit_duration
ORDER BY average_visit_duration
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'physician_name': 'Amber Parker MD', 'average_visit_duration': 7.333333333333334}][0m

[1m> Finished chain.[0m
[33;1m[1;3mThe physician with the lowest average visit duration in days is Amber Parker MD, with an average visit duration of 7.33 days.[0m[32;1m[1;3mThe physician with the lowest average visit duration in days is Amber Parker MD, with an average visit duration of 7.33 days.[0m

[1m> Finished chain.[0m


'The physician with the lowest average visit duration in days is Amber Parker MD, with an average visit duration of 7.33 days.'

In [47]:
hospital_rag_agent.run(
    "How many visits are open and what is their average duration in days?"
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `How many visits are open and what is their average duration in days?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (v:Visit)
WHERE v.status = 'OPEN' AND v.discharge_date IS NULL
WITH COUNT(v) AS open_visits
MATCH (v:Visit)
WHERE v.status = 'OPEN' AND v.discharge_date IS NULL
RETURN open_visits, AVG(duration.between(date(v.admission_date), date()).days) AS average_duration[0m
Full Context:
[32;1m[1;3m[{'open_visits': 500, 'average_duration': 14.61200000000001}][0m

[1m> Finished chain.[0m
[33;1m[1;3mThere are currently 500 open visits, with an average duration of 14.612 days.[0m[32;1m[1;3mThere are currently 500 open visits, with an average duration of 14.612 days.[0m

[1m> Finished chain.[0m


'There are currently 500 open visits, with an average duration of 14.612 days.'

In [48]:
hospital_rag_agent.run(
    "Have any patients complained about noise?"
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Reviews` with `Have any patients complained about noise?`


[0m[36;1m[1;3mYes, two patients have complained about noise. Sarah Love and David Kim both mentioned that the noise levels in the hallway were disruptive and affected their ability to rest.[0m[32;1m[1;3mYes, two patients have complained about noise. Sarah Love and David Kim both mentioned that the noise levels in the hallway were disruptive and affected their ability to rest.[0m

[1m> Finished chain.[0m


'Yes, two patients have complained about noise. Sarah Love and David Kim both mentioned that the noise levels in the hallway were disruptive and affected their ability to rest.'

In [49]:
hospital_rag_agent.run(
    "Have any patients complained about wait times?"
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Reviews` with `complaints about wait times`


[0m[36;1m[1;3mThere are several complaints about wait times in the reviews:

1. Lisa Lowery was disappointed with the long wait times in the emergency room at Schultz-Powers. She felt like hours passed before she received any attention, and the staff seemed overwhelmed with the number of patients.

2. Dan Potter mentioned that the wait times for tests and results were quite long at Shea LLC. He felt that it added unnecessary stress to an already challenging situation.

3. Alexander Sellers had an unsatisfactory experience at Lewis-Nelson hospital. He found the wait times unbearable and was frustrated by the lack of communication from the staff. He believes that improvement is needed in their efficiency.

4. Jennifer Russell also mentioned frustrating wait times for tests and results at Wallace-Hamilton hospital. She believes that improvement in this area would great

'Yes, there have been several complaints about wait times from patients. Some of the common complaints include long wait times in the emergency room, for tests and results, and a lack of communication from the staff. These complaints highlight the need for improvement in efficiency and communication to enhance the overall patient experience.'

In [50]:
# Fixed
hospital_rag_agent.run(
    "How much was billed for patient 789's stay?"
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `How much was billed for patient 789's stay?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Patient {id: 789})-[:HAS]->(v:Visit)
MATCH (v)-[c:COVERED_BY]->(payer:Payer)
RETURN SUM(c.billing_amount) AS total_billed[0m
Full Context:
[32;1m[1;3m[{'total_billed': 8749.923730892473}][0m

[1m> Finished chain.[0m
[33;1m[1;3mThe total amount billed for patient 789's stay is $8749.92.[0m[32;1m[1;3mThe total amount billed for patient 789's stay is $8749.92.[0m

[1m> Finished chain.[0m


"The total amount billed for patient 789's stay is $8749.92."

In [51]:
# Fixed with example in prompt
hospital_rag_agent.run(
    "Which physician has billed the most to medicare?"
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `Which physician has billed the most to medicare?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Payer)<-[c:COVERED_BY]-(v:Visit)-[t:TREATS]-(phy:Physician)
WHERE p.name = 'Medicare'
RETURN phy.name AS physician_name, SUM(c.billing_amount) AS total_billed
ORDER BY total_billed DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'physician_name': 'Kyle Campbell', 'total_billed': 382313.91613655945}][0m

[1m> Finished chain.[0m
[33;1m[1;3mThe physician who has billed the most to Medicare is Kyle Campbell, with a total billed amount of $382,313.92.[0m[32;1m[1;3mThe physician who has billed the most to Medicare is Kyle Campbell, with a total billed amount of $382,313.92.[0m

[1m> Finished chain.[0m


'The physician who has billed the most to Medicare is Kyle Campbell, with a total billed amount of $382,313.92.'

'gpt-3.5-turbo'