In [None]:
import pandas as pd
from openai import OpenAI
# import graph database driver
from neo4j import GraphDatabase
# import the OpenAI ChatOpenAI class
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key="",
    temperature=0.0,
    max_tokens=None
    )


df = pd.read_csv('CMG_EDA_1.csv')
# Return empty string if the value is NaN, apply to columns 'Head text', 'Relation text' and 'Tail text'
df['Head text'] = df['Head text'].apply(lambda x: '' if pd.isnull(x) else x)
df['Relation text'] = df['Relation text'].apply(lambda x: '' if pd.isnull(x) else x)
df['Tail text'] = df['Tail text'].apply(lambda x: '' if pd.isnull(x) else x)

# embedding = client.embeddings.create()

client = OpenAI(api_key="")
# connect to the Neo4j database
uri = "bolt://localhost:54621"
driver = GraphDatabase.driver(uri, auth=("neo4j", "saiyan94"))

# Generate embeddings for each row in the dataframe, using text-embedding-3-small, on column 'Head text', 'Relation text' and 'Tail text'
def generate_embeddings(df):
    embeddings = []
    for i in range(len(df)):
        head_text = df['Head text'][i]
        relation_text = df['Relation text'][i]
        tail_text = df['Tail text'][i]
        # embed and store the embeddings in separated lists
        head_embedding = client.embeddings.create(input=head_text, model="text-embedding-3-small")
        relation_embedding = client.embeddings.create(input=relation_text, model="text-embedding-3-small")
        tail_embedding = client.embeddings.create(input=tail_text, model="text-embedding-3-small")
        embeddings.append([head_embedding, relation_embedding, tail_embedding])
    return embeddings

# update the dataframe with the embeddings
def update_df(df, embeddings):
    df['Head embedding'] = [embedding[0] for embedding in embeddings]
    df['Relation embedding'] = [embedding[1] for embedding in embeddings]
    df['Tail embedding'] = [embedding[2] for embedding in embeddings]
    return df

# function to generate embeddings for a certain number of rows
def generate_embeddings_batch(df, start, end=None):
    # end = -1 means the end of the dataframe
    if end == -1:
        end = len(df)
    embeddings = []
    for i in range(start, end):
        head_text = df['Head text'][i]
        relation_text = df['Relation text'][i]
        tail_text = df['Tail text'][i]
        # embed and store the embeddings in separated lists
        head = client.embeddings.create(input=[head_text], model="text-embedding-3-small")
        head_embedding = [r.embedding for r in head.data]
        relation = client.embeddings.create(input=[relation_text], model="text-embedding-3-small")
        relation_embedding = [r.embedding for r in relation.data]
        if tail_text.strip():
            tail = client.embeddings.create(input=[tail_text], model="text-embedding-3-small")
            tail_embedding = [r.embedding for r in tail.data]
        else:
            tail_embedding = []
        embeddings.append([head_embedding, relation_embedding, tail_embedding])
    return embeddings




: 

In [101]:
# Re-write the embeddings fucntion to update embedding to neo4j head, relation and tail nodes
def get_openai_embedding(text):
    # embed and store the embeddings in separated lists
    response = client.embeddings.create(input=text, model="text-embedding-3-small")
    return response.data[0].embedding

# Write function to access Neo4j graph, match nodes that have "text" property and embed the text into a new property "embedding"
def update_neo4j_embeddings():
    with driver.session() as session:
        query_node_property = """
        MATCH (n)
        WHERE n.text IS NOT NULL
        RETURN n.article_id AS article_id, n.paragraph_id AS paragraph_id, n.event_id AS event_id, n.text AS text_property
        """
        result = session.run(query_node_property)
        for record in result:
            text = record['text_property']
            embedding = get_openai_embedding(text)
            query_update_embedding = """
            MATCH (n)
            WHERE n.text = $text
            SET n.embedding = $embedding
            """
            session.run(query_update_embedding, text=text, embedding=embedding)

def update_relation_neo4j_embeddings():
    with driver.session() as session:
        query_relation_property = """
        MATCH ()-[r]->()
        WHERE r.text IS NOT NULL
        RETURN r.article_id AS article_id, r.paragraph_id AS paragraph_id, r.event_id AS event_id, r.text AS text_property
        """
        result = session.run(query_relation_property)
        for record in result:
            text = record['text_property']
            embedding = get_openai_embedding(text)
            query_update_embedding = """
            MATCH ()-[r]->()
            WHERE r.text = $text
            SET r.embedding = $embedding
            """
            session.run(query_update_embedding, text=text, embedding=embedding)

In [91]:
# test get_openai_embedding
text = 'hello world'
embedding = get_openai_embedding(text)

In [103]:
update_relation_neo4j_embeddings()

In [99]:
update_neo4j_embeddings()

In [47]:
# check df head, relation and tail text if there is any NaN value
print(df['Head text'].isnull().sum())
print(df['Relation text'].isnull().sum())
print(df['Tail text'].isnull().sum())

# print the df Head, Relation, Tail for checking
print(df['Head text'])
print(df['Relation text'])
print(df['Tail text'])
# check number of items in the Head column, Relation column and Tail column
print(len(df['Head text']))
print(len(df['Relation text']))
print(len(df['Tail text']))


0
0
0
0      Flexible endoscopes equipped with video cameras
1                     specialized, longer enteroscopes
2                                            endoscopy
3                                            endoscopy
4                                            endoscopy
                            ...                       
993                                     fistula-in-ano
994                                  anorectal abscess
995                                  anorectal abscess
996                                     fistula-in-ano
997                               rectovaginal fistula
Name: Head text, Length: 998, dtype: object
0        can be used to view
1      can be used to assess
2                can combine
3                      gives
4            often outweighs
               ...          
993                      and
994                      and
995                  implies
996                  implies
997                  implies
Name: Relation text, Lengt

In [23]:
# embeddings = generate_embeddings(df)
# Test generate embeddings batch with 1 row
embeddings = generate_embeddings_batch(df, 0, 1)
print(embeddings)

[[[[0.023966971784830093, 0.011609974317252636, 0.0076818810775876045, -0.00022041054035071284, -0.05518006160855293, 0.0023935844656080008, -0.023904720321297646, -0.00251341937109828, 0.0343630351126194, -0.020804576575756073, 0.04581737890839577, -0.07171415537595749, 0.016471846029162407, -0.033192697912454605, 0.023705514147877693, -0.05049872025847435, -0.01980854570865631, -0.005829887930303812, -0.006309227552264929, 0.05981159955263138, 0.03491085022687912, -0.007451549172401428, -0.007327045779675245, -0.05503065511584282, -0.001314292661845684, -0.042729686945676804, -0.032943692058324814, 0.05453264340758324, 0.019173577427864075, 0.0067792292684316635, 0.032171767204999924, -0.03550846874713898, -0.03329230099916458, -0.0067543284967541695, -0.08067842572927475, 0.05234137549996376, 0.0028153411112725735, 0.0034487538505345583, -0.026021283119916916, 0.025672672316432, 0.0508224293589592, 0.005941941402852535, 0.018638210371136665, -0.00992917362600565, 0.00836042687296867

In [33]:
# pring embeddings's shape. note that embeddings is a list of lists of lists
head_embedding = embeddings[0][0]
relation_embedding = embeddings[0][1]
tail_embedding = embeddings[0][2]
print("head_embedding shape:", head_embedding[0])
print("relation_embedding shape:", relation_embedding[0])
print("tail_embedding shape:", tail_embedding[0])

head_embedding shape: [0.023966971784830093, 0.011609974317252636, 0.0076818810775876045, -0.00022041054035071284, -0.05518006160855293, 0.0023935844656080008, -0.023904720321297646, -0.00251341937109828, 0.0343630351126194, -0.020804576575756073, 0.04581737890839577, -0.07171415537595749, 0.016471846029162407, -0.033192697912454605, 0.023705514147877693, -0.05049872025847435, -0.01980854570865631, -0.005829887930303812, -0.006309227552264929, 0.05981159955263138, 0.03491085022687912, -0.007451549172401428, -0.007327045779675245, -0.05503065511584282, -0.001314292661845684, -0.042729686945676804, -0.032943692058324814, 0.05453264340758324, 0.019173577427864075, 0.0067792292684316635, 0.032171767204999924, -0.03550846874713898, -0.03329230099916458, -0.0067543284967541695, -0.08067842572927475, 0.05234137549996376, 0.0028153411112725735, 0.0034487538505345583, -0.026021283119916916, 0.025672672316432, 0.0508224293589592, 0.005941941402852535, 0.018638210371136665, -0.00992917362600565, 

In [None]:
embeddings_all = generate_embeddings_batch(df, 0, -1)
print(embeddings_all)

In [1]:
from neo4j import GraphDatabase
import pandas as pd

In [29]:
df_xlsx = pd.read_excel('output_embeddings.xlsx')

def import_to_neo4j(df_xlsx):
    with driver.session() as session:
        for _, row in df_xlsx.iterrows():
            head_embedding = row['Head embedding']
            relation_embedding = row['Relation embedding']
            tail_embedding = row['Tail embedding']

            # store text
            head_text = row['Head text'].replace("'", "''")
            relation_text = row['Relation text'].replace("'", "''")
            # if tail text is not NaN, replace single quote with double single quote
            if pd.notnull(row['Tail text']):
                tail_text = row['Tail text'].replace("'", "''")

            # store Event ID, Paragraph ID, Article ID
            event_id = row['Event ID']
            paragraph_id = row['Paragraph ID']
            article_id = row['Article ID']

            # Extract and process head and tail labels as lists, replace spaces with underscores, replace "-" with "_"
            
            head_labels = [label.strip().replace(" ", "_").replace("-", "_").replace("'", "''") for label in row['Head labels'].split(",")]
            tail_labels = [label.strip().replace(" ", "_").replace("-", "_").replace("'", "''") for label in row['Tail labels'].split(",")]
            relation_label = row['Relation label'].replace(" ", "_").replace("-", "_").replace("'", "''")

            # Create Cypher labels for head and tail nodes
            head_labels_cypher = ':'.join(head_labels)
            tail_labels_cypher = ':'.join(tail_labels)

            # Create Cypher query to create head and tail nodes, with properties and relation
            query = f"""
            MERGE (head:{head_labels_cypher} {{
                text: $head_text, 
                embedding: $head_embedding, 
                event_id: $event_id, 
                paragraph_id: $paragraph_id, 
                article_id: $article_id
            }})
            MERGE (tail:{tail_labels_cypher} {{
                text: $tail_text, 
                embedding: $tail_embedding, 
                event_id: $event_id, 
                paragraph_id: $paragraph_id, 
                article_id: $article_id
            }})
            MERGE (head)-[:{relation_label} {{
                text: $relation_text,
                embedding: $relation_embedding,
                event_id: $event_id,
                paragraph_id: $paragraph_id,
                article_id: $article_id
            }}]->(tail)
            """

            parameters = {
                'head_text': head_text,
                'head_embedding': head_embedding,
                'tail_text': tail_text,
                'tail_embedding': tail_embedding,
                'event_id': event_id,
                'paragraph_id': paragraph_id,
                'article_id': article_id,
                'relation_text': relation_text,
                'relation_embedding': relation_embedding
            }
            session.run(query, parameters)

In [37]:
# document_relation_query = f"""
# MATCH (a), (b) 
# WHERE a.article_id = b.article_id 
# AND a.event_id <> b.event_id 
# CREATE (a)-[:IS_IN_THE_SAME_ARTICLE]->(b)
# """

# paragraph_relation_query = f"""
# MATCH (a), (b)
# WHERE a.paragraph_id = b.paragraph_id
# AND a.event_id <> b.event_id
# CREATE (a)-[:IS_IN_THE_SAME_PARAGRAPH]->(b)
# """

In [40]:
document_contain_paragraph_query = f"""
MATCH (n) 
WITH DISTINCT n.article_id AS article_id, n.paragraph_id AS paragraph_id 
WHERE article_id IS NOT NULL AND paragraph_id IS NOT NULL
MERGE (a:Article {{article_id: article_id}})
MERGE (p:Paragraph {{paragraph_id: paragraph_id}})
MERGE (a)-[:CONTAINS]->(p)
"""
paragraph_contain_event_query = f"""
MATCH (n) 
WITH n, n.paragraph_id AS paragraph_id, n.article_id AS article_id 
WHERE paragraph_id IS NOT NULL AND article_id IS NOT NULL
MERGE (e:Event {{event_id: n.event_id, paragraph_id: paragraph_id}})
MERGE (p:Paragraph {{paragraph_id: paragraph_id}})
MERGE (p)-[:CONTAINS]->(e)
"""
head_relation_tail_of_event_query = f"""
MATCH p=()-[r]->()
MATCH (e:Event)
WHERE r.event_id = e.event_id
RETURN p
"""
event_head_relation_tail_query = f"""
MATCH (e:Event), (h)-[r]->(t)
WHERE h.event_id = e.event_id AND t.event_id = e.event_id
MERGE (e)-[:HAS_HEAD]->(h)
MERGE (e)-[:HAS_TAIL]->(t)
RETURN e, h, r, t
"""

In [30]:
import_to_neo4j(df_xlsx)

In [34]:
with driver.session() as session:
    session.run(document_relation_query)

In [35]:
with driver.session() as session:
    session.run(paragraph_relation_query)

In [41]:
# end the session
driver.close()

In [144]:
from neo4j import GraphDatabase

class Neo4jRetriever:
    def __init__(self, uri="bolt://localhost:54621", username="neo4j", password="saiyan94"):
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

    def embed_question(self, question):
        response = client.embeddings.create(input=question, model="text-embedding-3-small")
        return response.data[0].embedding

    def get_relevant_documents(self, similarity_query, query_embedding, top_k=5):
        with self.driver.session() as session:
            results = session.run(similarity_query, query_embedding=query_embedding, top_k=top_k)
            
            # return the full text concatenation of head, relation and tail
            return [(record['n_text'], record['r_text'], record['m_text']) for record in results]
    def close(self):
        self.driver.close()

In [115]:
# step_by_step_similarity_query will return the similarity of the head, based on the input_text
similarity_query = f"""
MATCH (n)-[r]->(m)
WHERE n.embedding IS NOT NULL AND m.embedding IS NOT NULL AND r.embedding IS NOT NULL
WITH n, m, r, 
    gds.similarity.cosine(n.embedding, $query_embedding) AS head_similarity,
    gds.similarity.cosine(m.embedding, $query_embedding) AS tail_similarity,
    gds.similarity.cosine(r.embedding, $query_embedding) AS relation_similarity
WHERE head_similarity > 0.5 OR tail_similarity > 0.5 OR relation_similarity > 0.5
RETURN n, m, r, n.text AS n_text, m.text AS m_text, r.text AS r_text, head_similarity, tail_similarity, relation_similarity
ORDER BY head_similarity DESC, tail_similarity DESC, relation_similarity DESC
LIMIT $top_k
"""

In [130]:
similarity_query_2 = f"""
MATCH (n)-[r]->(m)
WHERE n.embedding IS NOT NULL
WITH n, m, r, gds.similarity.cosine(n.embedding, $query_embedding) AS head_similarity
WHERE head_similarity > 0.5
RETURN n, m, r, n.text AS n_text, m.text AS m_text, r.text AS r_text, head_similarity
ORDER BY head_similarity DESC
LIMIT $top_k
"""

In [162]:
similarity_query_3 = f"""
MATCH (m:Tail)
WHERE m.embedding IS NOT NULL
WITH m, gds.similarity.cosine(m.embedding, $query_embedding) AS m_similarity
WHERE m_similarity > 0.5
MATCH (n)-[r]->(m)
WHERE n.text IS NOT NULL AND r.text IS NOT NULL
RETURN n.text AS n_text, r.text AS r_text, m.text AS m_text, m_similarity
ORDER BY m_similarity DESC
LIMIT $top_k
"""

In [173]:
similarity_query_4 = f"""
MATCH (n)-[r]->(m)
WHERE r.embedding IS NOT NULL  // Ensure the relationship has an embedding
WITH n, m, r, gds.similarity.cosine(r.embedding, $query_embedding) AS r_similarity  // Include n and m in the WITH clause
WHERE r_similarity > 0.3  // Filter based on the similarity threshold
RETURN n.text AS n_text, m.text AS m_text, type(r) AS relation_type, r.text AS r_text, r_similarity  // Return the type and text of the relationship along with similarity score
ORDER BY r_similarity DESC  // Order by similarity
LIMIT $top_k  // Limit results to top K
"""

In [118]:
neo4j_retriever = Neo4jRetriever()
question = "Tell me step by step of colonoscopy for cancer screening?"
query_embedding = neo4j_retriever.embed_question(question)
relevant_docs = neo4j_retriever.get_relevant_documents(query_embedding)
for doc in relevant_docs:
    print(doc)

# Close the retriever connection
neo4j_retriever.close()

('During colonoscopy', 'any polyps seen are removed and', 'evaluated for possible cancer')
('Colonoscopy', 'may be valuable to rule out', 'inflammatory bowel disease')
('normal colonoscopy', 'negates', 'advanced colon cancer')
('virtual colonoscopy', 'include', 'inability to biopsy the polyps at the time of examination')
('virtual colonoscopy', 'include', 'radiation exposure')


In [164]:
neo4j_retriever = Neo4jRetriever()
question = "Tell me step by step of colonoscopy for cancer screening?"
query_embedding = neo4j_retriever.embed_question(question)
relevant_docs = neo4j_retriever.get_relevant_documents(similarity_query_2, query_embedding, top_k=10)
for doc in relevant_docs:
    print(doc)

# Close the retriever connection
neo4j_retriever.close()

('During colonoscopy', 'any polyps seen are removed and', 'evaluated for possible cancer')
('Colonoscopy', 'may be valuable to rule out', 'inflammatory bowel disease')
('normal colonoscopy', 'negates', 'advanced colon cancer')
('virtual colonoscopy', 'include', 'inability to biopsy the polyps at the time of examination')
('virtual colonoscopy', 'include', 'radiation exposure')
('colonoscopy', 'is preferred because', 'polyps may be removed during that procedure')
('colonoscopy', 'alternative CRC screening tests are available for patients who decline', 'colonoscopy')
('colonoscopy', 'requires stopping', 'oral iron-containing medications')
('colonoscopy', 'requires', 'cleansing of the colon')
('colonoscopy', 'requires', 'rapid colonic preparation')


In [158]:
from neo4j import GraphDatabase

class Neo4jRetriever:
    def __init__(self, uri="bolt://localhost:54621", username="neo4j", password="saiyan94"):
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

    def embed_question(self, question):
        response = client.embeddings.create(input=question, model="text-embedding-3-small")
        return response.data[0].embedding

    def get_relevant_documents(self, similarity_query, query_embedding, top_k=5):
        with self.driver.session() as session:
            results = session.run(similarity_query, query_embedding=query_embedding, top_k=top_k)
            return [(record['n_text'], record['r_text'], record['m_text']) for record in results]
            
    def close(self):
        self.driver.close()

In [163]:
neo4j_retriever = Neo4jRetriever()
question = "Tell me step by step of colonoscopy for cancer screening?"
query_embedding = neo4j_retriever.embed_question(question)
relevant_docs = neo4j_retriever.get_relevant_documents(similarity_query_3, query_embedding, top_k=10)
for doc in relevant_docs:
    print(doc)

# Close the retriever connection
neo4j_retriever.close()

('lower GI bleeding', 'for', 'Colonoscopy')
('age ≥ 45', 'is a threshold for', 'screening colonoscopy')
('high risk of colon cancer', 'is a reason for', 'screening colonoscopy')
('positive fecal occult blood tests', 'require', 'colonoscopy')
('positive fecal DNA tests', 'require', 'colonoscopy')
('lower GI bleeding', 'typically includes', 'colonoscopy')
('Ulcerative colitis', 'diagnosis is by', 'colonoscopy')
('lesions seen during sigmoidoscopy', 'require', 'colonoscopy')
('Colorectal cancer', 'Diagnosis is by', 'colonoscopy')
('lesions seen during an imaging study', 'require', 'colonoscopy')


In [174]:
neo4j_retriever = Neo4jRetriever()
question = "Tell me step by step of colonoscopy for cancer screening?"
query_embedding = neo4j_retriever.embed_question(question)
relevant_docs = neo4j_retriever.get_relevant_documents(similarity_query_4, query_embedding, top_k=10)
for doc in relevant_docs:
    print(doc)

# Close the retriever connection
neo4j_retriever.close()

('Virtual (CT) colonoscopy', 'visualizes the entire colon', 'in contrast, conventional colonoscopy does not allow the right colon to be evaluated completely in about 1 in 10 patients')
('average-risk patients', 'should begin screening for colorectal cancer at age 45 years', 'screening for colorectal cancer')
('average-risk patients', 'should continue screening for colorectal cancer until age 75 years', 'screening for colorectal cancer')
('adults aged 76 to 85', 'the decision whether to screen for CRC should be individualized', 'screening for colorectal cancer')
('colonoscopy', 'alternative CRC screening tests are available for patients who decline', 'colonoscopy')
('high-risk conditions (eg, ulcerative colitis)', 'screening is discussed under', 'the specific condition')
('economic issues preclude screening with colonoscopy', 'alternative CRC screening tests are available for patients for whom', 'colonoscopy')
('During colonoscopy', 'any polyps seen are removed and', 'evaluated for poss

In [180]:
neo4j_retriever = Neo4jRetriever()
questions = """
A 52-year-old man comes to to the emergency department with fatigue and shortness of breath that has become progressively worse over the past week.
He had an upper respiratory tract infection 2 weeks ago, for which he was given an antibiotic. 
He has hypertension, type 2 diabetes mellitus, and colonic polyps diagnosed on screening colonoscopy 2 years ago. 
His mother has systemic lupus erythematosus and his brother has a bicuspid aortic valve. He does not smoke cigarettes or drink alcohol. 
Current medications include lisinopril and metformin. 
His temperature is 37.3Â°C (99.1Â°F), pulse is 91/min, respirations are 18/min, and blood pressure is 145/84 mm Hg. 
His conjunctivae are pale. Cardiac examination shows a late systolic crescendo-decrescendo murmur at the right upper sternal border. 
Laboratory studies show:
Leukocyte Count 9,500/mm3
Hematocrit 24%
Platelet Count 178,000/mm3
LDH 215 U/L
Haptoglobin 22 mg/dL (N=41â€“165 mg/dL)
Serum
Na+ 140 mEq/L
K+ 4.6 mEq/L
CL- 100 mEq/L
HCO3- 25 mEq/L
Urea nitrogen 21 mg/dL
Creatinine 1.2 mg/dL
Total bilirubin 1.9 mg/dL
A peripheral blood smear is shown. 
Which of the following is the most likely cause of this patient's anemia?
"""
query_embedding = neo4j_retriever.embed_question(question)
def answer_chain_of_questions(questions):
    # feed each line in questions to the retriever, get the relevant documents, append to a list
    query_results = []
    for question in questions.split("\n"):
        query_embedding = neo4j_retriever.embed_question(question)
        relevant_docs = neo4j_retriever.get_relevant_documents(similarity_query_4, query_embedding, top_k=10)
        query_results.append(relevant_docs)
    return query_results



In [181]:
answer_chain_of_questions(questions)

[[('colon cancer', 'requires', 'diagnostic test'),
  ('IV neostigmine', 'requires', 'cardiac monitoring'),
  ('severe GI bleeding',
   'requires',
   'admission to an intensive care unit or other monitored setting'),
  ('Massive GI bleeding', 'requires', 'quick diagnosis and treatment'),
  ('colonoscopy', 'requires', 'cleansing of the colon'),
  ('endoscopy',
   'requires',
   'no solids for 8 hours and no liquids for 2 to 4 hours before the procedure'),
  ('Asymptomatic diverticulosis', 'requires', 'no treatment'),
  ('colonoscopy', 'requires', 'rapid colonic preparation'),
  ('severe GI bleeding',
   'requires',
   'consultation by both a gastroenterologist and a surgeon'),
  ('distal lesion found by flexible sigmoidoscopy',
   'requires',
   'complete colonoscopy to the cecum')],
 [('Mucosal prolapse in adults',
   'may progressively worsen',
   'progressively worsen'),
  ('numerous colon polyps',
   'affected patients usually present with',
   'colorectal cancer'),
  ('MUTYH polypo

In [195]:
# write function to use Langchain Chat API to answer the questions, based on the return of the answer_chain_of_questions
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="gpt-4o",
    api_key="sk-proj-pjGUyMNiHPDiH7CLYv4rT3BlbkFJQpZiY1U0sYAqOATygpnJ",
    temperature=0.0,
    max_tokens=None
    )
def answer_questions(query_results):
    # feed each relevant document to the Langchain Chat API, get the answer
    answers = []
    message_template = [
        (
            "system",
            """You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases. Based on the patient's information,
            what are the possible causes of the patient's condition?
            """
        ),
        (
            "user",
            "{patient_information}"
        )
    ]
    # use llm.invoke() to get the answer
    for query_result in query_results:
        for doc in query_result:
            # get all the text from the doc, concatenate them
            patient_information = doc[0] + " " + doc[1] + " " + doc[2]
            
            # Create a new message with the patient information replaced
            message = [
                message_template[0],  # system message stays the same
                (
                    message_template[1][0],  # 'doctor' part of the tuple
                    message_template[1][1].replace("{patient_information}", patient_information)  # replace patient info in the string
                )
            ]
            answer = llm.invoke(message)
            answers.append(answer)
    return answers

In [189]:
questions_chain = answer_chain_of_questions(questions)

None


In [196]:
answers = answer_questions(questions_chain)
print(answers)



In [204]:
print(len(answers))

149


In [203]:
# print all answers in the list, connect all "content"
for answer in answers:
    print(answer.content)

To diagnose colon cancer, several diagnostic tests and procedures may be utilized. These include:

1. **Colonoscopy**: This is the most common and effective test for diagnosing colon cancer. During a colonoscopy, a long, flexible tube with a camera (colonoscope) is inserted into the rectum to examine the entire colon. If any suspicious areas are found, biopsies can be taken for further analysis.

2. **Biopsy**: If a suspicious lesion is found during a colonoscopy, a biopsy is performed to remove a small sample of tissue for examination under a microscope to check for cancer cells.

3. **CT Colonography (Virtual Colonoscopy)**: This is a non-invasive imaging test that uses CT scans to produce detailed images of the colon and rectum. It is less invasive than a traditional colonoscopy but may not detect small polyps as effectively.

4. **Fecal Occult Blood Test (FOBT) or Fecal Immunochemical Test (FIT)**: These tests check for hidden blood in the stool, which can be an early sign of cance

In [208]:
# convert each answer to a list of [{"head": head_text, "relation": relation_text, "tail": tail_text}] with each dict contain an event in the answer
# the conversion would be done by feeding to the openai chat API
answer_conversion_prompt_templage="""
Think step by step. You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases. Based on the patient's information,
you have identified a possible cause of the patient's condition. Please provide a list of events that led to the diagnosis.
convert each answer to a list of [{"head": head_text, "relation": relation_text, "tail": tail_text}] with each dict contain an event in the answer
"""
message_template = [
    (
        "system",
        answer_conversion_prompt_templage
    ),
    (
        "user",
        "{answer}"
    )
]

def convert_answer_to_event(answer):
    message = [
        message_template[0],
        (
            message_template[1][0],
            message_template[1][1].replace("{answer}", answer)
        )
    ]
    event = llm.invoke(message)
    return event

In [209]:
# get all the events from the answers, convert them to a list of events
events = []
for answer in answers:
    event = convert_answer_to_event(answer.content)
    events.append(event)

In [214]:
print(events[0].content)

Here is a list of events that led to the diagnosis of colon cancer, formatted as requested:

1. [{"head": "Colonoscopy", "relation": "used for", "tail": "diagnosing colon cancer"}]
2. [{"head": "Colonoscopy", "relation": "procedure", "tail": "insertion of colonoscope to examine colon"}]
3. [{"head": "Suspicious areas found", "relation": "action", "tail": "biopsies taken for analysis"}]
4. [{"head": "Biopsy", "relation": "purpose", "tail": "examine tissue for cancer cells"}]
5. [{"head": "CT Colonography", "relation": "alternative", "tail": "non-invasive imaging test"}]
6. [{"head": "CT Colonography", "relation": "limitation", "tail": "may not detect small polyps effectively"}]
7. [{"head": "FOBT or FIT", "relation": "purpose", "tail": "check for hidden blood in stool"}]
8. [{"head": "FOBT or FIT", "relation": "limitation", "tail": "less definitive than colonoscopy"}]
9. [{"head": "Stool DNA Test", "relation": "purpose", "tail": "detect DNA markers for cancer"}]
10. [{"head": "Flexible 

In [215]:
# Use regex to extract the events from the answer. format: [{"head": head_text, "relation": relation_text, "tail": tail_text}]
import re
def extract_events(event):
    # use regex to extract the events
    pattern = r"\{.*\}"
    events = re.findall(pattern, event)
    return events

# extract all events from the list of events
extracted_events = []
for event in events:
    extracted_event = extract_events(event.content)
    extracted_events.append(extracted_event)

In [219]:
print(len(extracted_events))

149


In [218]:
extract_events(events[1].content)

['{"head": "Neostigmine administration", "relation": "can cause", "tail": "Bradycardia"}',
 '{"head": "Bradycardia", "relation": "is", "tail": "a decrease in heart rate"}',
 '{"head": "Bradycardia", "relation": "is significant and potentially dangerous", "tail": "in patients with pre-existing cardiac conditions"}',
 '{"head": "Neostigmine administration", "relation": "can induce", "tail": "Arrhythmias"}',
 '{"head": "Arrhythmias", "relation": "include", "tail": "atrioventricular block or other conduction abnormalities"}',
 '{"head": "Neostigmine administration", "relation": "can lead to", "tail": "Hypotension"}',
 '{"head": "Hypotension", "relation": "is", "tail": "a drop in blood pressure"}',
 '{"head": "Neostigmine", "relation": "is", "tail": "an acetylcholinesterase inhibitor"}',
 '{"head": "Acetylcholinesterase inhibition", "relation": "increases", "tail": "acetylcholine levels"}',
 '{"head": "Increased acetylcholine levels", "relation": "enhance", "tail": "parasympathetic activity

In [210]:
for event in events:
    print(event.content)

Here is a list of events that led to the diagnosis of colon cancer, formatted as requested:

1. [{"head": "Colonoscopy", "relation": "used for", "tail": "diagnosing colon cancer"}]
2. [{"head": "Colonoscopy", "relation": "procedure", "tail": "insertion of colonoscope to examine colon"}]
3. [{"head": "Suspicious areas found", "relation": "action", "tail": "biopsies taken for analysis"}]
4. [{"head": "Biopsy", "relation": "purpose", "tail": "examine tissue for cancer cells"}]
5. [{"head": "CT Colonography", "relation": "alternative", "tail": "non-invasive imaging test"}]
6. [{"head": "CT Colonography", "relation": "limitation", "tail": "may not detect small polyps effectively"}]
7. [{"head": "FOBT or FIT", "relation": "purpose", "tail": "check for hidden blood in stool"}]
8. [{"head": "FOBT or FIT", "relation": "limitation", "tail": "less definitive than colonoscopy"}]
9. [{"head": "Stool DNA Test", "relation": "purpose", "tail": "detect DNA markers for cancer"}]
10. [{"head": "Flexible 

In [257]:
test_event = extracted_events[0][0]

In [260]:
import json
dict_event = json.loads(test_event)
print(dict_event['head'])

Colonoscopy


In [230]:
print(extract_events(events[0].content))

['{"head": "Colonoscopy", "relation": "used for", "tail": "diagnosing colon cancer"}', '{"head": "Colonoscopy", "relation": "procedure", "tail": "insertion of colonoscope to examine colon"}', '{"head": "Suspicious areas found", "relation": "action", "tail": "biopsies taken for analysis"}', '{"head": "Biopsy", "relation": "purpose", "tail": "examine tissue for cancer cells"}', '{"head": "CT Colonography", "relation": "alternative", "tail": "non-invasive imaging test"}', '{"head": "CT Colonography", "relation": "limitation", "tail": "may not detect small polyps effectively"}', '{"head": "FOBT or FIT", "relation": "purpose", "tail": "check for hidden blood in stool"}', '{"head": "FOBT or FIT", "relation": "limitation", "tail": "less definitive than colonoscopy"}', '{"head": "Stool DNA Test", "relation": "purpose", "tail": "detect DNA markers for cancer"}', '{"head": "Flexible Sigmoidoscopy", "relation": "scope", "tail": "examines rectum and lower colon"}', '{"head": "Barium Enema", "relat

In [263]:
# import the extracted events to the Neo4j database, along with the head, relation and tail embeddings
# extracted events: [[{},{}...]]
def import_events_to_neo4j(events):
    with driver.session() as session:
        for event in events:
            for extracted_event in event:
                extracted_event = json.loads(extracted_event)
                head_text = extracted_event['head']
                relation_text = extracted_event['relation']
                tail_text = extracted_event['tail']
                head_embedding = get_openai_embedding(head_text)
                relation_embedding = get_openai_embedding(relation_text)
                tail_embedding = get_openai_embedding(tail_text)
                
                # Dynamically construct the query with the relation type
                query = f"""
                MERGE (head:HeadDemo {{text: $head_text, embedding: $head_embedding}})
                MERGE (tail:TailDemo {{text: $tail_text, embedding: $tail_embedding}})
                MERGE (head)-[:`{relation_text}` {{text: $relation_text, embedding: $relation_embedding}}]->(tail)
                """
                
                parameters = {
                    'head_text': head_text,
                    'head_embedding': head_embedding,
                    'tail_text': tail_text,
                    'tail_embedding': tail_embedding,
                    'relation_text': relation_text,
                    'relation_embedding': relation_embedding
                }
                session.run(query, parameters)



In [264]:
import_events_to_neo4j(extracted_events)

In [111]:
question = "How is colonoscopy performed?"
query_embedding = neo4j_retriever.embed_question(question)
relevant_docs = neo4j_retriever.get_relevant_documents(query_embedding)
for doc in relevant_docs:
    print(doc)

  with self.driver.session() as session:


('During colonoscopy', 'any polyps seen are removed and', 'evaluated for possible cancer')
('Colonoscopy', 'may be valuable to rule out', 'inflammatory bowel disease')
('normal colonoscopy', 'negates', 'advanced colon cancer')
('colonoscopy', 'alternative CRC screening tests are available for patients who decline', 'colonoscopy')
('colonoscopy', 'requires', 'rapid colonic preparation')


In [271]:
QA_template = [
    (
        "system",
        "You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases. Based on the patient's information, you are to provide possible causes of the patient's condition, and ruling, and diagnosis."
    ),
    (
        "user",
        "Based on {patient_information}, what are the possible causes of the patient's condition?"
    )
]

patient_information_1 = """
A 52-year-old man comes to to the emergency department with fatigue and shortness of breath that has become progressively worse over the past week. 
He had an upper respiratory tract infection 2 weeks ago, for which he was given an antibiotic. 
He has hypertension, type 2 diabetes mellitus, and colonic polyps diagnosed on screening colonoscopy 2 years ago. 
His mother has systemic lupus erythematosus and his brother has a bicuspid aortic valve. He does not smoke cigarettes or drink alcohol. 
Current medications include lisinopril and metformin. 
His temperature is 37.3Â°C (99.1Â°F), pulse is 91/min, respirations are 18/min, and blood pressure is 145/84 mm Hg. 
His conjunctivae are pale. Cardiac examination shows a late systolic crescendo-decrescendo murmur at the right upper sternal border. 
Laboratory studies show:
Leukocyte Count 9,500/mm3
Hematocrit 24%
Platelet Count 178,000/mm3
LDH 215 U/L
Haptoglobin 22 mg/dL (N=41â€“165 mg/dL)
Serum
Na+ 140 mEq/L
K+ 4.6 mEq/L
CL- 100 mEq/L
HCO3- 25 mEq/L
Urea nitrogen 21 mg/dL
Creatinine 1.2 mg/dL
Total bilirubin 1.9 mg/dL
A peripheral blood smear is shown."
"""

# vector query “potential causes of {p.patient_context}” from the “CMG”, and Generate reasoning for ruling
# Use Confirmatory Evidence: Tie laboratory findings, such as the presence of schistocytes, directly to the diagnosis.
# Conclude with Clear Justification: Summarize the reasoning leading to the final answer in a concise manner.

# Use the Langchain OpeAI Chat API to semantically split the question into parts: 
# patient general information, patient history, patient physical examination, patient laboratory findings, patient imaging findings, patient diagnosis, patient treatment, patient prognosis
# and question.

def get_patient_information(patient_information):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases. Based on the patient's information, you are to provide possible causes of the patient's condition, and ruling, and diagnosis."
        ),
        (
            "user",
            "split the {patient_information} into parts: patient general information, patient history, patient physical examination, patient laboratory findings, patient imaging findings, patient diagnosis, patient treatment, patient prognosisand question.".replace('{patient_information}', patient_information_1)
        )
    ]
    patient_information = llm.invoke(message)
    return patient_information

In [273]:
p_info = get_patient_information(patient_information_1)

In [274]:
print(p_info.content)

Certainly, here is the information split into the requested parts:

**Patient General Information:**
- Age: 52 years old
- Gender: Male

**Patient History:**
- Presenting symptoms: Fatigue and shortness of breath progressively worsening over the past week
- Recent illness: Upper respiratory tract infection 2 weeks ago, treated with an antibiotic
- Medical history: Hypertension, type 2 diabetes mellitus, colonic polyps diagnosed on screening colonoscopy 2 years ago
- Family history: Mother with systemic lupus erythematosus, brother with a bicuspid aortic valve
- Lifestyle: Does not smoke cigarettes or drink alcohol
- Current medications: Lisinopril, Metformin

**Patient Physical Examination:**
- Temperature: 37.3°C (99.1°F)
- Pulse: 91/min
- Respirations: 18/min
- Blood pressure: 145/84 mm Hg
- Conjunctivae: Pale
- Cardiac examination: Late systolic crescendo-decrescendo murmur at the right upper sternal border

**Patient Laboratory Findings:**
- Leukocyte Count: 9,500/mm³
- Hematocrit:

In [275]:
neo4j_retriever = Neo4jRetriever()
question = "Mechanical destruction of erythrocytes"
query_embedding = neo4j_retriever.embed_question(question)
relevant_docs = neo4j_retriever.get_relevant_documents(similarity_query_4, query_embedding, top_k=10)
for doc in relevant_docs:
    print(doc)

# Close the retriever connection
neo4j_retriever.close()

('Radiation therapy', 'can damage tissues', 'lead to abscess formation')
('obstructing tumors', 'can be debulked by', 'electrocoagulation')
('local trauma from impacted feces in a diverticulum', 'can erode', 'adjacent vessel')
('Juvenile polyps', 'outgrow their blood supply and autoamputate', 'during or after puberty')


In [297]:
def extract_key_information(patient_information):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases."
        ),
        (
            "user",
            """
            Extract relevant clinical {patient_information} from the following MedQA-USMLE question and answer text, focusing specifically on colonic diseases. Use the provided form to structure the extracted data. If any key information is missing, leave it blank.
            If any additional information is present, include it in the appropriate section.

            Extract the following:
            - Demographics: Include age, gender, family history, social history, and relevant medical history of gastrointestinal conditions.
            - Chief Complaint: Describe the patient's main gastrointestinal issue (e.g., changes in bowel habits, rectal bleeding, abdominal pain).
            - Gastrointestinal Examination: Focus on specific gastrointestinal exam findings, such as abdominal palpation results, rectal examination, or visible signs of colon-related issues.
            - Laboratory Findings: Extract relevant lab results like leukocyte count, platelet count, fecal occult blood test (FOBT), and carcinoembryonic antigen (CEA). Include specific inflammatory markers if relevant (e.g., calprotectin for IBD).
            - Imaging Findings: Extract imaging results such as colonoscopy findings, CT colonography results, and biopsy histopathology reports.
            - Past Medical History: Include previous gastrointestinal surgeries, conditions like IBD, previous colonoscopy reports, or any surgical history related to the gastrointestinal system.
            - Review of Gastrointestinal System: Include any specific symptoms related to colonic diseases (e.g., bowel habit changes, rectal bleeding, abdominal pain, bloating, unexplained weight loss).
            - Assessment/Diagnosis: Extract provisional or final diagnoses, focusing on specific conditions like colorectal cancer, diverticulitis, polyps, or IBD.
            - Treatment Plan: Extract treatment recommendations, including medication (e.g., anti-inflammatory drugs for IBD, chemotherapy for colorectal cancer), polyp removal, and dietary recommendations. Specify surgical intervention if mentioned.
            - Patient Education: Summarize how the findings were explained to the patient and any lifestyle or follow-up recommendations provided.

            Example:

            Given the question:
            \"A 50-year-old male presents with a 6-month history of rectal bleeding and changes in bowel habits. Colonoscopy reveals a 3 cm polyp in the sigmoid colon...\"

            The extracted information should be (note that extract info if available, but leave blank if not mentioned):
            {
                "demographic": {
                    "Age": "50",
                    "Gender": "Male",
                    "Family History": "Not mentioned",
                    "Social History": "Not mentioned",
                    "Medical History": "Not mentioned"
                },
                "chief_complaint": {
                    "Chief Complaint": "6-month history of rectal bleeding and changes in bowel habits."
                },
                "gastrointestinal_examination": {
                    "Abdominal Palpation": "Not mentioned",
                    "Rectal Examination": "Not mentioned",
                    "Visible Signs": "Not mentioned"
                },
                "laboratory_findings": {
                    "Leukocyte Count": "Not mentioned",
                    "Platelet Count": "Not mentioned",
                    "FOBT": "Not mentioned",
                    "CEA": "Not mentioned",
                    "Liver Function Tests": "Not mentioned",
                    "Serum": {
                        "Na+": "Not mentioned",
                        "K+": "Not mentioned"
                    }
                },
                "imaging_findings": {
                    "Colonoscopy Findings": "3 cm polyp in the sigmoid colon.",
                    "Colonoscopy Biopsy Results": "Not mentioned",
                    "CT Colonography": "Not mentioned",
                    "MRI": "Not mentioned"
                },
                "past_medical_history": {
                    "Chronic Conditions": "Not mentioned",
                    "Previous Colonoscopies": "Not mentioned",
                    "Previous Surgeries": "Not mentioned",
                    "Allergies": "Not mentioned"
                },
                "review_of_gastrointestinal_system": {
                    "Bowel Habits": "Changes in bowel habits",
                    "Rectal Bleeding": "Yes",
                    "Abdominal Pain": "Not mentioned",
                    "Unexplained Weight Loss": "Not mentioned"
                },
                "diagnostic_studies": {
                    "Colonoscopy Report": "3 cm polyp in the sigmoid colon.",
                    "CT Scan": "Not mentioned",
                    "MRI": "Not mentioned",
                    "Pathology Report": "Not mentioned"
                },
                "assessment_diagnosis": {
                    "Provisional Diagnosis": "Polyp in sigmoid colon",
                    "Differential Diagnosis": "Not mentioned",
                    "Final Diagnosis": "Not mentioned"
                },
                "treatment_plan": {
                    "Polyp Removal": "Recommendation for polyp removal",
                    "Medications Prescribed": "Not mentioned",
                    "Dietary Recommendations": "Not mentioned",
                    "Follow-up Colonoscopy": "Not mentioned",
                    "Referrals": "Not mentioned"
                },
                "patient_education": {
                    "Explanation of Findings": "Not mentioned",
                    "Lifestyle Modifications": "Not mentioned"
                }
            }
            """.replace('{patient_information}', patient_information)
        )
    ]
    key_information = llm.invoke(message)
    return key_information

def generate_potential_causes(key_information, addition_message=None):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases."
        ),
        (
            "user",
            """
            From key info as evidence, generate potential causes and reasoning of {key_information}, in the form of 
            [
            {
            "Evidence": [list of evidence],
            "Cause": "Potential Cause 1", 
            "Reasoning": "Reasoning for Potential Cause 1"},
            {
            "Evidence": [list of evidence],
            "Cause": "Potential Cause 2",
            "Reasoning": "Reasoning for Potential Cause 2"},
            ]
            """.replace('{key_information}', key_information)
        )
    ]
    message.append(addition_message)
    potential_causes = llm.invoke(message)
    return potential_causes

def generate_potential_next_steps_for_each_cause(potential_causes, addition_message=None):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases."
        ),
        (
            "user",
            """
            From potential causes {potential_causes}, generate potential next steps for each cause, in the form of
            [
            {
            "Cause": "Potential Cause 1",
            "Next Steps": "Next Steps for Potential Cause 1"},
            {
            "Cause": "Potential Cause 2",
            "Next Steps": "Next Steps for Potential Cause 2"}
            ]
            """.replace('{potential_causes}', potential_causes)
        )
    ]
    potential_next_steps = llm.invoke(message.append(addition_message))
    return potential_next_steps

def generate_reasoning_for_ruling(input_information):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases."
        ),
        (
            "user",
            "generate reasoning for ruling based on {input_information}".replace('{input_information}', input_information)
        )
    ]
    reasoning_for_ruling = llm.invoke(message)
    return reasoning_for_ruling

def generate_confirmatory_evidence(patient_information):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in conducting colonoscopy and diagnosing colonic diseases."
        ),
        (
            "user",
            "generate confirmatory evidence based on {patient_information}".replace('{patient_information}', patient_information)
        )
    ]
    confirmatory_evidence = llm.invoke(message)
    return confirmatory_evidence

In [302]:
# extract key information from the patient information
key_information = extract_key_information(patient_information_1)
print(key_information.content)

```json
{
    "demographic": {
        "Age": "52",
        "Gender": "Male",
        "Family History": "Mother has systemic lupus erythematosus, brother has a bicuspid aortic valve",
        "Social History": "Does not smoke cigarettes or drink alcohol",
        "Medical History": "Colonic polyps diagnosed on screening colonoscopy 2 years ago"
    },
    "chief_complaint": {
        "Chief Complaint": "Fatigue and shortness of breath progressively worse over the past week"
    },
    "gastrointestinal_examination": {
        "Abdominal Palpation": "Not mentioned",
        "Rectal Examination": "Not mentioned",
        "Visible Signs": "Not mentioned"
    },
    "laboratory_findings": {
        "Leukocyte Count": "9,500/mm3",
        "Platelet Count": "178,000/mm3",
        "FOBT": "Not mentioned",
        "CEA": "Not mentioned",
        "Liver Function Tests": "Total bilirubin 1.9 mg/dL",
        "Serum": {
            "Na+": "140 mEq/L",
            "K+": "4.6 mEq/L"
        }
    },

In [281]:
# generate potential causes from the key information
potential_causes = generate_potential_causes(key_information.content)
print(potential_causes.content)

```json
[
    {
        "Evidence": [
            "Age: 52",
            "Gender: Male",
            "Colonic polyps diagnosed on screening colonoscopy 2 years ago",
            "Fatigue and shortness of breath progressively worse over the past week"
        ],
        "Cause": "Anemia due to gastrointestinal bleeding",
        "Reasoning": "The patient's age and history of colonic polyps increase the risk of gastrointestinal bleeding, which can lead to anemia. Anemia could explain the symptoms of fatigue and shortness of breath. However, the absence of reported rectal bleeding or FOBT results makes this less certain."
    },
    {
        "Evidence": [
            "Age: 52",
            "Gender: Male",
            "Chronic Conditions: Hypertension, type 2 diabetes mellitus",
            "Fatigue and shortness of breath progressively worse over the past week",
            "Total bilirubin: 1.9 mg/dL"
        ],
        "Cause": "Cardiovascular disease",
        "Reasoning": "The patien

In [298]:
# append an additional clause into user prompt of message_template
addition_message = (
    "user",
    "limit the potential causes only direct cause, not the indirect cause") 

poten_cause = generate_potential_causes(key_information.content, addition_message)


In [301]:
print(poten_cause.content)

```json
[
    {
        "Evidence": [
            "Age: 52",
            "Gender: Male",
            "Medical History: Colonic polyps diagnosed on screening colonoscopy 2 years ago",
            "Chief Complaint: Fatigue and shortness of breath progressively worse over the past week"
        ],
        "Cause": "Anemia due to gastrointestinal bleeding",
        "Reasoning": "The patient's fatigue and shortness of breath could be indicative of anemia, which might be caused by bleeding from colonic polyps. Although there is no direct evidence of bleeding, the history of colonic polyps raises the possibility of occult gastrointestinal bleeding leading to anemia."
    },
    {
        "Evidence": [
            "Age: 52",
            "Gender: Male",
            "Medical History: Hypertension, type 2 diabetes mellitus",
            "Chief Complaint: Fatigue and shortness of breath progressively worse over the past week",
            "Serum Creatinine: 1.2 mg/dL",
            "Total bilirubin

In [16]:
# Write a function to answer Multiple choice USMLE questions
def answer_multiple_choice_question(question, options):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases."
        ),
        (
            "user",
            """"A": "IV fluid resuscitation", "B": "Administration of supplemental oxygen", "C": "Scheduled meal times", "D": "Monitoring and stress counseling", "E": "PO bismuth subsalicylate", "F": "Trial of metoclopramide", "G": "Discontinuation of ginger and vitamin B6", "H": "Addition of doxylamine""A": "IV fluid resuscitation", "B": "Administration of supplemental oxygen", "C": "Scheduled meal times", "D": "Monitoring and stress counseling", "E": "PO bismuth subsalicylate", "F": "Trial of metoclopramide", "G": "Discontinuation of ginger and vitamin B6", "H": "Addition of doxylamine"
            Answer the following multiple choice question: {question} {options} To answer this questions, do:
            - Action 1: for each answer option, find evidences to rule them out
            - Action 2: for each answer option, find evidences that may lead to inference on the causal path of patient infos
            - reasoning over Action 1 and Action 2 in a debate
            """.replace('{question}', question).replace('{options}', options)
        )
    ]
    answer = llm.invoke(message)
    return answer

In [1]:
multiple_choice_question = """
A 52-year-old man comes to to the emergency department with fatigue and shortness of breath that has become progressively worse over the past week. He had an upper respiratory tract infection 2 weeks ago, for which he was given an antibiotic. He has hypertension, type 2 diabetes mellitus, and colonic polyps diagnosed on screening colonoscopy 2 years ago. His mother has systemic lupus erythematosus and his brother has a bicuspid aortic valve. He does not smoke cigarettes or drink alcohol. Current medications include lisinopril and metformin. His temperature is 37.3°C (99.1°F), pulse is 91/min, respirations are 18/min, and blood pressure is 145/84 mm Hg. His conjunctivae are pale. Cardiac examination shows a late systolic crescendo-decrescendo murmur at the right upper sternal border. Laboratory studies show:\nLeukocyte Count 9,500/mm3\nHematocrit 24%\nPlatelet Count 178,000/mm3\nLDH 215 U/L\nHaptoglobin 22 mg/dL (N=41–165 mg/dL)\nSerum\nNa+ 140 mEq/L\nK+ 4.6 mEq/L\nCL- 100 mEq/L\nHCO3- 25 mEq/L\nUrea nitrogen 21 mg/dL\nCreatinine 1.2 mg/dL\nTotal bilirubin 1.9 mg/dL\nA peripheral blood smear is shown. Which of the following is the most likely cause of this patient's anemia?\
"""

multiple_choice_options = """
{"A": "Autoimmune destruction of erythrocytes", "B": "Occult blood loss", "C": "Erythrocyte membrane fragility", "D": "Erythrocyte enzyme defect", "E": "Mechanical destruction of erythrocytes"}
"""

In [10]:
multiple_choice_question_2 = """
A 46-year-old man is brought to the emergency department 15 minutes after being involved in a motor vehicle collision where he was the unrestrained driver. On arrival, he is unconscious. His respirations are 24/min, and palpable systolic blood pressure is 60 mm Hg, and pulse is 141/min and irregularly irregular. Examination shows multiple ecchymoses over the chest. There is a 3-cm (1.2-in) laceration over the abdomen and a 4-cm (1.6-in) laceration over the scalp. Jugular venous pressure is increased. Bilateral crackles are heard at the lung bases. Cardiac examination shows no murmurs, rubs, or gallops. The abdomen is soft. Two large bore intravenous catheters are inserted and 0.9% saline infusion is begun. Focused assessment with sonography in trauma (FAST) is negative. An electrocardiogram shows absent p-waves. After 5 minutes, his pulse is 160/min and palpable systolic blood pressure is 50 mm Hg. Vasopressors are administered. One hour later, the patient dies. Which of the following was the most likely diagnosis?
"""

multiple_choice_options_2 = """
"A": "Tracheobronchial injury", "B": "Cardiac contusion", "C": "Splenic rupture", "D": "Hemothorax", "E": "Aortic dissection", "F": "Pulmonary contusion", "G": "Cardiac tamponade\n\""
"""

In [51]:
multiple_choice_question_3 = """
A 29-year-old, gravida 1 para 0, at 10 weeks' gestation comes to the physician for progressively worsening emesis, nausea, and a 2-kg (4.7-lb) weight loss over the past 2 weeks. The most recent bouts of vomiting occur around 3–4 times a day, and she is stressed that she had to take a sick leave from work the last 2 days. She is currently taking ginger and vitamin B6 with limited relief. Her pulse is 80/min, blood pressure is 100/60 mmHg, and respiratory rate is 13/min. Orthostatic vital signs are within normal limits. The patient is alert and oriented. Her abdomen is soft and nontender. Urinalysis shows no abnormalities. Her hematocrit is 40%. Venous blood gas shows:\npH 7.43\npO2 42 mmHg\npCO2 54 mmHg\nHCO3- 31 mEq/L\nSO2 80%\nIn addition to oral fluid resuscitation, which of the following is the most appropriate next step in management?\
"""

multiple_choice_options_3 = """
"A": "IV fluid resuscitation", "B": "Administration of supplemental oxygen", "C": "Scheduled meal times", "D": "Monitoring and stress counseling", "E": "PO bismuth subsalicylate", "F": "Trial of metoclopramide", "G": "Discontinuation of ginger and vitamin B6", "H": "Addition of doxylamine"
"""



In [17]:
multiple_choice_answer = answer_multiple_choice_question(multiple_choice_question, multiple_choice_options)
print(multiple_choice_answer.content)

To determine the most likely cause of this patient's anemia, let's evaluate each answer option based on the provided clinical information and laboratory findings.

**Action 1: Rule Out Evidence**

- **A: Autoimmune destruction of erythrocytes**
  - Typically associated with positive direct Coombs test and spherocytes on peripheral smear. The patient's history and lab findings do not specifically indicate autoimmune hemolytic anemia.

- **B: Occult blood loss**
  - Would typically present with iron deficiency anemia, characterized by microcytic, hypochromic anemia. The lab findings do not suggest iron deficiency (e.g., no mention of low MCV or low serum iron).

- **C: Erythrocyte membrane fragility**
  - Conditions like hereditary spherocytosis would show spherocytes on the blood smear and a positive osmotic fragility test. The patient's history and lab findings do not suggest this.

- **D: Erythrocyte enzyme defect**
  - Conditions like G6PD deficiency would present with hemolytic anem

In [15]:
multiple_choice_answer = answer_multiple_choice_question(multiple_choice_question, options="None")
print(multiple_choice_answer.content)

To determine the most likely cause of this patient's anemia, we need to analyze the clinical presentation and laboratory findings. The key findings include fatigue, shortness of breath, pale conjunctivae, a heart murmur, low hematocrit, low haptoglobin, elevated LDH, and elevated total bilirubin. The peripheral blood smear is not provided here, but we can infer it might show schistocytes given the context.

Let's consider potential causes of anemia and evaluate them:

1. **Iron Deficiency Anemia:**
   - **Action 1 (Rule Out):** Iron deficiency anemia typically presents with microcytic, hypochromic anemia. The low haptoglobin and elevated LDH and bilirubin suggest hemolysis, which is not characteristic of iron deficiency anemia.
   - **Action 2 (Inference):** The patient's history of colonic polyps and potential gastrointestinal bleeding could suggest iron deficiency, but the laboratory findings are more indicative of hemolysis.

2. **Anemia of Chronic Disease:**
   - **Action 1 (Rule O

In [18]:
multilple_choice_answer_2 = answer_multiple_choice_question(multiple_choice_question_2, multiple_choice_options_2)
print(multilple_choice_answer_2.content)

To determine the most likely diagnosis for this patient, let's evaluate each option based on the provided clinical information.

### Action 1: Rule Out Evidence

- **A: Tracheobronchial injury**
  - Typically presents with respiratory distress, subcutaneous emphysema, and pneumothorax. The patient has increased jugular venous pressure and bilateral crackles, which are not classic for tracheobronchial injury.

- **B: Cardiac contusion**
  - Can cause arrhythmias and hypotension. The irregularly irregular pulse and absent p-waves suggest atrial fibrillation, which can occur with cardiac contusion. However, the rapid deterioration and increased jugular venous pressure are more suggestive of another condition.

- **C: Splenic rupture**
  - Would likely present with abdominal pain, tenderness, and signs of internal bleeding. The FAST exam was negative, and the abdomen is soft, making splenic rupture less likely.

- **D: Hemothorax**
  - Would present with decreased breath sounds and dullnes

In [20]:
multiple_choice_question_3 = answer_multiple_choice_question(multiple_choice_question_3, multiple_choice_options_3)
print(multiple_choice_question_3.content)

To address the question, let's evaluate each answer option based on the patient's presentation and the available evidence.

**Action 1: Rule Out Evidence for Each Option**

- **A: IV fluid resuscitation**
  - The patient is not showing signs of dehydration or orthostatic hypotension, and her vital signs are stable. Oral fluid resuscitation is already mentioned, suggesting that IV fluids may not be necessary at this point.

- **B: Administration of supplemental oxygen**
  - The patient is not exhibiting respiratory distress or hypoxia. The low pO2 and SO2 are likely due to venous blood gas sampling, which is not indicative of her actual oxygenation status.

- **C: Scheduled meal times**
  - While helpful in managing nausea, this option does not directly address the severity of her symptoms or provide immediate relief.

- **D: Monitoring and stress counseling**
  - While stress counseling may be beneficial, it does not address the acute management of her nausea and vomiting.

- **E: PO b

In [44]:
fine_tune_prompt = """
If the question were open-ended and "mechanical destruction of erythrocytes" wasn't explicitly listed, a clinician would need to methodically reason through the patient's presentation, physical findings, and lab results to deduce the most likely cause. Here's how a doctor might approach this case step-by-step:

### 1. **Assess the Patient’s Symptoms and History:**
   - **Fatigue and shortness of breath:** These are common symptoms of anemia, indicating reduced oxygen-carrying capacity of the blood.
   - **Recent upper respiratory infection:** This could suggest an immune or inflammatory trigger, potentially leading to hemolysis, but not necessarily indicating a specific type yet.
   - **Chronic conditions:** Hypertension and type 2 diabetes may not directly cause acute anemia, but they are important background details.
   - **Family history of systemic lupus erythematosus and bicuspid aortic valve:** These are relevant. Lupus could predispose to autoimmune conditions, while a bicuspid aortic valve suggests a possible genetic predisposition to aortic stenosis or other valve disorders.

### 2. **Examine the Physical Findings:**
   - **Pale conjunctivae:** A sign of anemia, confirming the clinical suspicion of low hemoglobin or hematocrit.
   - **Heart murmur:** This is a key finding. The **late systolic crescendo-decrescendo murmur** heard at the **right upper sternal border** is characteristic of **aortic stenosis**, which is often linked to a **bicuspid aortic valve**.
     - A bicuspid valve is prone to degeneration, leading to stenosis.
     - In stenosis, red blood cells passing through the turbulent flow across the valve can be mechanically damaged, leading to hemolysis.

### 3. **Interpret the Laboratory Findings:**
   - **Elevated bilirubin (1.9 mg/dL)** and **low haptoglobin (22 mg/dL):** These suggest hemolysis. Bilirubin is a breakdown product of hemoglobin, and haptoglobin binds free hemoglobin released from destroyed red blood cells.
   - **Normal LDH:** LDH is slightly elevated, which supports ongoing hemolysis but not at an overwhelming level.
   - **Leukocyte and platelet counts are normal:** This helps rule out conditions like bone marrow suppression or severe inflammatory disease that would alter other blood cell lines.

### 4. **Generate Hypotheses for the Cause of Hemolysis:**
   At this point, a clinician is aware that hemolysis is occurring but needs to figure out the underlying cause. The key decision-making process involves:
   - **Autoimmune hemolysis:** Given the recent infection, AIHA (autoimmune hemolytic anemia) is a reasonable consideration, especially with a family history of lupus. However, there is no mention of a Coombs test result (direct antibody test) that would be critical for confirming this diagnosis.
   - **Enzyme defects (like G6PD deficiency):** This can cause hemolysis, especially triggered by infections or certain medications. But there is no mention of drug triggers or typical findings like "bite cells" or "Heinz bodies" on the blood smear.
   - **Mechanical hemolysis:** The **murmur** and **family history of bicuspid aortic valve** are critical clues that suggest **mechanical destruction of erythrocytes** due to **aortic stenosis**. The turbulence created by the stenotic valve can shear red blood cells, leading to hemolysis, which would explain the low haptoglobin and high bilirubin.

### 5. **Synthesize the Information:**
   Based on the evidence:
   - **Hemolysis is confirmed** (low haptoglobin, elevated bilirubin).
   - **Aortic stenosis** is highly likely, given the heart murmur, family history, and patient age.
   - Mechanical destruction from the stenotic valve is the most plausible cause of hemolysis, given the absence of autoimmune markers or signs of other causes like enzyme defects.

### 6. **Final Deduction:**
   - Even without the option of "mechanical destruction of erythrocytes," the presence of a heart murmur (suggesting aortic stenosis) and the clear evidence of hemolysis would push a doctor toward the conclusion that **the heart valve abnormality is likely causing the anemia**. The clinical reasoning would point toward **mechanical hemolysis secondary to aortic stenosis**, especially in the context of a known family history of bicuspid aortic valve disease.

In summary, a doctor would start with general causes of anemia, narrow down to hemolysis based on lab findings, and use the heart murmur as the critical clue to deduce mechanical destruction of erythrocytes due to aortic stenosis.
"""

In [48]:
# Write a function to answer Multiple choice USMLE questions
def answer_multiple_choice_question_noexplain(question, options="None", fine_tune_prompt="None"):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases."
        ),
        (
            "user",
            """
            Answer the following multiple choice question: {question} {options} To answer this questions, do:
            - If 'options' is 'None', Generate 5 potential causes. the answer should follow {fine_tune_prompt}
            - Action 1: for each answer option, find evidences to rule them out
            - Action 2: for each answer option, find evidences that may lead to inference on the causal path of patient infos
            - reasoning over Action 1 and Action 2 in a debate
            - Criteria and Scoring System:
            1. Level of Evidence Supported in the Context (available in the text)
            High (3 points): Direct evidence in the clinical scenario strongly supports the cause.
            Moderate (2 points): Some evidence in the text supports the cause, but it's not conclusive.
            Low (1 point): Little or no evidence in the text supports the cause.
            2. Level of Evidence Supported in the Causal Path Inference (available through deduction)
            High (3 points): Strong deductive reasoning leads to this cause based on history, lab results, and the clinical picture.
            Moderate (2 points): Deductive reasoning could support this cause but requires more assumptions or is less likely.
            Low (1 point): Deductive reasoning provides little to no support for this cause.
            3. Amount of Next Step Examinations Needed for Ruling Out or Confirmation
            Low (3 points): Few or no additional tests are required because the current evidence is sufficient to make the diagnosis.
            Moderate (2 points): Some additional tests are required to confirm or rule out the cause.
            High (1 point): Several additional tests are needed because the current evidence is insufficient to confirm or rule out the cause.
            - Only return the answer with the highest confidence score, with format:
            '{
                "Answer_letter": "Answer option A, B, C, etc.",
                "level_of_evidence": "3, 2, 1",
                "level_of_deduction": "3, 2, 1",
                "amount_of_next_step": "3, 2, 1",
                "Answer_text": "Answer text"
            }'
            """.replace('{question}', question).replace('{options}', options).replace('{fine_tune_prompt}', fine_tune_prompt)
        )
    ]
    answer = llm.invoke(message)
    return answer

In [80]:
Norm_rubric_prompt = """
Criteria and Scoring System:
1. Level of Evidence Supported in the Context (available in the text)
High (3 points): Direct evidence in the clinical scenario strongly supports the cause.
Moderate (2 points): Some evidence in the text supports the cause, but it's not conclusive.
Low (1 point): Little or no evidence in the text supports the cause.
2. Level of Evidence Supported in the Causal Path Inference (available through deduction)
High (3 points): Strong deductive reasoning leads to this cause based on history, lab results, and the clinical picture.
Moderate (2 points): Deductive reasoning could support this cause but requires more assumptions or is less likely.
Low (1 point): Deductive reasoning provides little to no support for this cause.
3. Amount of Next Step Examinations Needed for Ruling Out or Confirmation
Low (3 points): Few or no additional tests are required because the current evidence is sufficient to make the diagnosis.
Moderate (2 points): Some additional tests are required to confirm or rule out the cause.
High (1 point): Several additional tests are needed because the current evidence is insufficient to confirm or rule out the cause.

Output format:
{
    "Answer_letter": "Answer option A, B, C, etc.",
    "level_of_evidence": "3, 2, 1",
    "level_of_deduction": "3, 2, 1",
    "amount_of_next_step": "3, 2, 1",
    "Answer_text": "Answer text"
}
"""

Criterion_rubric_prompt = """
Scoring guide:
High (3 points): The answer provides direct evidence or uses widely accepted medical guidelines.
Moderate (2 points): The answer relies on some inferential reasoning or partial evidence but is plausible.
Low (1 point): The answer lacks sufficient evidence, or there’s significant uncertainty
Deduction question reasoning:
Strength of Evidence: Does the answer cite relevant studies, guidelines, or well-established practices?
Relevance to Clinical Scenario: Is the answer directly addressing the scenario without speculation?
Logical Consistency: Is the answer logically coherent and free of contradictions?
Comprehensiveness: Does the answer account for all the symptoms or findings provided in the scenario?

Rubric:
1. Strength of Evidence
High (3 points): Supported by direct evidence or widely accepted clinical guidelines.
Moderate (2 points): Supported by inferential reasoning or partial evidence.
Low (1 point): Lacks sufficient evidence or has significant uncertainty.
2. Relevance to Clinical Scenario
High (3 points): Directly addresses the scenario without speculation.
Moderate (2 points): Partially addresses the scenario or includes some speculation.
Low (1 point): Lacks relevance to the scenario or is highly speculative.
3. Logical Consistency
High (3 points): Logically coherent and free of contradictions, strong deductive reasoning.
Moderate (2 points): Contains some deductive assumptions but generally coherent.
Low (1 point): Contains logical inconsistencies or contradictions.
4. Comprehensiveness
High (3 points): Accounts for all symptoms or findings in the scenario.
Moderate (2 points): Addresses most symptoms or findings but may miss some.
Low (1 point): Few symptoms considered or Fails to address key symptoms or findings.

Output format:
{
    "Strength of Evidence": "3, 2, 1",
    "Relevance to Clinical Scenario": "3, 2, 1",
    "Logical Consistency": "3, 2, 1",
    "Comprehensiveness": "3, 2, 1",
    "Answer": "Answer text",
    "Answer_letter": "Answer option A, B, C, etc. if present"
}
"""

Generatiion_Confidence_scoring_prompt = """
evaluate how confident they are about the answer’s accuracy based on the provided evidence.
Confidence scoring adds an extra layer of objectivity by assessing how well the answer fits with known medical knowledge or guidelines.
Confidence can be expressed on a 1 to 5 scale where raters score their certainty based on clinical evidence and reasoning:
5 (Very High Confidence): Strong clinical guidelines or consensus support the answer.
4 (High Confidence): Strong deductive reasoning and partial evidence support the answer.
3 (Moderate Confidence): The answer is plausible but lacks strong supporting evidence.
2 (Low Confidence): The answer requires further confirmation or evidence.
1 (Very Low Confidence): The answer is unlikely to be correct based on available information.
"""

# For the idea of breaking down the question into parts, I follow up by using this simple prompt to break down the question into parts
Structured_open_ended_response_prompt = """
Break down the question into parts:
Proposed Diagnosis
Proposed Treatment
Evidence/Reasoning Supporting Diagnosis
Evidence/Reasoning Supporting Treatment
"""

In [152]:
# Write a function to answer Open-ended USMLE questions

def answer_open_ended_question(question, fine_tune_prompt="None", options = "None", rubric_prompt="None"):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases."
        ),
        (
            'user',
            """
            If {options} is not 'None', follow {rubric_prompt}, return the best answer based on the rubric
            If {options} is 'None',
            Answer the following open-ended question: {question} To answer this questions, do:
            - If 'fine_tune_prompt' is not 'None' the answer should follow {fine_tune_prompt}
            - Action 1: Generate all potential causes. the answer should follow {fine_tune_prompt}
            - Action 2: for each potential cause, evaluate by using {rubric_prompt}. If 'rubric_prompt' is 'None', generate a rubric for evaluation
            Output format: follow the rubric.
            """.replace('{question}', question).replace('{fine_tune_prompt}', fine_tune_prompt).replace('{options}', options).replace('{rubric_prompt}', rubric_prompt)
        )
    ]
    answer = llm.invoke(message)
    return answer

In [61]:
open_ended_question_answer = answer_open_ended_question(multiple_choice_question, rubric_prompt=Criterion_rubric_prompt)
print(open_ended_question_answer.content)

**Action 1: Generate 5 potential causes for the patient's anemia.**

1. Hemolytic anemia due to drug-induced hemolysis.
2. Anemia of chronic disease.
3. Iron deficiency anemia.
4. Vitamin B12 or folate deficiency anemia.
5. Aplastic anemia.

**Action 2: Evaluate each potential cause using the scoring guide.**

1. **Hemolytic anemia due to drug-induced hemolysis**
   - **Strength of Evidence:** High (3 points) - The low haptoglobin, elevated bilirubin, and recent antibiotic use suggest hemolysis, which is a well-documented cause of anemia.
   - **Relevance to Clinical Scenario:** High (3 points) - Directly addresses the symptoms and lab findings, including the recent history of antibiotic use.
   - **Logical Consistency:** High (3 points) - The findings are consistent with hemolytic anemia.
   - **Comprehensiveness:** High (3 points) - Accounts for the anemia, jaundice, and recent medication history.

2. **Anemia of chronic disease**
   - **Strength of Evidence:** Moderate (2 points) - 

In [58]:
open_ended_question_answer = answer_open_ended_question(multiple_choice_question, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Criterion_rubric_prompt)
print(open_ended_question_answer.content)

### Potential Causes of Anemia in This Patient:

1. **Mechanical Destruction of Erythrocytes (Hemolysis due to Aortic Stenosis):**
   - **Strength of Evidence:** High (3 points) - The presence of a heart murmur characteristic of aortic stenosis, combined with a family history of bicuspid aortic valve, strongly supports this diagnosis. The lab findings of low haptoglobin and elevated bilirubin are consistent with hemolysis.
   - **Relevance to Clinical Scenario:** High (3 points) - Directly addresses the scenario with the heart murmur and lab findings pointing towards mechanical hemolysis.
   - **Logical Consistency:** High (3 points) - The reasoning is coherent, linking the heart murmur to mechanical hemolysis.
   - **Comprehensiveness:** High (3 points) - Accounts for all symptoms and findings, including the heart murmur, family history, and lab results.

2. **Autoimmune Hemolytic Anemia (AIHA):**
   - **Strength of Evidence:** Moderate (2 points) - The recent infection could trigger 

In [59]:
open_ended_question_answer_2 = answer_open_ended_question(multiple_choice_question_2, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Norm_rubric_prompt)
print(open_ended_question_answer_2.content)

To address the clinical scenario of the 46-year-old man involved in a motor vehicle collision, we need to determine the most likely cause of his rapid deterioration and subsequent death. Here is a step-by-step approach to the case:

### 1. **Assess the Patient’s Symptoms and History:**
   - **Unconsciousness and irregularly irregular pulse:** Suggests a possible cardiac arrhythmia, such as atrial fibrillation.
   - **Hypotension (systolic BP 60 mm Hg) and tachycardia (pulse 141/min):** Indicate shock, likely hypovolemic or cardiogenic.
   - **Increased jugular venous pressure and bilateral crackles:** Suggestive of cardiac tamponade or heart failure.

### 2. **Examine the Physical Findings:**
   - **Multiple ecchymoses over the chest:** Could indicate significant chest trauma.
   - **Lacerations over the abdomen and scalp:** Indicate potential for internal bleeding or head injury.
   - **Jugular venous distension and crackles:** Point towards cardiac tamponade or tension pneumothorax.


In [76]:
open_ended_question_answer_2 = answer_open_ended_question(multiple_choice_question_2, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Norm_rubric_prompt)
print(open_ended_question_answer_2.content)

The scenario provided describes a 46-year-old man who was involved in a motor vehicle collision and presented with signs of severe trauma and hemodynamic instability. The key clinical findings include unconsciousness, irregularly irregular pulse, hypotension, increased jugular venous pressure, bilateral crackles, and a negative FAST exam. The ECG shows absent p-waves, and the patient deteriorates rapidly despite intervention.

Given these findings, the most likely diagnosis is cardiac tamponade secondary to traumatic injury. The increased jugular venous pressure, hypotension, and muffled heart sounds (though not explicitly mentioned, the absence of murmurs, rubs, or gallops could imply this) are classic signs of Beck's triad, which is indicative of cardiac tamponade. The irregularly irregular pulse and absent p-waves on ECG suggest atrial fibrillation, which can occur in the setting of cardiac tamponade due to atrial compression.

Here's how the evaluation would be structured:

### Eva

In [77]:
open_ended_question_answer_2 = answer_open_ended_question(multiple_choice_question_2, options=multiple_choice_options_2, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Norm_rubric_prompt)
print(open_ended_question_answer_2.content)

To address the open-ended question regarding the most likely diagnosis for the 46-year-old man involved in a motor vehicle collision, we need to analyze the clinical presentation and findings:

### Clinical Presentation:
- **Unconsciousness**: Suggests severe injury or shock.
- **Respirations 24/min**: Indicates possible respiratory distress or compensation for shock.
- **Systolic BP 60 mm Hg, Pulse 141/min and irregularly irregular**: Suggests shock, possibly due to hemorrhage or cardiac dysfunction.
- **Multiple ecchymoses over the chest**: Indicates significant chest trauma.
- **Jugular venous pressure increased**: Suggests possible cardiac tamponade or tension pneumothorax.
- **Bilateral crackles at lung bases**: Could indicate pulmonary contusion or heart failure.
- **FAST negative**: Rules out significant intra-abdominal bleeding.
- **ECG shows absent p-waves**: Suggests atrial fibrillation, which can be secondary to cardiac contusion or tamponade.

### Potential Diagnoses:
1. **

In [81]:
open_ended_question_answer_2 = answer_open_ended_question(multiple_choice_question_2, options=multiple_choice_options_2, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Criterion_rubric_prompt)
print(open_ended_question_answer_2.content)

{
    "Strength of Evidence": "3",
    "Relevance to Clinical Scenario": "3",
    "Logical Consistency": "3",
    "Comprehensiveness": "3",
    "Answer": "Cardiac tamponade is the most likely diagnosis. The patient presents with hypotension, jugular venous distension, and tachycardia, which are classic signs of cardiac tamponade. The irregularly irregular pulse suggests atrial fibrillation, which can occur in the setting of cardiac tamponade. The negative FAST exam does not rule out cardiac tamponade as it may not detect small pericardial effusions. The rapid deterioration and death are consistent with the hemodynamic instability caused by cardiac tamponade.",
    "Answer_letter": "G"
}


In [83]:
open_ended_question_answer_q5 = answer_open_ended_question(q5, options=options_q5, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Norm_rubric_prompt)
print(open_ended_question_answer_q5.content)

{
    "Answer_letter": "A",
    "level_of_evidence": "3",
    "level_of_deduction": "3",
    "amount_of_next_step": "3",
    "Answer_text": "Placing the infant in a supine position on a firm mattress while sleeping"
}


In [79]:
open_ended_question_answer_2 = answer_open_ended_question(multiple_choice_question_2, options=multiple_choice_options_2, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Norm_rubric_prompt)
print(open_ended_question_answer_2.content)

{
    "Answer_letter": "G",
    "level_of_evidence": "3",
    "level_of_deduction": "3",
    "amount_of_next_step": "3",
    "Answer_text": "Cardiac tamponade",
    "Answer_option": "Answer option G"
}


In [64]:
print(multiple_choice_options_2)


"A": "Tracheobronchial injury", "B": "Cardiac contusion", "C": "Splenic rupture", "D": "Hemothorax", "E": "Aortic dissection", "F": "Pulmonary contusion", "G": "Cardiac tamponade
""



In [68]:
mcq_question_answer_2 = answer_open_ended_question(multiple_choice_question_2, options="multilple_choice_options_2", rubric_prompt=Criterion_rubric_prompt, fine_tune_prompt=fine_tune_prompt)
print(mcq_question_answer_2.content)

In the given scenario, the patient is a 46-year-old man who was involved in a motor vehicle collision and presented with several critical findings. The most likely diagnosis, considering the clinical presentation and the rapid deterioration leading to death, is cardiac tamponade. Here's the reasoning:

### 1. **Assess the Patient’s Symptoms and History:**
   - **Unconsciousness and hypotension:** These are indicative of severe shock or compromised cardiac output.
   - **Irregularly irregular pulse:** Suggests atrial fibrillation, which can be associated with cardiac trauma or underlying cardiac conditions.
   - **Increased jugular venous pressure:** A classic sign of elevated central venous pressure, often seen in cardiac tamponade.
   - **Bilateral crackles at lung bases:** Could indicate pulmonary congestion, possibly due to heart failure or fluid overload.

### 2. **Examine the Physical Findings:**
   - **Multiple ecchymoses over the chest:** Suggests significant blunt trauma, which

In [70]:
q5 = """
A 3-month-old baby died suddenly at night while asleep. His mother noticed that he had died only after she awoke in the morning. No cause of death was determined based on the autopsy. Which of the following precautions could have prevented the death of the baby?
"""

options_q5 = """
"A": "Placing the infant in a supine position on a firm mattress while sleeping", "B": "Routine postnatal electrocardiogram (ECG)", "C": "Keeping the infant covered and maintaining a high room temperature", "D": "Application of a device to maintain the sleeping position", "E": "Avoiding pacifier use during sleep"
"""

answer_q5 = "Placing the infant in a supine position on a firm mattress while sleeping"


In [71]:
open_ended_question_answer_q5 = answer_open_ended_question(q5, options=options_q5, rubric_prompt=Criterion_rubric_prompt, fine_tune_prompt=fine_tune_prompt)
print(open_ended_question_answer_q5.content)

To address the scenario of a 3-month-old baby who died suddenly at night while asleep, with no cause of death determined based on the autopsy, we need to consider potential precautions that could have prevented this tragic event. This situation is often associated with Sudden Infant Death Syndrome (SIDS), and there are several well-established guidelines aimed at reducing the risk of SIDS.

### Potential Precautions:

1. **Placing the Infant in a Supine Position on a Firm Mattress While Sleeping (A):**
   - **Strength of Evidence:** High (3 points) - This is supported by direct evidence and widely accepted guidelines, such as those from the American Academy of Pediatrics (AAP), which recommend placing infants on their backs to sleep to reduce the risk of SIDS.
   - **Relevance to Clinical Scenario:** High (3 points) - Directly addresses the scenario as improper sleeping position is a known risk factor for SIDS.
   - **Logical Consistency:** High (3 points) - This recommendation is logi

In [72]:
open_ended_question_answer_q5 = answer_open_ended_question(q5, rubric_prompt=Criterion_rubric_prompt, fine_tune_prompt=fine_tune_prompt)
print(open_ended_question_answer_q5.content)

To address the open-ended question regarding the sudden death of a 3-month-old baby during sleep, we need to consider potential causes and preventive measures. The scenario described is suggestive of Sudden Infant Death Syndrome (SIDS), a condition where an infant dies unexpectedly during sleep with no clear cause found even after an autopsy.

### Potential Preventive Measures for SIDS:

1. **Safe Sleep Environment:**
   - **Back to Sleep:** Always place the baby on their back to sleep, for naps and at night, to reduce the risk of SIDS.
   - **Firm Sleep Surface:** Use a firm mattress with a fitted sheet in a safety-approved crib. Avoid soft bedding, pillows, or toys in the crib.
   - **Room Sharing Without Bed Sharing:** Keep the baby's sleep area in the same room where you sleep for the first year, but on a separate surface designed for infants.

2. **Avoid Overheating:**
   - Dress the baby in light sleep clothing and keep the room at a comfortable temperature. Avoid heavy blankets 

In [60]:
open_ended_question_answer_2_1 = answer_open_ended_question(multiple_choice_question_2, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Criterion_rubric_prompt)
print(open_ended_question_answer_2_1.content)

In this scenario, the patient presents with signs of severe trauma and hemodynamic instability following a motor vehicle collision. The key findings include unconsciousness, irregularly irregular pulse, hypotension, increased jugular venous pressure, bilateral crackles in the lungs, and a negative FAST exam. The ECG shows absent p-waves, suggesting atrial fibrillation. Despite resuscitative efforts, the patient deteriorates and dies.

### Potential Causes:

1. **Cardiac Tamponade:**
   - **Strength of Evidence:** High (3 points) - The increased jugular venous pressure, hypotension, and muffled heart sounds (though not explicitly mentioned) are classic signs of Beck's triad for cardiac tamponade.
   - **Relevance to Clinical Scenario:** High (3 points) - Directly addresses the scenario with the presence of trauma and hemodynamic instability.
   - **Logical Consistency:** High (3 points) - The symptoms align with cardiac tamponade, especially post-trauma.
   - **Comprehensiveness:** High

In [28]:
multiple_choice_question_3_noexplain = answer_multiple_choice_question_noexplain(multiple_choice_question_3, multiple_choice_options_3)
print(multiple_choice_question_3_noexplain.content)

To determine the most appropriate next step in management for this patient, let's evaluate each option:

**Action 1: Rule Out Evidence**

- **A: IV fluid resuscitation**: The patient is not orthostatic, and her vital signs are stable, suggesting she is not significantly dehydrated. Therefore, IV fluids may not be immediately necessary.
  
- **B: Administration of supplemental oxygen**: The patient is not hypoxic, and her oxygen saturation is adequate. The low pO2 and SO2 are likely due to venous blood gas, not arterial. Therefore, supplemental oxygen is not indicated.
  
- **C: Scheduled meal times**: While helpful in managing nausea, this does not address the acute symptoms or provide immediate relief.
  
- **D: Monitoring and stress counseling**: This is supportive care but does not address the acute symptoms of nausea and vomiting.
  
- **E: PO bismuth subsalicylate**: This is typically used for gastrointestinal upset related to diarrhea, not for nausea and vomiting in pregnancy.
  

In [31]:
multiple_choice_question_3_noexplain = answer_multiple_choice_question_noexplain(multiple_choice_question_3, multiple_choice_options_3)
print(multiple_choice_question_3_noexplain.content)

```json
{
    "Answer_letter": "H",
    "level_of_evidence": "3",
    "level_of_deduction": "3",
    "amount_of_next_step": "3",
    "Answer_text": "Addition of doxylamine"
}
```

**Reasoning:**

- **Action 1: Evidence to Rule Out Options**
  - **A (IV fluid resuscitation):** The patient is not orthostatic, indicating no significant dehydration, so IV fluids are not immediately necessary.
  - **B (Administration of supplemental oxygen):** The patient is not hypoxic; the low pO2 and SO2 are due to venous blood gas, not arterial.
  - **C (Scheduled meal times):** While helpful, this is not an immediate treatment for severe nausea and vomiting.
  - **D (Monitoring and stress counseling):** This addresses stress but not the acute symptoms.
  - **E (PO bismuth subsalicylate):** Not typically used for pregnancy-related nausea and vomiting.
  - **F (Trial of metoclopramide):** Could be considered, but doxylamine is first-line for pregnancy-related nausea.
  - **G (Discontinuation of ginger an

In [32]:
multiple_choice_question_4 = """
A 52-year-old man comes to to the emergency department with fatigue and shortness of breath that has become progressively worse over the past week. He had an upper respiratory tract infection 2 weeks ago, for which he was given an antibiotic. He has hypertension, type 2 diabetes mellitus, and colonic polyps diagnosed on screening colonoscopy 2 years ago. His mother has systemic lupus erythematosus and his brother has a bicuspid aortic valve. He does not smoke cigarettes or drink alcohol. Current medications include lisinopril and metformin. His temperature is 37.3°C (99.1°F), pulse is 91/min, respirations are 18/min, and blood pressure is 145/84 mm Hg. His conjunctivae are pale. Cardiac examination shows a late systolic crescendo-decrescendo murmur at the right upper sternal border. Laboratory studies show:\nLeukocyte Count 9,500/mm3\nHematocrit 24%\nPlatelet Count 178,000/mm3\nLDH 215 U/L\nHaptoglobin 22 mg/dL (N=41–165 mg/dL)\nSerum\nNa+ 140 mEq/L\nK+ 4.6 mEq/L\nCL- 100 mEq/L\nHCO3- 25 mEq/L\nUrea nitrogen 21 mg/dL\nCreatinine 1.2 mg/dL\nTotal bilirubin 1.9 mg/dL\nA peripheral blood smear is shown. Which of the following is the most likely cause of this patient's anemia?
"""

multiple_choice_options_4 = """
{"A": "Autoimmune destruction of erythrocytes", "B": "Occult blood loss", "C": "Erythrocyte membrane fragility", "D": "Erythrocyte enzyme defect", "E": "Mechanical destruction of erythrocytes"}
"""

In [33]:
multiple_choice_question_4_noexplain = answer_multiple_choice_question_noexplain(multiple_choice_question_4, multiple_choice_options_4)
print(multiple_choice_question_4_noexplain.content)

```json
{
    "Answer_letter": "E",
    "level_of_evidence": "3",
    "level_of_deduction": "3",
    "amount_of_next_step": "3",
    "Answer_text": "Mechanical destruction of erythrocytes"
}
```

**Reasoning:**

- **Action 1: Evidence to Rule Out Options**
  - **A: Autoimmune destruction of erythrocytes**: The patient does not have a history of autoimmune conditions, and there is no mention of positive direct Coombs test or other autoimmune markers.
  - **B: Occult blood loss**: The patient does not have symptoms or history suggestive of gastrointestinal bleeding or other sources of blood loss.
  - **C: Erythrocyte membrane fragility**: Conditions like hereditary spherocytosis are not suggested by the clinical history or lab findings.
  - **D: Erythrocyte enzyme defect**: There is no history of G6PD deficiency or similar enzyme defects, and the clinical picture does not suggest this.
  - **E: Mechanical destruction of erythrocytes**: The presence of a heart murmur and the lab findings 

In [38]:
multiple_choice_question_4_noexplain = answer_multiple_choice_question_noexplain(multiple_choice_question_4, options = "None")
print(multiple_choice_question_4_noexplain.content)

To address the question, we need to generate five potential causes for the patient's anemia and evaluate them based on the criteria provided.

### Potential Causes:
1. **Autoimmune Hemolytic Anemia (AIHA)**
2. **Drug-Induced Hemolytic Anemia**
3. **Iron Deficiency Anemia**
4. **Anemia of Chronic Disease**
5. **Thalassemia**

### Evaluation:

#### 1. Autoimmune Hemolytic Anemia (AIHA)
- **Evidence in Context**: 
  - High (3 points): The low haptoglobin, elevated bilirubin, and anemia suggest hemolysis, which is consistent with AIHA.
- **Causal Path Inference**: 
  - Moderate (2 points): The family history of autoimmune disease (mother with lupus) could suggest a predisposition to autoimmune conditions.
- **Next Step Examinations**: 
  - Moderate (2 points): A direct Coombs test would be needed to confirm AIHA.

#### 2. Drug-Induced Hemolytic Anemia
- **Evidence in Context**: 
  - High (3 points): Recent antibiotic use could trigger hemolysis, especially if the antibiotic is known to cau

In [43]:
multiple_choice_question_4_noexplain = answer_multiple_choice_question_noexplain(multiple_choice_question_4, options = "None")
print(multiple_choice_question_4_noexplain.content)

To address the question, we need to generate potential causes for the patient's anemia and evaluate them based on the criteria provided.

### Potential Causes of Anemia:
1. **Autoimmune Hemolytic Anemia (AIHA)**
2. **Drug-Induced Hemolytic Anemia**
3. **Iron Deficiency Anemia**
4. **Anemia of Chronic Disease**
5. **Thrombotic Thrombocytopenic Purpura (TTP)**

### Evaluation of Each Cause:

#### 1. Autoimmune Hemolytic Anemia (AIHA)
- **Evidence in Context**: 
  - Low haptoglobin and elevated bilirubin suggest hemolysis.
  - Pale conjunctivae indicate anemia.
  - No direct evidence of autoimmune etiology.
- **Causal Path Inference**: 
  - Moderate: Family history of autoimmune disease (mother with lupus) could suggest a predisposition.
- **Next Step Examinations**: 
  - Moderate: Direct Coombs test needed to confirm AIHA.

#### 2. Drug-Induced Hemolytic Anemia
- **Evidence in Context**: 
  - Recent antibiotic use could trigger hemolysis.
  - Low haptoglobin and elevated bilirubin suppor

In [50]:
multiple_choice_question_4_with_fine_tune = answer_multiple_choice_question_noexplain(multiple_choice_question_4, fine_tune_prompt)
print(multiple_choice_question_4_with_fine_tune.content)

To address the question, let's first generate potential causes of anemia in this patient:

1. Autoimmune hemolytic anemia (AIHA)
2. Mechanical destruction of erythrocytes (due to aortic stenosis)
3. Anemia of chronic disease
4. Iron deficiency anemia
5. Drug-induced hemolytic anemia

Now, let's evaluate each potential cause using the criteria and scoring system:

### Option 1: Autoimmune Hemolytic Anemia (AIHA)
- **Level of Evidence Supported in the Context:** Low (1 point)
  - There is no direct mention of a positive Coombs test or other autoimmune markers.
- **Level of Evidence Supported in the Causal Path Inference:** Moderate (2 points)
  - Family history of lupus could suggest a predisposition to autoimmune conditions, but no direct evidence of AIHA.
- **Amount of Next Step Examinations Needed for Ruling Out or Confirmation:** High (1 point)
  - A direct Coombs test would be needed to confirm AIHA.

### Option 2: Mechanical Destruction of Erythrocytes (due to Aortic Stenosis)
- **

In [52]:
multiple_choice_question_3_with_fine_tune = answer_multiple_choice_question_noexplain(multiple_choice_question_3, fine_tune_prompt)
print(multiple_choice_question_3_with_fine_tune.content)

To address the question, let's first analyze the clinical scenario and the provided lab results. The patient is experiencing significant nausea and vomiting during early pregnancy, which is consistent with hyperemesis gravidarum. The blood gas analysis shows a compensated metabolic alkalosis, which is typical in cases of prolonged vomiting due to loss of gastric acid.

Now, let's evaluate the potential management options:

### Action 1: Rule Out Options
1. **IV Fluid Resuscitation**: The patient is already receiving oral fluid resuscitation, and her orthostatic vital signs are normal, suggesting she is not significantly dehydrated.
2. **Antiemetic Medication**: The patient is taking ginger and vitamin B6 with limited relief, indicating that stronger antiemetic therapy might be needed.
3. **Hospitalization**: The patient is alert, oriented, and not orthostatic, suggesting that outpatient management might be sufficient.
4. **Nutritional Support**: There is no indication of severe malnutr

In [84]:
import csv
import json
import re
import pandas as pd

In [153]:
question_bank_jsonl = r'C:\Users\PC\NIT6001\CMG_prog_v3\MedQA-USMLE\questions\US\endoscopy_questions.jsonl'
# import jsonlines using pandas
df = pd.read_json(question_bank_jsonl, lines=True)
df.head()

Unnamed: 0,question,answer,options,meta_info
0,A 50-year-old man comes to the physician becau...,B,"{'A': 'Injection sclerotherapy', 'B': 'Nadolol...",step2
1,A 65-year-old Asian woman comes to the physici...,C,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ...",step2
2,A gastroenterology fellow is interested in the...,C,"{'A': 'Random error', 'B': 'Effect modificatio...",step2
3,A 23-year-old woman with Ehlers-Danlos syndrom...,A,"{'A': 'Arthroscopy', 'B': 'Above knee cast', '...",step2
4,A 60-year-old man comes to the physician for a...,D,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ...",step2


In [88]:
# open_ended_question_answer_2 = answer_open_ended_question(multiple_choice_question_2, options=multiple_choice_options_2, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Norm_rubric_prompt)
# print(open_ended_question_answer_2.content)

In [95]:
# We will write a function to loop through all rows of question bank, extract the question and options, parse them into function answer_open_ended_question(question, options, fine_tune_prompt=fine_tune_prompt, rubric_prompt=Criterion_rubric_prompt). append the result.content to a list

def answer_all_questions(df):
    answers = []
    for index, row in df.iterrows():
        question = row['question']
        options = row['options']
        answer = answer_open_ended_question(question, options=str(options), fine_tune_prompt=fine_tune_prompt, rubric_prompt=Criterion_rubric_prompt)
        answers.append(answer.content)
    return answers

In [94]:
# check the first 5 rows of the dataframe using itterrows
for index, row in df.head().iterrows():
    print(type(row['question']), row['question'])
    print(type(row['options']), row['options'])

# row['options'] returns a dictionary, we will convert it to a string
for index, row in df.head().iterrows():
    print(type(row['question']), row['question'])
    print(type(str(row['options'])), str(row['options']))

<class 'str'> A 50-year-old man comes to the physician because of a 6-month history of difficulties having sexual intercourse due to erectile dysfunction. He has type 2 diabetes mellitus that is well controlled with metformin. He does not smoke. He drinks 5–6 beers daily. His vital signs are within normal limits. Physical examination shows bilateral pedal edema, decreased testicular volume, and increased breast tissue. The spleen is palpable 2 cm below the left costal margin. Abdominal ultrasound shows an atrophic, hyperechoic, nodular liver. An upper endoscopy is performed and shows dilated submucosal veins 2 mm in diameter with red spots on their surface in the distal esophagus. Therapy with a sildenafil is initiated for his erectile dysfunction. Which of the following is the most appropriate next step in management of this patient's esophageal findings?
<class 'dict'> {'A': 'Injection sclerotherapy', 'B': 'Nadolol therapy', 'C': 'Losaratan therapy', 'D': 'Octreotide therapy', 'E': '

In [96]:
# initiate the function
answers = answer_all_questions(df)

In [154]:
answers_2 = answer_all_questions(df)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [101]:
type(answers)

list

In [98]:
print(answers[0])

{
    "Strength of Evidence": "3",
    "Relevance to Clinical Scenario": "3",
    "Logical Consistency": "3",
    "Comprehensiveness": "3",
    "Answer": "Endoscopic band ligation is the most appropriate next step in the management of this patient's esophageal varices.",
    "Answer_letter": "F"
}


In [102]:
# convert answers[0] to a dictionary
answers_dict = json.loads(answers[0])
answers_dict

{'Strength of Evidence': '3',
 'Relevance to Clinical Scenario': '3',
 'Logical Consistency': '3',
 'Comprehensiveness': '3',
 'Answer': "Endoscopic band ligation is the most appropriate next step in the management of this patient's esophageal varices.",
 'Answer_letter': 'F'}

In [104]:
answers_dict['Answer']

"Endoscopic band ligation is the most appropriate next step in the management of this patient's esophageal varices."

In [99]:
# write function to save the answers to a file
filename = 'answers.txt'
def save_answers_to_file(answers, filename):
    with open(filename, 'w') as f:
        for answer in answers:
            f.write(answer + '\n')

# save the answers to a file
save_answers_to_file(answers, filename)


In [108]:
answers[0]

'{\n    "Strength of Evidence": "3",\n    "Relevance to Clinical Scenario": "3",\n    "Logical Consistency": "3",\n    "Comprehensiveness": "3",\n    "Answer": "Endoscopic band ligation is the most appropriate next step in the management of this patient\'s esophageal varices.",\n    "Answer_letter": "F"\n}'

In [106]:
# wirte a function to compare the answers to the correct answers
# iterrate through the dataframe and compare the answers to the correct answers
# return 'True' if the answer matches the correct answer and 'False' if it does not match
# match the answers['Answer_letter'] to the 'answer' column in the dataframe
# make new column to the dataframe: add answers['Answer'] to the dataframe, add a column 'correct' to the dataframe
# return the dataframe

def compare_answers(df, answers):
    for index, row in df.iterrows():
        for answer in answers:
            answer = json.loads(answer)
            if answer['Answer_letter'].strip().lower() == row['answer'].strip().lower():
                row['correct'] = True
                # add the answer to the dataframe
                row['LLM_answer'] = answer['Answer']
            else:
                row['correct'] = False
                # add the answer to the dataframe
                row['LLM_answer'] = answer['Answer']

    return df

# initiate the function
df_answer = compare_answers(df, answers)

In [109]:
# check the first 5 rows of the dataframe
df_answer.head()

Unnamed: 0,question,answer,options,meta_info
0,A 50-year-old man comes to the physician becau...,B,"{'A': 'Injection sclerotherapy', 'B': 'Nadolol...",step2
1,A 65-year-old Asian woman comes to the physici...,C,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ...",step2
2,A gastroenterology fellow is interested in the...,C,"{'A': 'Random error', 'B': 'Effect modificatio...",step2
3,A 23-year-old woman with Ehlers-Danlos syndrom...,A,"{'A': 'Arthroscopy', 'B': 'Above knee cast', '...",step2
4,A 60-year-old man comes to the physician for a...,D,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ...",step2


In [123]:
answers[0]

'{\n    "Strength of Evidence": "3",\n    "Relevance to Clinical Scenario": "3",\n    "Logical Consistency": "3",\n    "Comprehensiveness": "3",\n    "Answer": "Endoscopic band ligation is the most appropriate next step in the management of this patient\'s esophageal varices.",\n    "Answer_letter": "F"\n}'

In [131]:
answers_dict = json.loads(answers[0])
# compare the answers to the correct answer of the first row
answers_dict['Answer_letter'], df_answer['answer'][0]
answers_dict['Answer_letter'] ==  df_answer['answer'][0]

False

In [143]:
df_answer['options'][0]

{'A': 'Injection sclerotherapy',
 'B': 'Nadolol therapy',
 'C': 'Losaratan therapy',
 'D': 'Octreotide therapy',
 'E': 'Isosorbide mononitrate therapy',
 'F': 'Endoscopic band ligation',
 'G': 'Transjugular intrahepatic portosystemic shunt',
 'H': 'Metoprolol therapy\n"'}

In [150]:
# print all answer letter of answers, compare to the correct answer in the dataframe
count = 0
for i in range(len(answers)):
    answers_dict = json.loads(answers[i])
    if answers_dict['Answer_letter'] == df_answer['answer'][i]:
        count += 1
    print(answers_dict['Answer_letter'], df_answer['answer'][i])
print(count)

F B
C C
B C
A A
F D
K K
D D
F F
E E
C C
None C
D F
F F
A A
C C
D D
A F
A E
D D
A A
None C
C C
C C
A C
A A
A E
C C
B G
C C
D D
D E
None B
B B
D C
C C
D F
C C
D D
A A
A A
A B
A A
A C
G G
B B
F C
D D
F A
C C
G G
C C
C C
B B
D D
H A
C B
C A
B B
A A
B B
B B
I I
F F
B B
B B
G G
G G
C D
E G
F F
C C
E B
D B
D D
B B
C C
C D
A A
None E
E E
None D
C B
G F
D E
None A
E E
A D
F F
A A
C C
F G
G G
B E
E G
C C
A A
None F
B B
E C
A A
D D
A A
D D
F A
None C
A A
B B
A D
F F
D D
F F
B B
G G
B B
E E
C C
A A
F D
E E
D D
C C
C C
C C
B B
A A
E E
B B
E C
D D
B D
F F
F F
G G
F B
None C
D A
C C
C D
C C
A F
B B
C C
D E
A F
C F
C C
C C
E E
A A
D E
F F
G G
D C
D E
E E
E E
B E
C B
D D
E E
E E
A A
 D
B B
F F
D D
E E
A D
B B
C C
B C
D D
B E
E E
B D
E E
B C
D D
E E
B B
C C
A A
E E
C A
A A
C B
B B
E E
E A
None D
D D
B B
A A
B B
B B
D D
D D
E B
D A
E A
E C
E D
B B
D D
C C
D A
B E
E B
D A
E E
E D
B B
D D
E E
D D
D D
None A
D D
D D
D A
D D
A A
E B
A B
A A
E E
A D
C C
A A
C C
B B
C C
D D
E C
A A
E B
D D
C C
E B
E E
None C
B

In [151]:
a = 335/502
a

0.6673306772908366

In [141]:
# compare the answers Answer_letter to the answer column in the dataframe, if they match, return True, else return False, add a new column 'correct' to the dataframe
# add the answer to the dataframe
# return the dataframe
def compare_answers(df, answers):
    for index, row in df.iterrows():
        for answer in answers:
            answer = json.loads(answer)
            if answer['Answer_letter'].strip().lower() == row['answer'].strip().lower():
                row['correct'] = True
                # add the answer to the dataframe
                row['LLM_answer'] = answer['Answer']
            else:
                row['correct'] = False
                # add the answer to the dataframe
                row['LLM_answer'] = answer['Answer']

    return df

# initiate the function
df_answer = compare_answers(df, answers)

In [142]:
df_answer.head()

Unnamed: 0,question,answer,options,meta_info,correct,LLM_answer,LLM_answer_letter
0,A 50-year-old man comes to the physician becau...,B,"{'A': 'Injection sclerotherapy', 'B': 'Nadolol...",step2,True,The results of the study are best explained by...,B
1,A 65-year-old Asian woman comes to the physici...,C,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ...",step2,True,Malignant melanoma,C
2,A gastroenterology fellow is interested in the...,C,"{'A': 'Random error', 'B': 'Effect modificatio...",step2,True,Malignant melanoma,C
3,A 23-year-old woman with Ehlers-Danlos syndrom...,A,"{'A': 'Arthroscopy', 'B': 'Above knee cast', '...",step2,True,Arthroscopy is the most appropriate next step ...,A
4,A 60-year-old man comes to the physician for a...,D,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ...",step2,True,Fasting serum gastrin level,D


In [116]:
# count the number of correct answers
correct_answers = df_answer['correct'].sum()
total_questions = len(df_answer)
accuracy = correct_answers / total_questions

print(f"Number of correct answers: {correct_answers}")
print(f"Total number of questions: {total_questions}")
print(f"Accuracy: {accuracy:.2%}")



Number of correct answers: 502
Total number of questions: 502
Accuracy: 100.00%


In [122]:
# print options and LLM_answer to compare, and answer letter. We will check the first 5 rows
for index, row in df_answer.head().iterrows():
    print(row['options'], row['LLM_answer'], row['answer'])


{'A': 'Injection sclerotherapy', 'B': 'Nadolol therapy', 'C': 'Losaratan therapy', 'D': 'Octreotide therapy', 'E': 'Isosorbide mononitrate therapy', 'F': 'Endoscopic band ligation', 'G': 'Transjugular intrahepatic portosystemic shunt', 'H': 'Metoprolol therapy\n"'} The results of the study are best explained by effect modification. The presence of acid reflux modifies the effect of smoking on the risk of Barrett esophagus, as indicated by the lack of association between smoking and Barrett esophagus in both the presence and absence of acid reflux. B
{'A': 'Cholesterol embolism', 'B': 'Traumatic subungual hemorrhage', 'C': 'Malignant melanoma', 'D': 'Onychomycosis', 'E': 'Squamous cell carcinoma'} Malignant melanoma C
{'A': 'Random error', 'B': 'Effect modification', 'C': 'Confounding', 'D': 'Selection bias', 'E': 'Stratification', 'F': 'Matching'} Malignant melanoma C
{'A': 'Arthroscopy', 'B': 'Above knee cast', 'C': 'Knee brace only', 'D': 'Closed reduction', 'E': 'Total knee replacem

In [156]:
# write a function to answer multiple choice questions, simple prompt, zero shot, no explaination
# Write a function to answer Multiple choice USMLE questions
def answer_mcq_zeroshot(question, fine_tune_prompt="None", options = "None", rubric_prompt="None"):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided."
        ),
        (
            'user',
            """
            Based on the following clinical scenario in {question}, provide an answer based on the list of {options}
            Output format:
            {
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc."
                "Reasoning": "Reasoning text"
            }
            """.replace('{question}', question).replace('{fine_tune_prompt}', fine_tune_prompt).replace('{options}', options).replace('{rubric_prompt}', rubric_prompt)
        )
    ]
    answer = llm.invoke(message)
    return answer

In [157]:
question_bank_jsonl = r'C:\Users\PC\NIT6001\CMG_prog_v3\MedQA-USMLE\questions\US\endoscopy_questions.jsonl'
# import jsonlines using pandas
df = pd.read_json(question_bank_jsonl, lines=True)
df.head()

Unnamed: 0,question,answer,options,meta_info
0,A 50-year-old man comes to the physician becau...,B,"{'A': 'Injection sclerotherapy', 'B': 'Nadolol...",step2
1,A 65-year-old Asian woman comes to the physici...,C,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ...",step2
2,A gastroenterology fellow is interested in the...,C,"{'A': 'Random error', 'B': 'Effect modificatio...",step2
3,A 23-year-old woman with Ehlers-Danlos syndrom...,A,"{'A': 'Arthroscopy', 'B': 'Above knee cast', '...",step2
4,A 60-year-old man comes to the physician for a...,D,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ...",step2


In [158]:
# write a function to loop through all rows of question bank, extract the question and options, parse them into function answer_mcq_zeroshot. append the result.content to a list

def answer_all_questions_mcq_zeroshot(df):
    mcq_zeroshot_answers = []
    for index, row in df.iterrows():
        question = row['question']
        options = row['options']
        answer = answer_mcq_zeroshot(question, options=str(options), fine_tune_prompt=fine_tune_prompt, rubric_prompt=Criterion_rubric_prompt)
        mcq_zeroshot_answers.append(answer.content)
    return mcq_zeroshot_answers

# initiate the function
mcq_zeroshot_answers = answer_all_questions_mcq_zeroshot(df)

In [164]:
# remove ```json\n ... \n``` wrapper from the answers
mcq_zeroshot_answers_clean = [re.sub(r'```json\n|\n```', '', answer) for answer in mcq_zeroshot_answers]

In [194]:
# save the answers to a file
filename = 'mcq_zeroshot_answers.txt'
save_answers_to_file(mcq_zeroshot_answers_clean, filename)

In [168]:
json.loads(mcq_zeroshot_answers_clean[0])['Answer_letter']

'B'

In [169]:
count = 0
for i in range(len(mcq_zeroshot_answers_clean)):
    answers_dict = json.loads(mcq_zeroshot_answers_clean[i])
    if answers_dict['Answer_letter'] == df['answer'][i]:
        count += 1
    print(answers_dict['Answer_letter'], df['answer'][i])
print(count)

B B
A C
C C
A A
F D
K K
D D
F F
E E
C C
C C
F F
F F
A A
C C
D D
F F
E E
D D
A A
A C
C C
C C
A C
A A
A E
C C
G G
C C
D D
D E
B B
B B
C C
C C
F F
C C
D D
A A
A A
B B
A A
C C
G G
B B
F C
D D
A A
C C
G G
C C
C C
B B
D D
H A
B B
D A
B B
A A
B B
B B
I I
C F
B B
B B
G G
G G
D D
E G
F F
C C
B B
B B
D D
B B
C C
E D
A A
E E
E E
D D
F B
G F
E E
A A
E E
D D
D F
A A
C C
G G
G G
B E
E G
C C
A A
F F
B B
E C
A A
D D
A A
D D
A A
C C
A A
B B
D D
F F
D D
F F
B B
G G
B B
E E
C C
A A
D D
E E
D D
C C
C C
C C
B B
A A
E E
B B
B C
A D
D D
F F
F F
F G
B B
C C
D A
C C
D D
C C
A F
A B
C C
D E
F F
C F
C C
C C
E E
A A
E E
A F
G G
C C
A E
E E
E E
E E
B B
D D
E E
E E
B A
D D
B B
F F
D D
E E
D D
B B
C C
C C
D D
E E
E E
D D
E E
C C
D D
E E
B B
D C
A A
E E
A A
A A
C B
B B
E E
A A
D D
D D
B B
A A
B B
B B
D D
D D
E B
A A
E A
B C
D D
B B
E D
C C
A A
B E
E B
D A
E E
C D
B B
B D
D E
D D
D D
A A
D D
D D
A A
D D
A A
B B
A B
A A
C E
A D
C C
A A
C C
B B
C C
D D
C C
A A
E B
C D
C C
E B
E E
C C
B B
B B
A A
A A
C B
A A
E E
E B
D A


In [170]:
a = 415/502
a

0.8266932270916335

In [177]:
def answer_mcq_with_rubric(question, fine_tune_prompt="None", options = "None", rubric_prompt="None"):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases."
        ),
        (
            'user',
            """
            Think step by step:
            If {options} is not 'None', follow {rubric_prompt}, return the best answer based on the rubric
            If {options} is 'None',
            Answer the following open-ended question: {question} To answer this questions, do:
            - If 'fine_tune_prompt' is not 'None' the answer should follow {fine_tune_prompt}
            - Action 1: Generate all potential causes. the answer should follow {fine_tune_prompt}
            - Action 2: for each potential cause, evaluate by using {rubric_prompt}. If 'rubric_prompt' is 'None', generate a rubric for evaluation
            Output format: follow the rubric, including reasoning
            """.replace('{question}', question).replace('{fine_tune_prompt}', fine_tune_prompt).replace('{options}', options).replace('{rubric_prompt}', rubric_prompt)
        )
    ]
    answer = llm.invoke(message)
    return answer

In [178]:
def answer_all_questions_mcq_with_rubric(df):
    mcq_with_rubric_answers = []
    for index, row in df.iterrows():
        question = row['question']
        options = row['options']
        answer = answer_mcq_with_rubric(question, options=str(options), fine_tune_prompt=fine_tune_prompt, rubric_prompt=Criterion_rubric_prompt)
        mcq_with_rubric_answers.append(answer.content)
    return mcq_with_rubric_answers

In [179]:
# test answer_mcq_with_rubric function with the first row of the dataframe
question = df['question'][0]
options = df['options'][0]
answer = answer_mcq_with_rubric(question, options=str(options), fine_tune_prompt=fine_tune_prompt, rubric_prompt=Criterion_rubric_prompt)
answer.content


'To address the clinical scenario provided, we need to determine the most appropriate next step in managing the patient\'s esophageal findings, which are indicative of esophageal varices due to portal hypertension, likely secondary to liver cirrhosis. The patient has a history of alcohol use, which is a common cause of liver cirrhosis.\n\n### Evaluation of Potential Management Options:\n\n1. **Injection Sclerotherapy (A):**\n   - **Strength of Evidence:** Moderate (2 points). Sclerotherapy is an older method for managing esophageal varices but is less commonly used now due to the availability of more effective treatments.\n   - **Relevance to Clinical Scenario:** Moderate (2 points). It addresses the varices but is not the first-line treatment.\n   - **Logical Consistency:** Moderate (2 points). It is a logical option but not the most current standard of care.\n   - **Comprehensiveness:** Moderate (2 points). It addresses the varices but not the underlying portal hypertension.\n\n2. **

In [193]:
# print line 20 of the dataframe
df['question'][20], df['options'][20]


('A 35-year-old female comes to the physician because of a 2-year history of progressive fatigue and joint pain. She has a 1-year history of skin problems and a 4-month history of episodic pallor of her fingers. She reports that the skin of her face, neck, and hands is always dry and itchy; there are also numerous “red spots” on her face. She has become more “clumsy” and often drops objects. She has gastroesophageal reflux disease treated with lansoprazole. She does not smoke. She occasionally drinks a beer or a glass of wine. Her temperature is 36.5°C (97.7°F), blood pressure is 154/98 mm Hg, and pulse is 75/min. Examination shows hardening and thickening of the skin of face, neck, and hands. There are small dilated blood vessels around her mouth and on her oral mucosa. Mouth opening is reduced. Active and passive range of motion of the proximal and distal interphalangeal joints is limited. Cardiopulmonary examination shows no abnormalities. Her creatinine is 1.4 mg/dL. The patient is

In [174]:
mcq_with_rubric_answers = answer_all_questions_mcq_with_rubric(df)

In [176]:
# check the results
mcq_with_rubric_answers[0:10]

['{\n    "Strength of Evidence": "3",\n    "Relevance to Clinical Scenario": "3",\n    "Logical Consistency": "3",\n    "Comprehensiveness": "3",\n    "Answer": "The most appropriate next step in the management of this patient\'s esophageal varices, given the presence of red spots indicating a high risk of bleeding, is endoscopic band ligation. This procedure is a widely accepted first-line treatment for the prevention of variceal bleeding in patients with high-risk varices.",\n    "Answer_letter": "F"\n}',
 '{\n    "Strength of Evidence": "3",\n    "Relevance to Clinical Scenario": "3",\n    "Logical Consistency": "3",\n    "Comprehensiveness": "3",\n    "Answer": "The most likely diagnosis for the nontender skin lesion near the right large toenail in this clinical scenario is \'Cholesterol embolism\'. This is supported by the patient\'s history of acute myocardial infarction and the use of medications like aspirin and atorvastatin, which are associated with atherosclerosis and potent

In [190]:
answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', mcq_with_rubric_answers[4])

# Print the results
print(answer_letters)

['E']


In [192]:
count = 0
for i in range(len(mcq_with_rubric_answers)):
    answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', mcq_with_rubric_answers[i])
    
    # Check if answer_letters is not empty before accessing [0]
    if answer_letters:
        if answer_letters[0] == df['answer'][i]:
            count += 1
        print(answer_letters[0], df['answer'][i])
    else:
        # Print the entire line that couldn't be captured
        print(f"Couldn't capture the answer letter in line {i}: {mcq_with_rubric_answers[i]}")

print("Total correct answers:", count)


F B
A C
B C
A A
E D
K K
D D
F F
E E
C C
C C
F F
F F
A A
C C
D D
A F
E E
D D
A A
Couldn't capture the answer letter in line 20: {
    "Strength of Evidence": "3",
    "Relevance to Clinical Scenario": "3",
    "Logical Consistency": "3",
    "Comprehensiveness": "3",
    "Answer": "The patient is at increased risk for renal crisis, a complication associated with systemic sclerosis (scleroderma). The symptoms and findings, such as skin thickening, telangiectasia, and limited joint mobility, are consistent with scleroderma. The elevated blood pressure and creatinine level suggest potential renal involvement, which can lead to scleroderma renal crisis characterized by acute renal failure and severe hypertension.",
    "Answer_letter": "None"
}
C C
C C
G C
A A
A E
C C
B G
C C
D D
D E
Couldn't capture the answer letter in line 31: {
    "Strength of Evidence": "3",
    "Relevance to Clinical Scenario": "3",
    "Logical Consistency": "3",
    "Comprehensiveness": "3",
    "Answer": "The most

In [None]:
# we will write a RAG agent, to answer multiple choice questions. The agent will first trying to answer the question using zero shot, if it fails or the confidence is low, it will try to answer the question using a rubric prompt
# The agent is a LangChain agent.
# Another option, is that the agent will try to answer the question using a zero shot, if it fails, it will try to answer the question using RAG that query from our Neo4j CMG database


In [242]:

# Create the llm (done already)
# Create the embedding model (done already)
# Connect to the Neo4j database (done already)
# Create Agent

from langchain_core.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser

def zero_shot_agent_prompt(question, options="None", llm=cheap_llm):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided."
        ),
        (
            'user',
            """
            Based on the following clinical scenario in {question}, provide an answer based on the list of {options}
            Output format:
            {
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc."
                "Reasoning": "Reasoning text"
                "Confidence": "Confidence score float 0-1"
            }
            """.replace('{question}', question).replace('{options}', options)
        )
    ]
    answer = llm.invoke(message)
    return answer.content

def zero_shot_ruibric_agent_prompt(question, options="None", fine_tune_prompt="None", rubric_prompt="None", llm=cheap_llm):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided."
        ),
        (
            'user',
            """
            Think step by step:
            If {options} is not 'None', follow {rubric_prompt}, return the best answer based on the rubric
            If {options} is 'None',
            Answer the following open-ended question: {question} To answer this questions, do:
            - If 'fine_tune_prompt' is not 'None' the answer should follow {fine_tune_prompt}
            - Action 1: Generate all potential causes. the answer should follow {fine_tune_prompt}
            - Action 2: for each potential cause, evaluate by using {rubric_prompt}. If 'rubric_prompt' is 'None', generate a rubric for evaluation
            Output format: follow the rubric
            """.replace('{question}', question).replace('{options}', options).replace('{fine_tune_prompt}', fine_tune_prompt).replace('{rubric_prompt}', rubric_prompt)
        )
    ]
    answer = llm.invoke(message)
    return answer.content

# Design Tools

def cypher_qa(question, options):
    # embed the question and options using the embedding model: client.embeddings.create(...)
    question_embedding = client.embeddings.create(input=question, model="text-embedding-3-small")
    options_embedding = client.embeddings.create(input=options, model="text-embedding-3-small")

    # call the cypher query to query the Neo4j database, the cypher match (n)-[r]->(m) where n.em
    query = f"""
    MATCH (n)-[r]->(m)
    WHERE n.embedding IS NOT NULL AND m.embedding IS NOT NULL AND r.embedding IS NOT NULL
    WITH n, m, r, 
        gds.similarity.cosine(n.embedding, $query_embedding) AS head_similarity,
        gds.similarity.cosine(m.embedding, $query_embedding) AS tail_similarity,
        gds.similarity.cosine(r.embedding, $query_embedding) AS relation_similarity
    WHERE head_similarity > 0.5 OR tail_similarity > 0.5 OR relation_similarity > 0.5
    RETURN n, m, r, n.text AS n_text, m.text AS m_text, r.text AS r_text, head_similarity, tail_similarity, relation_similarity
    ORDER BY head_similarity DESC, tail_similarity DESC, relation_similarity DESC
    LIMIT $top_k
    """

    with driver.session() as session:
        result_question = session.run(query, query_embedding=question_embedding, top_k=5)
        result_options = session.run(query, query_embedding=options_embedding, top_k=5)
        return [(record['n_text'], record['r_text'], record['m_text']) for record in result_question], [(record['n_text'], record['r_text'], record['m_text']) for record in result_options]

# Create a set of tools
from langchain.tools import Tool

# Create a tool that will answer multiple choice questions using zero shot
zero_shot_mcq_tool = Tool.from_function(
    name = "zero_shot_mcq_tool",
    description = "Answer multiple choice questions using zero shot",
    func = zero_shot_agent_prompt
)

# Create a tool that will answer multiple choice questions using a rubric
zero_shot_rubric_mcq_tool = Tool.from_function(
    name = "zero_shot_rubric_mcq_tool",
    description = "Answer multiple choice questions using a rubric",
    func = zero_shot_ruibric_agent_prompt
)

# Create a tool that will answer multiple choice questions using cypher query from Neo4j
neo4j_mcq_tool = Tool.from_function(
    name = "neo4j_mcq_tool",
    description = "Answer multiple choice questions using cypher query from Neo4j",
    func = cypher_qa
)

In [201]:
# Create chat history callback
from langchain_community.chat_message_histories import Neo4jChatMessageHistory

def get_memory():
    return Neo4jChatMessageHistory(driver)



In [204]:
question_bank_jsonl = r'C:\Users\PC\NIT6001\CMG_prog_v3\MedQA-USMLE\questions\US\endoscopy_questions.jsonl'
# import jsonlines using pandas
df = pd.read_json(question_bank_jsonl, lines=True)
df.head()

Unnamed: 0,question,answer,options,meta_info
0,A 50-year-old man comes to the physician becau...,B,"{'A': 'Injection sclerotherapy', 'B': 'Nadolol...",step2
1,A 65-year-old Asian woman comes to the physici...,C,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ...",step2
2,A gastroenterology fellow is interested in the...,C,"{'A': 'Random error', 'B': 'Effect modificatio...",step2
3,A 23-year-old woman with Ehlers-Danlos syndrom...,A,"{'A': 'Arthroscopy', 'B': 'Above knee cast', '...",step2
4,A 60-year-old man comes to the physician for a...,D,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ...",step2


In [240]:
# Sequential approach without 'confidence'
def answer_question_with_rag(question, options):
    try:
        # Step 1: Try to answer using the zero-shot tool first
        zero_shot_response = zero_shot_agent_prompt(str(question), str(options))
        if zero_shot_response and zero_shot_response.strip():  # Check if the response is valid
            return {
                "Tool Used": "Zero-Shot Tool",
                "Answer": zero_shot_response
            }
    except Exception as e:
        print(f"Zero-shot tool failed: {e}")

    try:
        # Step 2: If zero-shot fails, try the rubric-based tool
        rubric_response = zero_shot_ruibric_agent_prompt(str(question), str(options))
        if rubric_response and rubric_response.strip():  # Check if the response is valid
            return {
                "Tool Used": "Rubric-Based Tool",
                "Answer": rubric_response
            }
    except Exception as e:
        print(f"Rubric-based tool failed: {e}")

    try:
        # Step 3: If both zero-shot and rubric-based tools fail, query Neo4j
        cypher_response = cypher_qa(str(question), str(options))
        if cypher_response:  # Check if the response is valid
            return {
                "Tool Used": "Neo4j Query Tool",
                "Answer": cypher_response
            }
    except Exception as e:
        print(f"Neo4j tool failed: {e}")

    # If all tools fail, return an error message
    return {
        "Tool Used": "None",
        "Answer": "No valid answer found using any tool."
    }

# Example usage
question = df['question'][0]
options = df['options'][0]
answer = answer_question_with_rag(question, options)
print(answer)


{'Tool Used': 'Zero-Shot Tool', 'Answer': '{\n    "Answer": "Endoscopic band ligation",\n    "Answer_letter": "F",\n    "Reasoning": "The patient presents with signs of portal hypertension, evidenced by the palpable spleen, pedal edema, and esophageal varices seen on endoscopy. The presence of dilated submucosal veins with red spots indicates esophageal varices that are at risk of bleeding. The most appropriate management for esophageal varices, especially in a patient with significant liver disease, is endoscopic band ligation, which is effective in preventing variceal hemorrhage."\n}'}


In [212]:
# loop through the dataframe and answer the questions
agent_answers = []
for index, row in df.iterrows():
    question = row['question']
    options = row['options']
    answer = answer_question_with_rag(question, options)
    agent_answers.append(answer)

In [215]:
agent_answers[0]

# each answer is a dictionary, we will convert it to a string
agent_answers_str = [json.dumps(answer) for answer in agent_answers]

# save the answers to a file
filename = 'agent_answers.txt'
save_answers_to_file(agent_answers_str, filename)

In [233]:
agent_answers_str = [str(answer) for answer in agent_answers]

# save the answers to a file
filename = 'agent_answers_str.txt'
save_answers_to_file(agent_answers_str, filename)

In [221]:
agent_answers[0]

{'Tool Used': 'Zero-Shot Tool',
 'Answer': '```json\n{\n    "Answer": "Nadolol therapy",\n    "Answer_letter": "B",\n    "Reasoning": "The patient presents with signs of liver cirrhosis, including an atrophic, nodular liver on ultrasound, splenomegaly, and esophageal varices seen on endoscopy. The presence of varices with red spots indicates a high risk of bleeding. In such cases, non-selective beta-blockers like nadolol are recommended as a primary prophylaxis to reduce portal hypertension and prevent variceal bleeding. While endoscopic band ligation is also an option, it is typically reserved for patients who cannot tolerate beta-blockers or have contraindications. Therefore, initiating nadolol therapy is the most appropriate next step in managing this patient\'s esophageal varices."\n}\n```'}

In [229]:
# list all Tool Used; pattern: {'Tool Used': 'Zero-Shot Tool'
tool_list = []
for i in range(len(agent_answers)):
    tool = json.loads(agent_answers_str[i])['Tool Used']
    tool_list.append(tool)

In [232]:
print(len(tool_list))
# print unique tools
unique_tools = set(tool_list)
unique_tools

502


{'Zero-Shot Tool'}

In [219]:
# match 'Answer_letter' pattern in the answers to a list of answers

count = 0
for i in range(len(agent_answers)):
    # remo
    answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers[i]))
    
    # Check if answer_letters is not empty before accessing [0]
    if answer_letters:
        if answer_letters[0] == df['answer'][i]:
            count += 1
        print(answer_letters[0], df['answer'][i])
    else:
        # Print the entire line that couldn't be captured
        print(f"Couldn't capture the answer letter in line {i}: {agent_answers_str[i]}")

B B
A C
B C
A A
F D
K K
D D
F F
E E
C C
C C
F F
F F
A A
C C
D D
F F
E E
D D
A A
A C
C C
C C
A C
A A
A E
C C
G G
C C
D D
D E
B B
B B
C C
C C
F F
C C
D D
A A
A A
B B
A A
C C
G G
B B
F C
D D
A A
C C
G G
C C
C C
B B
D D
H A
B B
D A
B B
A A
B B
B B
I I
C F
B B
B B
G G
G G
D D
E G
F F
C C
B B
B B
D D
B B
C C
E D
A A
E E
E E
D D
F B
G F
E E
A A
E E
D D
D F
A A
C C
G G
G G
B E
E G
C C
A A
F F
B B
E C
A A
D D
A A
D D
A A
C C
A A
B B
D D
F F
D D
F F
B B
G G
B B
E E
C C
A A
D D
E E
D D
C C
C C
C C
B B
A A
E E
B B
B C
A D
D D
F F
F F
F G
B B
C C
A A
C C
D D
C C
A F
A B
C C
D E
F F
C F
C C
C C
E E
A A
E E
A F
G G
C C
A E
E E
E E
E E
B B
D D
E E
E E
B A
D D
B B
F F
D D
E E
D D
B B
C C
C C
D D
E E
E E
D D
E E
C C
D D
E E
B B
D C
A A
E E
A A
A A
C B
B B
E E
A A
D D
D D
B B
A A
B B
B B
D D
D D
E B
A A
E A
B C
D D
B B
E D
C C
A A
B E
E B
D A
E E
C D
B B
B D
D E
D D
D D
A A
D D
D D
A A
D D
A A
B B
A B
A A
C E
A D
C C
A A
C C
B B
C C
D D
C C
A A
E B
C D
C C
E B
E E
C C
B B
B B
A A
A A
C B
A A
E E
E B
D A


In [220]:
count

414

In [243]:
a = 415/502
a

0.8266932270916335

In [208]:
options

{'A': 'Injection sclerotherapy',
 'B': 'Nadolol therapy',
 'C': 'Losaratan therapy',
 'D': 'Octreotide therapy',
 'E': 'Isosorbide mononitrate therapy',
 'F': 'Endoscopic band ligation',
 'G': 'Transjugular intrahepatic portosystemic shunt',
 'H': 'Metoprolol therapy\n"'}

In [None]:
# redesign Tools to include 

In [237]:
cheap_llm = ChatOpenAI(
    model = "gpt-4o-mini",
    api_key = "sk-proj-pjGUyMNiHPDiH7CLYv4rT3BlbkFJQpZiY1U0sYAqOATygpnJ",
    temperature = 0.0,
    max_tokens = None,
)

In [None]:
# loop through the dataframe and answer the questions
agent_answers = []
for index, row in df.iterrows():
    question = row['question']
    options = row['options']
    answer = answer_question_with_rag(question, options)
    agent_answers.append(answer)

In [280]:
# Update the zero_shot_agent_prompt to include a confidence score in the return value
def zero_shot_agent_prompt(question, options="None", llm=cheap_llm):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided."
        ),
        (
            'user',
            f"""
            Based on the following clinical scenario in {question}, provide an answer based on the list of {options}
            Output format:
            {{
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc.",
                "Reasoning": "Reasoning text",
                "Confidence": "Confidence score float 0-1"
            }}
            """
        )
    ]
    answer = llm.invoke(message)
    return answer.content

# Assuming cypher_qa also returns a confidence score, modify its return statement if needed.
def cypher_qa(question, options):
    # Embed the question and options using the embedding model
    question_embedding_response = client.embeddings.create(input=question, model="text-embedding-3-small")
    options_embedding_response = client.embeddings.create(input=options, model="text-embedding-3-small")

    question_embedding = question_embedding_response.data[0].embedding
    options_embedding = options_embedding_response.data[0].embedding

    # Cypher query to query the Neo4j database
    query = f"""
    MATCH (n)-[r]->(m)
    WHERE n.embedding IS NOT NULL AND m.embedding IS NOT NULL AND r.embedding IS NOT NULL
    WITH n, m, r, 
        gds.similarity.cosine(n.embedding, $query_embedding) AS head_similarity,
        gds.similarity.cosine(m.embedding, $query_embedding) AS tail_similarity,
        gds.similarity.cosine(r.embedding, $query_embedding) AS relation_similarity
    WHERE head_similarity > 0.3 OR tail_similarity > 0.3 OR relation_similarity > 0.3
    RETURN n, m, r, n.text AS n_text, m.text AS m_text, r.text AS r_text, head_similarity, tail_similarity, relation_similarity
    ORDER BY head_similarity DESC, tail_similarity DESC, relation_similarity DESC
    LIMIT $top_k
    """

    with driver.session() as session:
        result_question = session.run(query, query_embedding=question_embedding, top_k=5)
        result_options = session.run(query, query_embedding=options_embedding, top_k=5)
        # Here you should calculate the confidence based on the results, or modify accordingly
        # confidence_score = average of similarity of head_similarity, tail_similarity, and relation_similarity, hence it is:
        answers_question = [(record['n_text'] + " " + record['r_text'] + " " + record['m_text']) for record in result_question]
        answers_option = [(record['n_text'] + " " + record['r_text'] + " " + record['m_text']) for record in result_options]
        return answers_question, answers_option


# The above function assumes you will implement a way to derive the confidence score from your Cypher query results.

def cypher_to_prompt_response(answer_question, answer_option, question, options, llm=cheap_llm):
    # feed answers_question and answers_option to a prompt as additional context to generate a response
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases."
        ),
        (
            'user',
            """
            Based on the following clinical scenario in {question}, provide an answer based on the list of {options}, given additional context from
            {answer_question} and {answer_option}
            Output format:
            {
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc.",
                "Reasoning": "Reasoning text",
                "Confidence": "Confidence score float 0-1"
            }
            """.replace('{question}', question).replace('{options}', options).replace('{answer_question}', answer_question).replace('{answer_option}', answer_option)
        )
    ]
    answer = llm.invoke(message)
    return answer.content


In [253]:
question = df['question'][0]
question

"A 50-year-old man comes to the physician because of a 6-month history of difficulties having sexual intercourse due to erectile dysfunction. He has type 2 diabetes mellitus that is well controlled with metformin. He does not smoke. He drinks 5–6 beers daily. His vital signs are within normal limits. Physical examination shows bilateral pedal edema, decreased testicular volume, and increased breast tissue. The spleen is palpable 2 cm below the left costal margin. Abdominal ultrasound shows an atrophic, hyperechoic, nodular liver. An upper endoscopy is performed and shows dilated submucosal veins 2 mm in diameter with red spots on their surface in the distal esophagus. Therapy with a sildenafil is initiated for his erectile dysfunction. Which of the following is the most appropriate next step in management of this patient's esophageal findings?"

In [282]:
#test cypher_qa function
question = df['question'][0]
options = df['options'][0]

answers_question, answers_option = cypher_qa(question, options)

cypher_answer = cypher_to_prompt_response(answer_question=str(answers_question), answer_option=str(answers_option), question=question, options=str(options))



In [283]:
cypher_answer

'{\n    "Answer": "Endoscopic band ligation",\n    "Answer_letter": "F",\n    "Reasoning": "The patient has esophageal varices due to portal hypertension likely secondary to liver cirrhosis, as indicated by the atrophic, hyperechoic, nodular liver on ultrasound. Endoscopic band ligation is the preferred method for managing esophageal varices to prevent bleeding, as it directly addresses the varices themselves. Other options like sclerotherapy or pharmacological treatments may be considered, but band ligation is the most appropriate next step in this scenario.",\n    "Confidence": 0.9\n}'

In [307]:
# create an agent that will use the zero_shot_agent_prompt, cypher_qa, and cypher_to_prompt_response functions to answer the questions
# The agent will first try to answer the question using zero_shot_agent_prompt, if confidence level <0.6, it will try to answer the question using cypher_qa, and cypher_to_prompt_response functions

def answer_question_with_rag(question, options):
    initial_answer = zero_shot_agent_prompt(str(question), str(options))
    # Extract the confidence score from the initial answer using regex
    confidence = re.search(r'"Confidence":\s*([0-9]*\.[0-9]+)', str(initial_answer))
    print(confidence)
    if confidence and float(confidence.group(1)) < 0.9:
        answers_question, answers_option = cypher_qa(str(question), str(options))
        cypher_answer = cypher_to_prompt_response(answer_question=str(answers_question), answer_option=str(answers_option), question=question, options=str(options))
        return {
            "Tool Used": "Cypher Query Tool",
            "Answer": cypher_answer
        }
    else:
        return {
            "Tool Used": "Zero-Shot Tool",
            "Answer": initial_answer
        }
    

In [288]:
df.head()

Unnamed: 0,question,answer,options,meta_info
0,A 50-year-old man comes to the physician becau...,B,"{'A': 'Injection sclerotherapy', 'B': 'Nadolol...",step2
1,A 65-year-old Asian woman comes to the physici...,C,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ...",step2
2,A gastroenterology fellow is interested in the...,C,"{'A': 'Random error', 'B': 'Effect modificatio...",step2
3,A 23-year-old woman with Ehlers-Danlos syndrom...,A,"{'A': 'Arthroscopy', 'B': 'Above knee cast', '...",step2
4,A 60-year-old man comes to the physician for a...,D,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ...",step2


In [308]:
# test the agent with the first 5 rows of the dataframe
agent_answers = []
for index, row in df.head().iterrows():
    question = row['question']
    options = row['options']
    answer = answer_question_with_rag(question=question, options=options)
    agent_answers.append(answer)



<re.Match object; span=(594, 611), match='"Confidence": 0.9'>
<re.Match object; span=(814, 832), match='"Confidence": 0.85'>
<re.Match object; span=(655, 673), match='"Confidence": 0.95'>
<re.Match object; span=(592, 609), match='"Confidence": 0.9'>
<re.Match object; span=(543, 560), match='"Confidence": 0.9'>


In [309]:
agent_answers

[{'Tool Used': 'Zero-Shot Tool',
  'Answer': '{\n    "Answer": "Endoscopic band ligation",\n    "Answer_letter": "F",\n    "Reasoning": "The patient has signs of portal hypertension, indicated by the presence of esophageal varices (dilated submucosal veins with red spots) due to his atrophic, nodular liver, likely from alcoholic liver disease. Endoscopic band ligation is the preferred treatment for esophageal varices to prevent bleeding, especially in patients with significant liver disease. While beta-blockers like Nadolol can be used for primary prophylaxis, the presence of existing varices necessitates immediate intervention.",\n    "Confidence": 0.9\n}'},
 {'Tool Used': 'Cypher Query Tool',
  'Answer': '{\n    "Answer": "Cholesterol embolism",\n    "Answer_letter": "A",\n    "Reasoning": "The patient is a 65-year-old woman with a history of cardiovascular disease and risk factors such as hypertension, hyperlipidemia, and a significant smoking history. The presence of a nontender sk

In [310]:
# loop through the dataframe and answer the questions
agent_answers = []
for index, row in df.iterrows():
    question = row['question']
    options = row['options']
    answer = answer_question_with_rag(question, options)
    agent_answers.append(answer)

<re.Match object; span=(604, 621), match='"Confidence": 0.9'>
<re.Match object; span=(596, 614), match='"Confidence": 0.85'>
<re.Match object; span=(655, 672), match='"Confidence": 0.9'>
<re.Match object; span=(592, 609), match='"Confidence": 0.9'>
<re.Match object; span=(524, 541), match='"Confidence": 0.9'>
<re.Match object; span=(618, 635), match='"Confidence": 0.9'>
<re.Match object; span=(491, 508), match='"Confidence": 0.9'>
<re.Match object; span=(590, 607), match='"Confidence": 0.9'>
<re.Match object; span=(584, 601), match='"Confidence": 0.9'>
<re.Match object; span=(732, 750), match='"Confidence": 0.85'>
<re.Match object; span=(645, 662), match='"Confidence": 0.9'>
<re.Match object; span=(756, 773), match='"Confidence": 0.9'>
<re.Match object; span=(500, 518), match='"Confidence": 0.85'>
<re.Match object; span=(649, 667), match='"Confidence": 0.85'>
<re.Match object; span=(628, 645), match='"Confidence": 0.9'>
<re.Match object; span=(575, 593), match='"Confidence": 0.95'>
<re

In [319]:
# save the answers to a file
filename = 'agent_answer_with_cypher.txt'
# convert each answer to a string
agent_answers_str = [str(answer) for answer in agent_answers]
save_answers_to_file(agent_answers_str, filename)



In [312]:
# get all answers into a list
agent_answers_str = [json.dumps(answer) for answer in agent_answers]


In [314]:
#get Answer_letter from the answers
count = 0
for i in range(len(agent_answers)):
    answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers[i]))
    
    # Check if answer_letters is not empty before accessing [0]
    if answer_letters:
        if answer_letters[0] == df['answer'][i]:
            count += 1
        print(answer_letters[0], df['answer'][i])
    else:
        # Print the entire line that couldn't be captured
        print(f"Couldn't capture the answer letter in line {i}: {agent_answers_str[i]}")

F B
A C
B C
A A
D D
K K
D D
F F
E E
C C
C C
D F
F F
A A
C C
D D
B F
E E
D D
H A
F C
B C
C C
B C
A A
C E
C C
B G
C C
D D
D E
C B
B B
B C
C C
D F
C C
D D
A A
A A
A B
A A
A C
B G
B B
C C
D D
F A
C C
A G
B C
C C
B B
D D
H A
B B
C A
B B
A A
D B
A B
D I
C F
B B
B B
G G
D G
C D
D G
F F
C C
B B
B B
D D
B B
B C
C D
E A
A E
E E
A D
C B
G F
E E
C A
A E
D D
D F
A A
C C
G G
G G
B E
E G
B C
A A
E F
B B
E C
A A
D D
A A
D D
A A
C C
D A
B B
D D
E F
D D
F F
B B
G G
B B
E E
C C
B A
F D
E E
D D
C C
C C
C C
B B
A A
B E
B B
E C
A D
D D
F F
F F
G G
D B
A C
D A
C C
C D
C C
A F
A B
C C
D E
F F
F F
C C
C C
E E
B A
E E
F F
C G
E C
C E
E E
E E
B E
D B
D D
E E
E E
A A
D D
B B
F F
D D
E E
A D
A B
C C
E C
D D
B E
E E
C D
E E
D C
D D
E E
A B
D C
E A
E E
A A
A A
C B
C B
E E
A A
D D
A D
B B
A A
B B
B B
D D
A D
E B
C A
A A
B C
D D
B B
E D
A C
D A
B E
E B
D A
E E
A D
B B
B D
A E
C D
D D
A A
D D
D D
B A
B D
A A
E B
A B
A A
E E
A D
C C
A A
C C
B B
C C
D D
E C
A A
E B
C D
D C
E B
E E
C C
B B
B B
A A
A A
C B
A A
A E
E B
C A


In [316]:
accuracy = count/len(agent_answers)
accuracy

0.647410358565737

In [328]:
#check the first 5 rows of the answers
for i in range(5):
    print(agent_answers_str[i])

{'Tool Used': 'Zero-Shot Tool', 'Answer': '{\n    "Answer": "Endoscopic band ligation",\n    "Answer_letter": "F",\n    "Reasoning": "The patient has signs of portal hypertension, indicated by the presence of esophageal varices (dilated submucosal veins with red spots) due to his atrophic, nodular liver, likely from alcoholic liver disease. Endoscopic band ligation is the preferred method for managing esophageal varices to prevent bleeding, especially in patients with significant liver disease. While beta-blockers like Nadolol can be used for primary prophylaxis, the immediate management of existing varices is endoscopic band ligation.",\n    "Confidence": 0.9\n}'}
{'Tool Used': 'Cypher Query Tool', 'Answer': '{\n    "Answer": "Cholesterol embolism",\n    "Answer_letter": "A",\n    "Reasoning": "The patient is a 65-year-old woman with a history of cardiovascular disease and risk factors such as hypertension, hyperlipidemia, and a significant smoking history. The presence of a nontender

In [332]:
# Check the accuracy of zero-shot tool and cypher query tool
zero_shot_count = 0
zero_shot_correct = 0
cypher_count = 0
cypher_correct = 0
for i in range(len(agent_answers)):
    if agent_answers[i]['Tool Used'] == 'Zero-Shot Tool':
        zero_shot_count += 1
    elif agent_answers[i]['Tool Used'] == 'Cypher Query Tool':
        cypher_count += 1

print(f"Zero-Shot Tool count: {zero_shot_count}")
print(f"Cypher Query Tool count: {cypher_count}")

Zero-Shot Tool count: 341
Cypher Query Tool count: 161


In [337]:

zero_shot_count = 341
zero_shot_correct = 0
cypher_count = 161
cypher_correct = 0
for i in range(len(agent_answers)):
    if agent_answers[i]['Tool Used'] == 'Zero-Shot Tool':
        answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers[i]))
        if answer_letters:
            if answer_letters[0] == df['answer'][i]:
                zero_shot_correct += 1
    elif agent_answers[i]['Tool Used'] == 'Cypher Query Tool':
        answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers[i]))
        if answer_letters:
            if answer_letters[0] == df['answer'][i]:
                cypher_correct += 1


print(f"Zero-Shot Tool correct answers: {zero_shot_correct}/341")
print(f"Cypher Query Tool correct answers: {cypher_correct}/161")

zero_shot_accuracy = zero_shot_correct / zero_shot_count
cypher_accuracy = cypher_correct / cypher_count

print(f"Zero-Shot Tool accuracy: {zero_shot_accuracy:.2%}")
print(f"Cypher Query Tool accuracy: {cypher_accuracy:.2%}")

Zero-Shot Tool correct answers: 253/341
Cypher Query Tool correct answers: 72/161
Zero-Shot Tool accuracy: 74.19%
Cypher Query Tool accuracy: 44.72%


In [331]:
# check  agent_answers[0]['Answer']
print(agent_answers[1]['Tool Used'])
print(agent_answers[1]['Answer'])
# Convert the answer to a dictionary
answer_dict = json.loads(agent_answers[1]['Answer'])
answer_dict['Answer_letter']

Cypher Query Tool
{
    "Answer": "Cholesterol embolism",
    "Answer_letter": "A",
    "Reasoning": "The patient is a 65-year-old woman with a history of cardiovascular disease and risk factors such as hypertension, hyperlipidemia, and a significant smoking history. The presence of a nontender skin lesion near the right large toenail raises suspicion for a cholesterol embolism, especially given her recent myocardial infarction and the potential for atheroembolism. Cholesterol emboli can present as blue or purple lesions on the skin, often in the lower extremities, and are associated with systemic atherosclerosis. The other options do not fit the clinical picture as well as cholesterol embolism does.",
    "Confidence": 0.85
}


'A'

In [379]:
import json

# Step 1: Read the file content
filename = 'agent_answers.txt'
with open(filename, 'r') as file:
    raw_content = file.read()

# Step 2: Clean the content
# Remove markdown code block markers and backticks
cleaned_content = raw_content.replace('```json', '').replace('```', '').strip()

# Split the content into individual JSON strings (if there are multiple lines of answers)
json_entries = cleaned_content.splitlines()

# Step 3: Load each entry as a dictionary
answers = []
for entry in json_entries:
    try:
        # Convert the escaped strings into valid JSON format
        entry_dict = json.loads(entry)
        # If the 'Answer' field is still a string that looks like a JSON, we need to parse it again
        if 'Answer' in entry_dict and isinstance(entry_dict['Answer'], str):
            answer_str = entry_dict['Answer']
            # Now clean and load the inner JSON in 'Answer'
            inner_answer = json.loads(answer_str.replace('```json', '').replace('```', '').strip())
            entry_dict['Answer'] = inner_answer
        answers.append(entry_dict)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        continue

# Step 4: Now `answers` is a list of dictionaries, each representing the content of one entry
print(answers)




[{'Tool Used': 'Zero-Shot Tool', 'Answer': {'Answer': 'Nadolol therapy', 'Answer_letter': 'B', 'Reasoning': "The patient presents with signs of liver cirrhosis, including an atrophic, nodular liver on ultrasound, splenomegaly, and esophageal varices seen on endoscopy. The presence of varices with red spots indicates a high risk of bleeding. In such cases, non-selective beta-blockers like nadolol are recommended as a primary prophylaxis to reduce portal hypertension and prevent variceal bleeding. While endoscopic band ligation is also an option, it is typically reserved for patients who cannot tolerate beta-blockers or have contraindications. Therefore, initiating nadolol therapy is the most appropriate next step in managing this patient's esophageal varices."}}, {'Tool Used': 'Zero-Shot Tool', 'Answer': {'Answer': 'Cholesterol embolism', 'Answer_letter': 'A', 'Reasoning': "The clinical scenario describes a 65-year-old woman with a history of cardiovascular disease and recent percutaneo

In [383]:
answers[0]['Tool Used']

'Zero-Shot Tool'

In [391]:
# Query the answer of all wrong answers
wrong_answers = []
for i in range(len(answers)):
    if answers[i]['Answer']['Answer_letter'] != df['answer'][i]:
        # Add the correct answer to the dictionary, including the True answer text
        answers[i]['True_answer'] = {'True_answer_letter': df['answer'][i], 'True_answer_text': df['options'][i][df['answer'][i]]}
        # add question to the dictionary
        answers[i]['Question'] = df['question'][i]
        wrong_answers.append(answers[i])

        # answers[i]['True_answer'] = df['answer'][i]
        # wrong_answers.append(answers[i])




In [392]:
wrong_answers

[{'Tool Used': 'Zero-Shot Tool',
  'Answer': {'Answer': 'Cholesterol embolism',
   'Answer_letter': 'A',
   'Reasoning': "The clinical scenario describes a 65-year-old woman with a history of cardiovascular disease and recent percutaneous coronary intervention, who presents with a nontender skin lesion near the toenail. The presence of an S4 heart sound and her history of smoking and hyperlipidemia suggest underlying atherosclerosis. The nontender skin lesion could be a sign of cholesterol embolism, which can occur after vascular procedures and is characterized by 'blue toe syndrome' or livedo reticularis. The absence of trauma or signs of infection makes traumatic subungual hemorrhage and onychomycosis less likely. Malignant melanoma and squamous cell carcinoma are possible but less likely given the context and description of the lesion."},
  'True_answer': {'True_answer_letter': 'C',
   'True_answer_text': 'Malignant melanoma'},
  'Question': 'A 65-year-old Asian woman comes to the p

In [393]:
# save wrong answers to a xlsx file
filename = 'wrong_answers.xlsx'
df_wrong_answers = pd.DataFrame(wrong_answers)
df_wrong_answers.to_excel(filename, index=False)

In [None]:
# ask the agent to explain the wrong answers

def explain_wrong_answer(wrong_answers):
    self_correction_prompt = """
    Based on the following clinical scenario {question}, you answered the question incorrectly.
    Please provide an explanation for why you chose the answer you did.
    """

In [403]:
feed_back_instruction_prompt = """
Incorporate Context-Specific Details: you have to utilize relevant context from each case to avoid making generalizations that may not apply.
Expand Diagnostic Considerations: you have to evaluate a broader range of potential diagnoses, improving its decision-making across various medical scenarios.
Adopt a Holistic Approach to Reasoning: you have to consider multiple aspects of a case holistically, rather than focusing on individual symptoms or details in isolation.
Prioritize Recent and Relevant Information: you have to weigh recent and pertinent information more heavily, especially in rapidly evolving medical fields.
Refine Diagnostic Logic Pathways: you have to recognize the significance of common diagnostic indicators and effectively differentiate between similar cases.
Deepen Knowledge Integration: you have knowledge in specialized medical areas (such as vaccination schedules or specific conditions) to improve your application of knowledge across varied contexts.
"""

In [404]:
def zero_shot_agent_prompt_with_instruction_feedback(question, instruct=feed_back_instruction_prompt, options="None", llm=cheap_llm):
    message = [
        (
            'system',
            f"You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided. Your answer should reflect the following feedback instruction: {instruct}"
        ),
        (
            'user',
            f"""
            Based on the following clinical scenario in {question}, provide an answer based on the list of {options}
            Output format:
            {{
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc.",
                "Reasoning": "Reasoning text",
                "Confidence": "Confidence score float 0-1"
            }}
            """
        ),
        (
            'system',
            feed_back_instruction_prompt
        )
    ]
    print(message)
    answer = llm.invoke(message)
    return answer.content

In [405]:
df.head()

# test the function
question = df['question'][0]
options = df['options'][0]
answer = zero_shot_agent_prompt_with_instruction_feedback(question, options=options)
answer

[('system', 'You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided. Your answer should reflect the following feedback instruction: \nIncorporate Context-Specific Details: you have to utilize relevant context from each case to avoid making generalizations that may not apply.\nExpand Diagnostic Considerations: you have to evaluate a broader range of potential diagnoses, improving its decision-making across various medical scenarios.\nAdopt a Holistic Approach to Reasoning: you have to consider multiple aspects of a case holistically, rather than focusing on individual symptoms or details in isolation.\nPrioritize Recent and Relevant Information: you have to weigh recent and pertinent information more heavily, especially in rapidly evolving medical fields.\nRefine Diagnostic Logic Pathways: you have to recognize the significance of common diagnostic indicators and effect

'{\n    "Answer": "Endoscopic band ligation",\n    "Answer_letter": "F",\n    "Reasoning": "The patient presents with signs of portal hypertension, evidenced by the palpable spleen, pedal edema, and esophageal varices seen on endoscopy. The presence of dilated submucosal veins with red spots indicates esophageal varices that are at risk of bleeding. In this context, endoscopic band ligation is the preferred management for variceal bleeding or prophylaxis against bleeding, especially in patients with significant portal hypertension. While beta-blockers like Nadolol can be used for primary prophylaxis of variceal bleeding, the immediate management of existing varices is endoscopic intervention. Therefore, endoscopic band ligation is the most appropriate next step in this patient\'s management.",\n    "Confidence": 0.9\n}'

In [406]:
def zero_shot_agent_prompt_with_instruction_feedback_modified(question, instruct=feed_back_instruction_prompt, options="None", llm=cheap_llm):
    message = [
        (
            'system',
            f"You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided. Your answer should reflect the following feedback instruction: {instruct}"
        ),
        (
            'user',
            f"""
            Based on the following clinical scenario in {question}, provide an answer based on the list of {options}
            Output format:
            {{
                "Incorporate Context-Specific Details": "Incorporate Context-Specific Details text",
                "Expand Diagnostic Considerations": "Expand Diagnostic Considerations text",
                "Adopt a Holistic Approach to Reasoning": "Adopt a Holistic Approach to Reasoning text",
                "Prioritize Recent and Relevant Information": "Prioritize Recent and Relevant Information text",
                "Refine Diagnostic Logic Pathways": "Refine Diagnostic Logic Pathways text",
                "Deepen Knowledge Integration": "Deepen Knowledge Integration text",
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc.",
                "Reasoning": "Reasoning text",
            }}
            """
        ),
        (
            'system',
            feed_back_instruction_prompt
        )
    ]
    print(message)
    answer = llm.invoke(message)
    return answer.content

In [407]:
question = df['question'][0]
options = df['options'][0]
answer = zero_shot_agent_prompt_with_instruction_feedback_modified(question, options=options)
answer

[('system', 'You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided. Your answer should reflect the following feedback instruction: \nIncorporate Context-Specific Details: you have to utilize relevant context from each case to avoid making generalizations that may not apply.\nExpand Diagnostic Considerations: you have to evaluate a broader range of potential diagnoses, improving its decision-making across various medical scenarios.\nAdopt a Holistic Approach to Reasoning: you have to consider multiple aspects of a case holistically, rather than focusing on individual symptoms or details in isolation.\nPrioritize Recent and Relevant Information: you have to weigh recent and pertinent information more heavily, especially in rapidly evolving medical fields.\nRefine Diagnostic Logic Pathways: you have to recognize the significance of common diagnostic indicators and effect

'{\n    "Incorporate Context-Specific Details": "The patient is a 50-year-old man with a history of well-controlled type 2 diabetes, presenting with erectile dysfunction, pedal edema, decreased testicular volume, gynecomastia, and signs of portal hypertension (e.g., esophageal varices). The atrophic, hyperechoic, nodular liver on ultrasound suggests chronic liver disease, likely due to alcohol use, given his daily consumption of 5-6 beers.",\n    "Expand Diagnostic Considerations": "While the immediate concern is the management of esophageal varices, it is important to consider the underlying cause of the liver disease, which may include alcoholic liver disease or non-alcoholic fatty liver disease. Other potential complications of liver disease, such as hepatic encephalopathy or coagulopathy, should also be monitored.",\n    "Adopt a Holistic Approach to Reasoning": "The patient\'s erectile dysfunction may be multifactorial, involving both psychological and physiological components, in

In [408]:
# loop through the dataframe and answer the questions
agent_answers_with_improved_prompt = []
for index, row in df.iterrows():
    question = row['question']
    options = row['options']
    answer = zero_shot_agent_prompt_with_instruction_feedback_modified(question, options=options)
    agent_answers_with_improved_prompt.append(answer)

[('system', 'You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided. Your answer should reflect the following feedback instruction: \nIncorporate Context-Specific Details: you have to utilize relevant context from each case to avoid making generalizations that may not apply.\nExpand Diagnostic Considerations: you have to evaluate a broader range of potential diagnoses, improving its decision-making across various medical scenarios.\nAdopt a Holistic Approach to Reasoning: you have to consider multiple aspects of a case holistically, rather than focusing on individual symptoms or details in isolation.\nPrioritize Recent and Relevant Information: you have to weigh recent and pertinent information more heavily, especially in rapidly evolving medical fields.\nRefine Diagnostic Logic Pathways: you have to recognize the significance of common diagnostic indicators and effect

In [409]:
# save the answers to a file
filename = 'agent_answers_with_improved_prompt.txt'
# convert each answer to a string
agent_answers_with_improved_prompt_str = [str(answer) for answer in agent_answers_with_improved_prompt]
save_answers_to_file(agent_answers_with_improved_prompt_str, filename)


In [410]:
# get all answers into a list
agent_answers_with_improved_prompt_str = [json.dumps(answer) for answer in agent_answers_with_improved_prompt]
# print the first 5 answers
for i in range(5):
    print(agent_answers_with_improved_prompt_str[i])

"{\n    \"Incorporate Context-Specific Details\": \"The patient presents with signs of portal hypertension, evidenced by the palpable spleen, pedal edema, and esophageal varices. His history of well-controlled type 2 diabetes and alcohol consumption (5-6 beers daily) suggests potential liver disease, likely alcoholic liver disease, which aligns with the ultrasound findings of an atrophic, hyperechoic, nodular liver.\",\n    \"Expand Diagnostic Considerations\": \"While the immediate concern is the management of esophageal varices, it is important to consider the underlying cause of portal hypertension. Other potential diagnoses could include cirrhosis from non-alcoholic fatty liver disease, especially given the patient's diabetes, or other causes of liver dysfunction. However, the clinical picture strongly suggests alcoholic liver disease.\",\n    \"Adopt a Holistic Approach to Reasoning\": \"The patient's erectile dysfunction may be multifactorial, potentially related to his diabetes,

In [413]:
# print all answers letter and compare to the correct answer in the dataframe
count = 0
for i in range(len(agent_answers_with_improved_prompt)):
    answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers_with_improved_prompt[i]))
    
    # Check if answer_letters is not empty before accessing [0]
    if answer_letters:
        if answer_letters[0] == df['answer'][i]:
            count += 1
        print(answer_letters[0], df['answer'][i])
    else:
        # Print the entire line that couldn't be captured
        print(f"Couldn't capture the answer letter in line {i}: {agent_answers_with_improved_prompt_str[i]}", df['answer'][i])
    
print(count)


F B
C C
B C
A A
D D
K K
D D
F F
E E
C C
C C
F F
F F
A A
C C
D D
B F
A E
D D
H A
D C
C C
C C
B C
A A
C E
C C
B G
C C
D D
D E
C B
B B
B C
C C
D F
C C
D D
A A
A A
B B
A A
A C
G G
B B
F C
D D
A A
C C
A G
B C
C C
B B
D D
H A
B B
C A
B B
F A
F B
A B
D I
A F
B B
A B
G G
D G
C D
F G
F F
C C
B B
D B
D D
B B
B C
B D
A A
B E
F E
D D
C B
G F
B E
C A
A E
D D
D F
A A
C C
G G
G G
B E
E G
E C
A A
E F
B B
E C
A A
D D
A A
D D
A A
C C
A A
B B
D D
E F
D D
A F
B B
B G
B B
E E
C C
A A
F D
E E
D D
C C
C C
C C
B B
A A
D E
B B
E C
A D
B D
F F
F F
G G
D B
A C
A A
C C
C D
C C
A F
B B
C C
D E
F F
F F
C C
C C
B E
A A
E E
F F
C G
C C
C E
E E
E E
E E
D B
D D
E E
E E
A A
D D
B B
F F
D D
E E
D D
A B
C C
B C
D D
B E
E E
C D
E E
D C
D D
E E
A B
D C
A A
E E
A A
A A
C B
B B
E E
A A
D D
D D
B B
A A
D B
B B
D D
A D
E B
C A
A A
B C
D D
B B
E D
A C
D A
B E
E B
D A
E E
A D
B B
B D
A E
C D
D D
B A
D D
D D
B A
D D
A A
E B
A B
A A
E E
A D
C C
C A
C C
B B
C C
D D
C C
A A
E B
C D
C C
E B
E E
C C
B B
B B
A A
A A
C B
A A
E E
B B
D A


In [412]:
# calculate the accuracy
accuracy = count/len(agent_answers_with_improved_prompt)
accuracy

0.6613545816733067

In [None]:
# The results was better that RAG approach, but still worse than the zero shot approach. The agent was not able to learn from the feedback instruction. 
# We will try to use a different approach, we will use the feedback instruction as a prompt to the agent, and ask the agent to generate the answer based on the feedback instruction. 
# We will then compare the generated answer to the correct answer in the dataframe

In [418]:
# first, we will try with the previously wrong answers: import wrong_answers.xlsx to a dataframe
wrong_answers_xlsx = pd.read_excel('wrong_answers.xlsx')
wrong_answers_xlsx.head()

Unnamed: 0,Tool Used,Answer,True_answer,Question,AI_feedback,Extracted_AI_improvement_feedback
0,Zero-Shot Tool,"{'Answer': 'Cholesterol embolism', 'Answer_let...","{'True_answer_letter': 'C', 'True_answer_text'...",A 65-year-old Asian woman comes to the physici...,The issue with the logic of the provided data ...,The AI's reasoning could be improved by incorp...
1,Zero-Shot Tool,"{'Answer': 'Effect modification', 'Answer_lett...","{'True_answer_letter': 'C', 'True_answer_text'...",A gastroenterology fellow is interested in the...,"In this case, the AI's answer (`Effect modific...",The AI should improve its understanding of key...
2,Zero-Shot Tool,{'Answer': 'Administer pneumococcal conjugate ...,"{'True_answer_letter': 'D', 'True_answer_text'...",A 60-year-old man comes to the physician for a...,"In this case, the AI's response (`Administer p...",The AI needs to be more attentive to context-s...
3,Zero-Shot Tool,"{'Answer': 'Dental caries', 'Answer_letter': '...","{'True_answer_letter': 'C', 'True_answer_text'...",A 35-year-old female comes to the physician be...,"In this case, the AI's response suggests that ...",The AI should be trained to consider a wider r...
4,Zero-Shot Tool,"{'Answer': 'Pulmonary function testing', 'Answ...","{'True_answer_letter': 'C', 'True_answer_text'...",A 67-year-old man comes to the physician for a...,"In this scenario, the AI's response indicates ...",The AI should be trained to consider a holisti...


In [419]:
# Check the columns of both DataFrames
print("wrong_answers_xlsx columns:", wrong_answers_xlsx.columns)
print("df columns:", df.columns)

wrong_answers_xlsx columns: Index(['Tool Used', 'Answer', 'True_answer', 'Question', 'AI_feedback',
       'Extracted_AI_improvement_feedback'],
      dtype='object')
df columns: Index(['question', 'answer', 'options', 'meta_info'], dtype='object')


In [420]:
# Initialize the 'options' column if it doesn't exist
if 'options' not in wrong_answers_xlsx.columns:
    wrong_answers_xlsx['options'] = None  # or use an appropriate default value

In [421]:
for index, row in wrong_answers_xlsx.iterrows():
    question = row['Question']
    for i, r in df.iterrows():
        if r['question'] == question:
            # Append the options to the wrong_answers_xlsx DataFrame
            wrong_answers_xlsx.at[index, 'options'] = r['options']

In [422]:
# search for options in the df dataframe, that match the question in the wrong_answers_xlsx dataframe, loop through all rows in the wrong_answers_xlsx dataframe, and search for the options in the df dataframe
# if the question in the wrong_answers_xlsx dataframe matches the question in the df dataframe, we will append the options to the wrong_answers_xlsx dataframe
wrong_answers_xlsx.head()

Unnamed: 0,Tool Used,Answer,True_answer,Question,AI_feedback,Extracted_AI_improvement_feedback,options
0,Zero-Shot Tool,"{'Answer': 'Cholesterol embolism', 'Answer_let...","{'True_answer_letter': 'C', 'True_answer_text'...",A 65-year-old Asian woman comes to the physici...,The issue with the logic of the provided data ...,The AI's reasoning could be improved by incorp...,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ..."
1,Zero-Shot Tool,"{'Answer': 'Effect modification', 'Answer_lett...","{'True_answer_letter': 'C', 'True_answer_text'...",A gastroenterology fellow is interested in the...,"In this case, the AI's answer (`Effect modific...",The AI should improve its understanding of key...,"{'A': 'Random error', 'B': 'Effect modificatio..."
2,Zero-Shot Tool,{'Answer': 'Administer pneumococcal conjugate ...,"{'True_answer_letter': 'D', 'True_answer_text'...",A 60-year-old man comes to the physician for a...,"In this case, the AI's response (`Administer p...",The AI needs to be more attentive to context-s...,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ..."
3,Zero-Shot Tool,"{'Answer': 'Dental caries', 'Answer_letter': '...","{'True_answer_letter': 'C', 'True_answer_text'...",A 35-year-old female comes to the physician be...,"In this case, the AI's response suggests that ...",The AI should be trained to consider a wider r...,"{'A': 'Dental caries', 'B': 'Antiphospholipid ..."
4,Zero-Shot Tool,"{'Answer': 'Pulmonary function testing', 'Answ...","{'True_answer_letter': 'C', 'True_answer_text'...",A 67-year-old man comes to the physician for a...,"In this scenario, the AI's response indicates ...",The AI should be trained to consider a holisti...,"{'A': 'Pulmonary function testing', 'B': 'Colo..."


In [423]:
# update the wrong_answers.xlsx file with the options
filename = 'wrong_answers_with_otpions.xlsx'
wrong_answers_xlsx.to_excel(filename, index=False)

In [424]:
# check length of wrong_answers_xlsx
len(wrong_answers_xlsx)

88

In [437]:
# run the agent zero_shot_agent_prompt_with_instruction_feedback_modified with the questions, options in the wrong_answers_xlsx dataframe
agent_answers_with_improved_prompt_wrong_answers = []
for index, row in wrong_answers_xlsx.iterrows():
    question = row['Question']
    options = row['options']
    answer = zero_shot_agent_prompt_with_instruction_feedback_modified(question, options=options)
    agent_answers_with_improved_prompt_wrong_answers.append(answer)

# save the answers to a file

[('system', 'You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided. Your answer should reflect the following feedback instruction: \nIncorporate Context-Specific Details: you have to utilize relevant context from each case to avoid making generalizations that may not apply.\nExpand Diagnostic Considerations: you have to evaluate a broader range of potential diagnoses, improving its decision-making across various medical scenarios.\nAdopt a Holistic Approach to Reasoning: you have to consider multiple aspects of a case holistically, rather than focusing on individual symptoms or details in isolation.\nPrioritize Recent and Relevant Information: you have to weigh recent and pertinent information more heavily, especially in rapidly evolving medical fields.\nRefine Diagnostic Logic Pathways: you have to recognize the significance of common diagnostic indicators and effect

In [441]:
agent_answers_with_improved_prompt_wrong_answers_str[0]

'{\n    "Incorporate Context-Specific Details": "The patient is a 65-year-old Asian woman with a history of cardiovascular disease, including a recent myocardial infarction, and significant risk factors such as smoking and hypertension. The presence of night sweats over the past two months raises concern for systemic issues, while the nontender skin lesion near the right large toenail warrants further investigation, particularly in the context of her age and risk factors.",\n    "Expand Diagnostic Considerations": "In addition to the listed options, other potential diagnoses for the skin lesion could include basal cell carcinoma, actinic keratosis, or even a benign dermatological condition like a seborrheic keratosis. The night sweats could suggest an infectious process, malignancy, or even a side effect of her medications, particularly if they are new or recently adjusted.",\n    "Adopt a Holistic Approach to Reasoning": "The patient\'s overall health, including her cardiovascular sta

In [438]:
# save the answers to a file
filename = 'agent_answers_with_improved_prompt_wrong_answers.txt'
# convert each answer to a string
agent_answers_with_improved_prompt_wrong_answers_str = [str(answer) for answer in agent_answers_with_improved_prompt_wrong_answers]
save_answers_to_file(agent_answers_with_improved_prompt_wrong_answers_str, filename)


In [458]:
# json.loads(wrong_answers_xlsx['True_answer'][0])['True_answer_letter']
wrong_answers_xlsx['True_answer'][0]
true_answer_letter = re.findall(r"'True_answer_letter':\s*'(\w)'", wrong_answers_xlsx['True_answer'][0])
true_answer_letter

['C']

In [464]:
import re

def compare_agent_answers(agent_answers, df_in):
    """
    Compares the answer letters from agent responses with the correct answers in the DataFrame.

    Parameters:
    - agent_answers (list): A list of agent response strings containing answer letters.
    - df (DataFrame): A pandas DataFrame with the correct answers in the 'answer' column.

    Returns:
    - int: The count of correct answers.
    """
    count = 0  # Initialize the correct answer count

    # Loop through each agent answer and compare it with the corresponding correct answer
    for i in range(len(agent_answers)):
        # Extract the answer letter from the agent's response using regex
        answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers[i]))
        true_answer_letter = re.findall(r"'True_answer_letter':\s*'(\w)'", df_in['True_answer'][i])

        # Check if answer_letters is not empty before accessing [0]
        if answer_letters:
            # Compare extracted answer letter with the correct answer in the DataFrame
            if answer_letters[0] == true_answer_letter[0]:
                count += 1  # Increment count if correct
            print(answer_letters[0], true_answer_letter[0])
        else:
            # Print the entire line that couldn't be captured
            print(f"Couldn't capture the answer letter in line {i}: {agent_answers[i]}", true_answer_letter[0])
    print(count/len(agent_answers))
    return count  # Return the total count of correct answers


In [465]:
# check the accuracy of the agent answers
count = compare_agent_answers(agent_answers_with_improved_prompt_wrong_answers, wrong_answers_xlsx)
accuracy = count/len(agent_answers_with_improved_prompt_wrong_answers)


A C
B C
D D
D C
B C
C E
D E
C C
H A
C A
A F
D G
B D
C B
G F
D F
B E
E G
E C
E C
D D
G G
A F
A B
D E
F F
F F
C E
A A
D C
C B
E B
A A
B C
E D
B E
E B
D A
A D
B D
D E
A B
E E
A D
B B
C D
E B
C B
E B
D A
B A
B A
A D
A C
B D
E C
D D
B A
C A
A C
C C
B F
B A
D C
C B
E B
B D
D E
C D
A C
E E
C E
E D
B A
B C
A B
A A
E D
E C
A C
B B
C B
C D
B A
A A
C B
B A
D E
0.18181818181818182


In [425]:
# agent_answers_with_improved_prompt list has 502 answers, and match the length of the df dataframe. We will add each answer to the df dataframe, and then save the df dataframe to a file

# Initialize the 'agent_answer' column if it doesn't exist
if 'agent_answer' not in df.columns:
    df['agent_answer'] = None  # or use an appropriate default value

# loop through the df dataframe and add answers from the agent_answers_with_improved_prompt list to the df dataframe
for i, row in df.iterrows():
    df.at[i, 'agent_answer'] = agent_answers_with_improved_prompt_str[i]



In [434]:
df.head()

Unnamed: 0,question,answer,options,meta_info,agent_answer,Correct,Agent_answer_letter
0,A 50-year-old man comes to the physician becau...,B,"{'A': 'Injection sclerotherapy', 'B': 'Nadolol...",step2,"""{\n \""Incorporate Context-Specific Details...",False,F
1,A 65-year-old Asian woman comes to the physici...,C,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ...",step2,"""{\n \""Incorporate Context-Specific Details...",True,C
2,A gastroenterology fellow is interested in the...,C,"{'A': 'Random error', 'B': 'Effect modificatio...",step2,"""{\n \""Incorporate Context-Specific Details...",False,B
3,A 23-year-old woman with Ehlers-Danlos syndrom...,A,"{'A': 'Arthroscopy', 'B': 'Above knee cast', '...",step2,"""{\n \""Incorporate Context-Specific Details...",True,A
4,A 60-year-old man comes to the physician for a...,D,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ...",step2,"""{\n \""Incorporate Context-Specific Details...",True,D


In [429]:
# print all answers letter and compare to the correct answer in the dataframe
# count = 0
# for i in range(len(agent_answers_with_improved_prompt)):
#     answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers_with_improved_prompt[i]))
    
#     # Check if answer_letters is not empty before accessing [0]
#     if answer_letters:
#         if answer_letters[0] == df['answer'][i]:
#             count += 1
#         print(answer_letters[0], df['answer'][i])
#     else:
#         # Print the entire line that couldn't be captured
#         print(f"Couldn't capture the answer letter in line {i}: {agent_answers_with_improved_prompt_str[i]}", df['answer'][i])
    
# print(count)


# create 'Correct' column in the df dataframe
df['Correct'] = None
# Check if the 'Answer_letter' is correct. Make a 'Correct' column in df. If it is correct, set 'Correct' to True, else False.
for i in range(len(agent_answers_with_improved_prompt)):
    answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers_with_improved_prompt[i]))
    if answer_letters:
        if answer_letters[0] == df['answer'][i]:
            df.at[i, 'Correct'] = True
        else:
            df.at[i, 'Correct'] = False
    else:
        df.at[i, 'Correct'] = False




In [433]:
# add 'Agent_answer_letter' column to the df dataframe
df['Agent_answer_letter'] = None
# answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers_with_improved_prompt[0]))
# loop through the df dataframe and add the 'Answer_letter' to the 'Agent_answer_letter' column
for i in range(len(agent_answers_with_improved_prompt)):
    answer_letters = re.findall(r'"Answer_letter":\s*"(\w)"', str(agent_answers_with_improved_prompt[i]))
    if answer_letters:
        df.at[i, 'Agent_answer_letter'] = answer_letters[0]
        

In [435]:
# find the row where the anser_letter is empty in the df dataframe
df[df['Agent_answer_letter'].isnull()]


Unnamed: 0,question,answer,options,meta_info,agent_answer,Correct,Agent_answer_letter
448,A 61-year-old man presents to his primary care...,C,"{'A': 'Alpha-1-antitrypsin deficiency', 'B': '...",step1,"""{\n \""Incorporate Context-Specific Details...",False,


In [436]:
# save the df dataframe to a file
filename = 'df_with_agent_answers_from_improved_prompt.xlsx'
df.to_excel(filename, index=False)

In [479]:
wrong_answers_xlsx.head()

Unnamed: 0,Tool Used,Answer,True_answer,Question,AI_feedback,Extracted_AI_improvement_feedback,options,agent_re_answer
0,Zero-Shot Tool,"{'Answer': 'Cholesterol embolism', 'Answer_let...","{'True_answer_letter': 'C', 'True_answer_text'...",A 65-year-old Asian woman comes to the physici...,The issue with the logic of the provided data ...,The AI's reasoning could be improved by incorp...,"{'A': 'Cholesterol embolism', 'B': 'Traumatic ...","{\n ""Answer"": ""Malignant melanoma"",\n ""A..."
1,Zero-Shot Tool,"{'Answer': 'Effect modification', 'Answer_lett...","{'True_answer_letter': 'C', 'True_answer_text'...",A gastroenterology fellow is interested in the...,"In this case, the AI's answer (`Effect modific...",The AI should improve its understanding of key...,"{'A': 'Random error', 'B': 'Effect modificatio...","{\n ""Answer"": ""Confounding"",\n ""Answer_l..."
2,Zero-Shot Tool,{'Answer': 'Administer pneumococcal conjugate ...,"{'True_answer_letter': 'D', 'True_answer_text'...",A 60-year-old man comes to the physician for a...,"In this case, the AI's response (`Administer p...",The AI needs to be more attentive to context-s...,"{'A': 'Perform prostate biopsy', 'B': 'Obtain ...","```json\n{\n ""Answer"": ""Obtain low-dose CT ..."
3,Zero-Shot Tool,"{'Answer': 'Dental caries', 'Answer_letter': '...","{'True_answer_letter': 'C', 'True_answer_text'...",A 35-year-old female comes to the physician be...,"In this case, the AI's response suggests that ...",The AI should be trained to consider a wider r...,"{'A': 'Dental caries', 'B': 'Antiphospholipid ...","```json\n{\n ""Answer"": ""Dental caries"",\n ..."
4,Zero-Shot Tool,"{'Answer': 'Pulmonary function testing', 'Answ...","{'True_answer_letter': 'C', 'True_answer_text'...",A 67-year-old man comes to the physician for a...,"In this scenario, the AI's response indicates ...",The AI should be trained to consider a holisti...,"{'A': 'Pulmonary function testing', 'B': 'Colo...","```json\n{\n ""Answer"": ""Pulmonary function ..."


In [467]:
def wrong_re_answer_prompt(question, options, improvement_feedback):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided."
        ),
        (
            'user',
            f"""
            You answered this question incorrectly. Based on your previous answer, you got the following feedback: {improvement_feedback}
            Based on the feedback, re-answer the following clinical scenario in {question}, provide an answer based on the list of {options}
            Output format:
            {{
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc.",
                "Reasoning": "Reasoning text",
                "Confidence": "Confidence score float 0-1"
            }}
            """
        )
    ]
    answer = llm.invoke(message)
    return answer.content

In [468]:
# test the function questions and options from the wrong_answers_xlsx dataframe
question = wrong_answers_xlsx['Question'][0]
options = wrong_answers_xlsx['options'][0]
improvement_feedback = wrong_answers_xlsx['Extracted_AI_improvement_feedback'][0]
answer = wrong_re_answer_prompt(question, options, improvement_feedback)
answer


'{\n    "Answer": "Malignant melanoma",\n    "Answer_letter": "C",\n    "Reasoning": "The clinical scenario presents a 65-year-old woman with a history of smoking and cardiovascular disease, who has a nontender skin lesion near the right large toenail. The presence of night sweats, although nonspecific, can sometimes be associated with malignancies. Given the location and description of the lesion, malignant melanoma should be considered, especially since subungual melanoma can present as a pigmented lesion under or near the nail. The patient\'s smoking history and age increase her risk for malignancies, and melanoma is a critical diagnosis not to miss. Other options like cholesterol embolism or traumatic subungual hemorrhage are less likely given the chronicity and description of the lesion.",\n    "Confidence": 0.8\n}'

In [469]:
# loop through the wrong_answers_xlsx dataframe and re-answer the questions
agent_answers_re_answer = []
for index, row in wrong_answers_xlsx.iterrows():
    question = row['Question']
    options = row['options']
    improvement_feedback = row['Extracted_AI_improvement_feedback']
    answer = wrong_re_answer_prompt(question, options, improvement_feedback)
    agent_answers_re_answer.append(answer)
    

In [470]:
# check the accuracy of the agent answers
count = compare_agent_answers(agent_answers_re_answer, wrong_answers_xlsx)
accuracy = count/len(agent_answers_re_answer)

C C
C C
E D
A C
A C
A E
C E
C C
A A
A A
F F
E G
D D
C B
F F
F F
B E
G G
E C
B C
A D
G G
A F
A B
D E
C F
A F
A E
A A
C C
B B
B B
A A
C C
D D
E E
B B
A A
E D
B D
D E
A B
E E
D D
B B
C D
E B
C B
E B
D A
B A
E A
B D
A C
B D
C C
D D
A A
A A
C C
C C
F F
B A
C C
B B
B B
B D
D E
C D
C C
E E
C E
D D
A A
B C
B B
B A
D D
C C
C C
B B
C B
C D
B A
D A
C B
B A
B E
0.48863636363636365


In [471]:
# add the answers to wrong_answers_xlsx dataframe 
wrong_answers_xlsx['agent_re_answer'] = agent_answers_re_answer

In [480]:
# save the wrong_answers_xlsx dataframe to a file
filename = 'wrong_answers_xlsx_with_re_answer_from_feedback.xlsx'
wrong_answers_xlsx.to_excel(filename, index=False)

In [476]:
def wrong_re_answer_prompt_without_feedback(question, options):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided."
        ),
        (
            'user',
            f"""
            You answered this question incorrectly. Re-answer the following clinical scenario in {question}, provide an answer based on the list of {options}
            Output format:
            {{
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc.",
                "Reasoning": "Reasoning text",
                "Confidence": "Confidence score float 0-1"
            }}
            """
        )
    ]
    answer = llm.invoke(message)
    return answer.content

In [477]:
# try the function
question = wrong_answers_xlsx['Question'][0]
options = wrong_answers_xlsx['options'][0]
answer = wrong_re_answer_prompt_without_feedback(question, options)
answer

'{\n    "Answer": "Cholesterol embolism",\n    "Answer_letter": "A",\n    "Reasoning": "The patient has a history of cardiovascular disease and recent percutaneous coronary intervention, which increases the risk of cholesterol embolism. The presence of a nontender skin lesion near the toenail could represent a \'blue toe syndrome\' or livedo reticularis, which are common cutaneous manifestations of cholesterol embolism. The absence of trauma history and the patient\'s cardiovascular risk factors make cholesterol embolism the most likely diagnosis.",\n    "Confidence": 0.8\n}'

In [478]:
def extract_confidence_score(response_content):
    # convert the response to string
    response_str = str(response_content)
    # extract the confidence score using regex
    confidence = re.search(r'"Confidence":\s*([0-9]*\.[0-9]+)', response_str)
    # also search for Confidence score in single quotes
    if confidence is None:
        confidence = re.search(r"'Confidence':\s*([0-9]*\.[0-9]+)", response_str)
    if confidence:
        return float(confidence.group(1))
    else:
        return None

In [496]:
def re_answer_with_additional_context(question, options, additional_context):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided."
        ),
        (
            'user',
            f"""
            You answered this question incorrectly: {additional_context}.
            Explain why you might be wrong based on: 
                Incorporate Context-Specific Details: you have to utilize relevant context from each case to avoid making generalizations that may not apply.
                Expand Diagnostic Considerations: you have to evaluate a broader range of potential diagnoses, improving its decision-making across various medical scenarios.
                Adopt a Holistic Approach to Reasoning: you have to consider multiple aspects of a case holistically, rather than focusing on individual symptoms or details in isolation.
                Prioritize Recent and Relevant Information: you have to weigh recent and pertinent information more heavily, especially in rapidly evolving medical fields.
                Refine Diagnostic Logic Pathways: you have to recognize the significance of common diagnostic indicators and effectively differentiate between similar cases.
                Deepen Knowledge Integration: you have knowledge in specialized medical areas (such as vaccination schedules or specific conditions) to improve your application of knowledge across varied contexts.
            Re-answer the following clinical scenario: {question}.
            Provide an answer based on the list of {options}.
            Output format:
            {{
                "Self_feedback": "Self feedback text",
            }}
            """
        )
    ]
    
    answer = llm.invoke(message)
    return answer.content

In [494]:
def wrong_re_answer_prompt_with_confidence_selffeedback(question, options):
    message = [
        (
            'system',
            "You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided."
        ),
        (
            'user',
            f"""
            You answered this question incorrectly. Re-answer the following clinical scenario: {question}. 
            Provide an answer based on the list of {options}.
            Output format:
            {{
                "Answer": "Answer text",
                "Answer_letter": "Answer option A, B, C, etc.",
                "Reasoning": "Reasoning text",
                "Confidence": "Confidence score float 0-1"
            }}
            """
        )
    ]
    
    answer = llm.invoke(message)
    response_content = answer.content
    print(f"Initial response: {response_content}")

    # Extract confidence score from the response
    confidence_score = extract_confidence_score(response_content)  # You'll need to define this function
    print(f"Initial confidence score: {confidence_score}")

    # Check if confidence score is not 0.95
    re_answer = None
    if confidence_score is not None and confidence_score < 0.95:
        print("Confidence score below threshold, re-answering with additional context...")
        additional_context = response_content  # Use the response content as additional context
        re_answer =  re_answer_with_additional_context(question, options, additional_context)  # Call new function
        print(f"Re-answer response content: {re_answer}")
    return response_content, re_answer

In [497]:
# test the function
question = wrong_answers_xlsx['Question'][0]
options = wrong_answers_xlsx['options'][0]
response_content, re_answer = wrong_re_answer_prompt_with_confidence_selffeedback(question, options)
response_content, re_answer


Initial response: ```json
{
    "Answer": "Cholesterol embolism",
    "Answer_letter": "A",
    "Reasoning": "The patient has a history of recent myocardial infarction and percutaneous coronary intervention, which increases the risk of cholesterol embolism. The presence of a nontender skin lesion near the toenail, in the context of her cardiovascular history, suggests a possible cholesterol embolism. This condition can cause small, painful, or painless skin lesions, often in the form of livedo reticularis or blue toe syndrome, due to emboli lodging in small vessels. The absence of other systemic symptoms like fever or weight loss makes other options less likely.",
    "Confidence": 0.85
}
```
Initial confidence score: 0.85
Confidence score below threshold, re-answering with additional context...
Re-answer response content: ```json
{
    "Self_feedback": "In the initial analysis, I focused heavily on the patient's cardiovascular history and the presence of a nontender skin lesion, which

('```json\n{\n    "Answer": "Cholesterol embolism",\n    "Answer_letter": "A",\n    "Reasoning": "The patient has a history of recent myocardial infarction and percutaneous coronary intervention, which increases the risk of cholesterol embolism. The presence of a nontender skin lesion near the toenail, in the context of her cardiovascular history, suggests a possible cholesterol embolism. This condition can cause small, painful, or painless skin lesions, often in the form of livedo reticularis or blue toe syndrome, due to emboli lodging in small vessels. The absence of other systemic symptoms like fever or weight loss makes other options less likely.",\n    "Confidence": 0.85\n}\n```',
 '```json\n{\n    "Self_feedback": "In the initial analysis, I focused heavily on the patient\'s cardiovascular history and the presence of a nontender skin lesion, which led me to consider cholesterol embolism as the most likely diagnosis. However, I need to incorporate more context-specific details and

In [498]:
# Re-design system prompt
system_prompt = """
You are a medical doctor, experienced in diagnosing diseases. You are presented with a clinical scenario and asked to provide a conclusion, based on the information provided.
You also keep this in mind: 
    Incorporate Context-Specific Details: you have to utilize relevant context from each case to avoid making generalizations that may not apply.
    Expand Diagnostic Considerations: you have to evaluate a broader range of potential diagnoses, improving its decision-making across various medical scenarios.
    Adopt a Holistic Approach to Reasoning: you have to consider multiple aspects of a case holistically, rather than focusing on individual symptoms or details in isolation.
    Prioritize Recent and Relevant Information: you have to weigh recent and pertinent information more heavily, especially in rapidly evolving medical fields.
    Refine Diagnostic Logic Pathways: you have to recognize the significance of common diagnostic indicators and effectively differentiate between similar cases.
    Deepen Knowledge Integration: you have knowledge in specialized medical areas (such as vaccination schedules or specific conditions) to improve your application of knowledge across varied contexts.
"""

In [499]:
# write a prompt agent that will extract the question context into head, relation, and tail, following data structure:
# thinking process detect: 
# If evidence present: (e:evidence A)-[r:Conjuct | Disjunct | Negate]->(e:evidence B)-[r:Conjuct | Disjunct | Negate]->(e:evidence C) ...
# If evidence not present: (e:evidence / thought A)-[r:Conjuct | Disjunct | Negate]->(e:evidence / thought B)-[r:Conjuct | Disjunct | Negate]->(e:evidence / thought C) ...
# 

In [501]:
class Node:
    """A class representing a medical condition or fact."""
    def __init__(self, name, value):
        self.name = name
        self.value = value  # True or False

    def __repr__(self):
        return f"{self.name}: {self.value}"

def implication(A, B):
    """Implements a logical implication A -> B."""
    return not A.value or B.value

# Define nodes
H = Node("Significant smoking history", True)
Q = Node("Smoke-free for 20 years", True)
T = Node("Lung cancer screening indicated", True)
not_T = Node("Lung cancer screening not indicated", not T.value)

# Evaluate implications
history_implies_test = implication(H, T)         # H ⇒ T
cessation_implies_no_test = implication(Q, not_T)  # Q ⇒ ¬T

# Final assessment
assessment = history_implies_test and cessation_implies_no_test

# Display results
print(f"({H.name} ⇒ {T.name}) ∧ ({Q.name} ⇒ ¬{T.name}): {assessment}")



(Significant smoking history ⇒ Lung cancer screening indicated) ∧ (Smoke-free for 20 years ⇒ ¬Lung cancer screening indicated): False
