In [None]:
import torch
import pandas as pd
import json
import faiss
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
import random
import numpy as np


# Select the compute device once for the whole script: the GPU when CUDA is
# usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    # Same three CPU-side seeders as always, applied in the same order.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # The CUDA generators are only seeded when a CUDA device is available.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Seed all RNGs at import time so the sampling-based generation further down
# starts from a fixed random state.
set_seed(42)  


# Sentence encoder used by retrieve_best_text to embed queries. It is loaded
# once at module level from the given filesystem path and shared by every call.
text_embedding_model = SentenceTransformer("/root/autodl-tmp/all-MiniLM-L6-v2")


def load_text_retrieval_system(index_file, texts_file):
    """Load the FAISS index and the JSON text collection used for retrieval.

    Args:
        index_file: Path of the serialized FAISS index to read.
        texts_file: Path of the UTF-8 encoded JSON file with the texts.

    Returns:
        A ``(index, texts)`` tuple: the FAISS index object and the decoded
        JSON content.
    """
    faiss_index = faiss.read_index(index_file)

    with open(texts_file, mode="r", encoding="utf-8") as handle:
        corpus = json.loads(handle.read())

    return faiss_index, corpus


def retrieve_best_text(query, index, texts, min_similarity=0.025):
    """Return the stored text closest to ``query``, or "" if nothing qualifies.

    The query is embedded with the module-level ``text_embedding_model`` and
    the single nearest neighbour is looked up in ``index``.

    Args:
        query: Query string. Blank or non-string values (for example the NaN
            pandas yields for an empty spreadsheet cell) produce "".
        index: Object exposing a FAISS-style ``search(vectors, k)`` method.
        texts: Sequence of texts, indexed by the ids that ``index`` returns.
        min_similarity: The match is used only when ``1 / (1 + distance)``
            is strictly greater than this value (default 0.025).

    Returns:
        The best matching text, or "" when the query is blank, the index has
        no neighbour to offer, or the match is not similar enough.

    Raises:
        IndexError: If the index returns an id beyond the end of ``texts``
            (the index and the text list do not belong together).
    """
    # Nothing meaningful to embed: skip the encoder and the index entirely.
    if not isinstance(query, str) or not query.strip():
        return ""

    query_embedding = text_embedding_model.encode([query], convert_to_tensor=False)
    distances, indices = index.search(query_embedding.astype("float32"), 1)

    best_index = int(indices[0][0])
    # FAISS reports "no neighbour found" (e.g. an empty index) as label -1.
    # Without this guard texts[-1] would silently return the LAST text, or
    # raise IndexError on an empty list.
    if best_index < 0:
        return ""

    best_text = texts[best_index]
    # Maps a distance in [0, inf) to a similarity in (0, 1].
    # NOTE(review): assumes the index reports non-negative distances (e.g.
    # an L2 index) — confirm against how the index file was built.
    best_similarity = 1 / (1 + distances[0][0])

    return best_text if best_similarity > min_similarity else ""


def load_model(model_path):
    """Load the causal language model and its tokenizer from ``model_path``.

    Args:
        model_path: Location passed to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is loaded in bfloat16,
        moved to the module-level ``device`` and switched to eval mode.
    """
    # NOTE: trust_remote_code=True lets the checkpoint run its own Python
    # code while loading; only point this at checkpoints you trust.
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

    load_options = {
        "torch_dtype": torch.bfloat16,
        "low_cpu_mem_usage": True,
        "trust_remote_code": True,
    }
    model = AutoModelForCausalLM.from_pretrained(model_path, **load_options)
    model = model.to(device)
    model = model.eval()

    return model, tokenizer


def format_query(user_input, retrieved_text):
    """Build the prompt sent to the model for one user question.

    Args:
        user_input: The user's symptom description.
        retrieved_text: Retrieved supporting text; any falsy value leaves the
            context section empty.

    Returns:
        The prompt string: question section, fixed instruction sentence,
        retrieved-context section and the answer header.
    """
    if retrieved_text:
        context = f"Retrieved Document: {retrieved_text}"
    else:
        context = ""

    sections = (
        "### User Question ###\n",
        f"{user_input}\n",
        "Based on the symptoms I described, what disease do you think I have? Please give the disease name before proceeding with the subsequent analysis.\n",
        "### Retrieved Context ###\n",
        f"{context}\n\n",
        "### Assistant Answer ###\n",
    )
    return "".join(sections)


def generate_response(model, tokenizer, query, gen_kwargs=None):
    """Generate the model's continuation of ``query`` and return it as text.

    Args:
        model: Model exposing ``generate``; expected to be a causal LM as
            returned by ``load_model``.
        tokenizer: Tokenizer matching ``model``.
        query: Full prompt string.
        gen_kwargs: Optional dict of generation settings that override the
            defaults (50 new tokens, sampling with temperature 0.7 and
            top_k 50).

    Returns:
        The newly generated text only, with surrounding whitespace stripped.
    """
    inputs = tokenizer(query, return_tensors="pt").to(device)

    settings = {"max_new_tokens": 50, "do_sample": True, "temperature": 0.7, "top_k": 50}
    if gen_kwargs:
        settings.update(gen_kwargs)

    with torch.no_grad():
        outputs = model.generate(**inputs, **settings)

    # Drop the prompt by token position instead of str.replace(query, ""):
    # the decoded text is not guaranteed to reproduce the prompt character
    # for character (tokenizer normalisation, special tokens), and replace()
    # would also delete any later repetition of the prompt in the answer.
    # Assumes a decoder-only model whose output is prompt tokens followed by
    # the new tokens, which is what load_model's AutoModelForCausalLM yields.
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]

    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


def process_excel_with_rag(file_path, output_path, model_path, index_file,
                           texts_file, preview_count=20):
    """Answer every row of an Excel sheet with retrieval-augmented generation.

    Reads ``file_path``, generates a response for each row's "Symptoms"
    value, stores it in the "Response after RAG" column and writes the
    result to ``output_path``.

    Args:
        file_path: Input Excel file; must contain a "Symptoms" column.
        output_path: Destination Excel file (written without the index).
        model_path: Location handed to ``load_model``.
        index_file: FAISS index path handed to ``load_text_retrieval_system``.
        texts_file: JSON texts path handed to ``load_text_retrieval_system``.
        preview_count: Number of leading rows echoed to stdout (default 20).

    Raises:
        KeyError: If the sheet has no "Symptoms" column.
    """
    df = pd.read_excel(file_path)

    # Check the schema BEFORE loading the model and index, so a wrong input
    # file fails immediately instead of after the expensive setup.
    if "Symptoms" not in df.columns:
        raise KeyError(f"'Symptoms' column not found in {file_path}")

    response_column = "Response after RAG"
    if response_column not in df.columns:
        df[response_column] = ""
    df[response_column] = df[response_column].astype(str)

    model, tokenizer = load_model(model_path)
    index, texts = load_text_retrieval_system(index_file, texts_file)

    progress = tqdm(df.iterrows(), total=len(df), desc="Processing with RAG")
    for position, (i, row) in enumerate(progress):
        raw_symptoms = row["Symptoms"]
        # Empty spreadsheet cells arrive as NaN; treat them as blank text.
        symptom_text = "" if pd.isna(raw_symptoms) else str(raw_symptoms)

        if symptom_text.strip():
            retrieved_text = retrieve_best_text(symptom_text, index, texts)
            query_with_rag = format_query(symptom_text, retrieved_text)
            response = generate_response(model, tokenizer, query_with_rag)
        else:
            # Nothing to diagnose: leave the response empty rather than
            # feeding a blank prompt to the encoder and the model.
            response = ""
        df.at[i, response_column] = str(response)

        # Preview by row position, not index label, so the limit holds even
        # when the frame's index is not 0..n-1.
        if position < preview_count:
            print(f"--- Record {position+1} ---")
            print(f"Symptoms: {symptom_text}")
            print(f"Generated Response: {response}")
            print("\n" + "="*80 + "\n")

    df.to_excel(output_path, index=False)


if __name__ == "__main__":
    # Run configuration: input sheet and the file the answers are written to.
    input_file = "/root/autodl-tmp/nhs_test.xlsx"
    output_file = "/root/autodl-tmp/nhs_test_with_rag.xlsx"
    # Model checkpoint used for generation.
    model_path = "/root/autodl-tmp/trained_model_MedQA"
    # Retrieval assets: the FAISS index and the JSON texts loaded with it.
    index_file = "/root/autodl-tmp/NHS_text_index.faiss"
    texts_file = "/root/autodl-tmp/NHS_text_texts.json"

    process_excel_with_rag(
        file_path=input_file,
        output_path=output_file,
        model_path=model_path,
        index_file=index_file,
        texts_file=texts_file,
    )


Processing with RAG:   0%|          | 1/410 [00:03<24:23,  3.58s/it]

--- Record 1 ---
Symptoms: In most cases, this condition causes no noticeable symptoms. However, if it becomes large, some people may develop pain or a pulsating feeling in their abdomen (tummy) or persistent back pain. This condition doesn’t usually pose a serious threat to health, but there is a risk that a larger abnormality could rupture. A rupture can cause massive internal bleeding, which is usually fatal. Around 8 out of 10 people with a rupture either die before they reach the hospital or do not survive surgery. The most common symptom of a rupture is sudden and severe abdominal pain. If you suspect that you or someone else has had a rupture, call emergency services immediately and ask for an ambulance.
Generated Response: Aortic aneurysm

### Step 1: Identify the disease from the symptoms described
The disease described is an aortic aneurysm, which is characterized by the dilation of the aorta. The symptoms described are a pulsating




Processing with RAG:   0%|          | 2/410 [00:06<21:44,  3.20s/it]

--- Record 2 ---
Symptoms: Symptoms may vary from person to person and can appear gradually or suddenly. Common symptoms include swelling, pain that worsens during or after movement or exercise, stiffness that is more pronounced in the morning or after periods of rest, tenderness when touching the affected area, mild heat, and a loss of movement and strength around the ankle and foot.
Generated Response: Osteoarthritis of the knee

### Step 1: Identify the symptoms described
The symptoms described include swelling, pain that worsens during or after movement or exercise, stiffness that is more pronounced in the morning or after periods of rest,




Processing with RAG:   1%|          | 3/410 [00:09<20:55,  3.09s/it]

--- Record 3 ---
Symptoms: This condition is very common among teenagers and young adults, affecting approximately 80% of individuals aged 11 to 30. It is most prevalent in girls between the ages of 14 to 17 and in boys between 16 to 19. Many people experience intermittent symptoms over several years, with improvements typically occurring as they get older. Symptoms often disappear by the mid-twenties, but in some cases, they may persist into adulthood. Around 5% of women and 1% of men continue to experience symptoms beyond the age of 25.
Generated Response: Pruritus
Pruritus is the most common symptom associated with eczema, and it is typically accompanied by a red rash. The rash can be generalised, affecting the entire body, or it may be localised to one or more




Processing with RAG:   1%|          | 4/410 [00:12<20:48,  3.08s/it]

--- Record 4 ---
Symptoms: To diagnose this condition, a healthcare professional will examine your abdomen. They may perform a simple test called Murphy’s sign, where you will be asked to take a deep breath while pressure is applied just below your rib cage. If the condition is present, you will experience sudden pain as your organ moves downward during inhalation. If symptoms suggest an acute case, you will be referred to a hospital immediately for further tests and treatment, which may include additional diagnostic procedures.
Generated Response: Ovarian cyst

### Step 1: The patient experiences pain during deep breathing and sudden pain when pressure is applied just below the rib cage, which suggests an abdominal condition.
This information points toward a diagnosis of an ovarian cyst, which is a




Processing with RAG:   1%|          | 5/410 [00:15<20:46,  3.08s/it]

--- Record 5 ---
Symptoms: Symptoms of this condition usually begin gradually before rapidly worsening as the number of abnormal cells in the blood increases. Most symptoms result from a lack of healthy blood cells. Common signs include pale skin, fatigue, breathlessness, frequent infections over a short period, unusual or frequent bleeding (such as bleeding gums or nosebleeds), a high temperature (fever) of 38°C (100.4°F) or above, night sweats, bone and joint pain, easily bruised skin, swollen lymph nodes, abdominal pain due to an enlarged liver or spleen, unexplained weight loss, and a purple skin rash (purpura).
Generated Response: Leukemia

### Step 1: Determine the type of leukemia
Leukemia is a type of cancer that affects the blood and bone marrow, and there are several subtypes. Acute myeloid leukemia (AML) and acute lymph




Processing with RAG:   1%|▏         | 6/410 [00:18<20:16,  3.01s/it]

--- Record 6 ---
Symptoms: As abnormal cells multiply in the bone marrow, the production of normal blood cells decreases. Children may become tired and lethargic due to anemia caused by a lack of red blood cells. They may develop bruises, and bleeding may take longer to stop due to a low platelet count, which affects blood clotting. Additionally, a reduced number of normal white blood cells can lead to frequent infections. Affected children may feel generally unwell, experience aches and pains in the limbs, or have swollen lymph glands. Initially, the symptoms may resemble those of a viral infection, but when they persist for more than one to two weeks, the underlying condition often becomes clear.
Generated Response: Leukemia
### Explanation ###
The abnormal cell multiplication in the bone marrow, anemia, bruising, bleeding, low platelet count, frequent infections, and swollen lymph glands described in the question are all symptoms of leukemia, a type of cancer




Processing with RAG:   2%|▏         | 7/410 [00:21<20:03,  2.99s/it]

--- Record 7 ---
Symptoms: Many of the symptoms of this condition result from a reduced number of healthy blood cells in the body. Common symptoms include looking paler than usual and feeling tired due to a low red blood cell count (anemia), easy bruising and prolonged bleeding caused by a reduced number of platelets, and frequent infections due to a shortage of mature white blood cells. Additional symptoms may include bone aches and pains, swollen lymph nodes in the neck, underarms, or groin, general feelings of unwellness and fatigue, fever and night sweats without an obvious cause, as well as headaches and visual disturbances.
Generated Response: Leukemia

### Step 1: The condition is characterized by a reduced number of healthy blood cells in the body.
The condition is characterized by a reduced number of healthy blood cells in the body.

### Step 2: The patient presents with




Processing with RAG:   2%|▏         | 8/410 [00:24<20:06,  3.00s/it]

--- Record 8 ---
Symptoms: The symptoms of this condition typically develop over a few weeks and gradually become more severe. Common signs include pale skin, persistent tiredness, breathlessness, frequent infections, and unusual or frequent bleeding, such as bleeding gums or nosebleeds.
Generated Response: Thalassemia major

### Step 1: Identify the key symptoms described
The key symptoms described are pale skin, persistent tiredness, breathlessness, frequent infections, and unusual or frequent bleeding.

### Step 2: Relate these




Processing with RAG:   2%|▏         | 9/410 [00:27<19:57,  2.99s/it]

--- Record 9 ---
Symptoms: As abnormal cells multiply in the bone marrow, the production of normal blood cells decreases. This can cause children to become tired and lethargic due to anemia, which results from a shortage of red blood cells. They may also develop bruises, and bleeding may take longer to stop because of a reduced number of platelets. Additionally, a low count of normal white blood cells can make them more susceptible to infections. Affected children often feel generally unwell, experience aches and pains in their limbs, or develop swollen lymph glands. Initially, the symptoms may resemble those of a viral infection, but if they persist for more than one to two weeks, the underlying condition usually becomes apparent.
Generated Response: Leukemia

### Step 1: Understanding the description of the disease
Leukemia is a group of cancers that affect the blood and bone marrow. Abnormal cells accumulate in the bone marrow, leading to a decrease in the production of normal




Processing with RAG:   2%|▏         | 10/410 [00:30<19:43,  2.96s/it]

--- Record 10 ---
Symptoms: Many of the symptoms of this condition result from a reduced number of healthy blood cells in the body. Common symptoms include looking paler than usual and feeling tired due to a low red blood cell count (anemia), easy bruising and prolonged bleeding caused by a reduced number of platelets, and frequent infections due to a shortage of mature white blood cells. Other symptoms may include aches and pains in the bones, swollen lymph nodes in the neck, underarms, or groin, feeling generally unwell and fatigued, fever and night sweats without an obvious cause, headaches and blurred vision due to an excess of abnormal white blood cells, and breathlessness related to the same issue.
Generated Response: Leukemia

Leukemia is a disease of the blood and bone marrow. The bone marrow produces cells called blood cells, which are responsible for carrying oxygen and nutrients throughout the body, fighting off infections, and repairing damaged tissues. The bone marrow




Processing with RAG:   3%|▎         | 11/410 [00:33<19:32,  2.94s/it]

--- Record 11 ---
Symptoms: The primary symptom of this condition is a severe, dull pain in the upper abdomen that appears suddenly. This aching pain often worsens gradually and may radiate to the back or under the left shoulder blade. Eating or drinking, particularly fatty foods, can quickly intensify the discomfort. Some individuals find that leaning forward or curling into a ball helps relieve the pain, whereas lying flat on the back tends to make it worse. When caused by gallstones, symptoms often arise after consuming a large meal, while cases related to alcohol consumption typically lead to pain developing 6 to 12 hours after drinking a significant amount of alcohol.
Generated Response: Gallstones

### Step 1:  Identify the primary symptom described in the question.
The primary symptom of the condition described is a severe, dull pain in the upper abdomen that appears suddenly.

### Step 2:  Identify the radiating




Processing with RAG:   3%|▎         | 12/410 [00:36<19:32,  2.95s/it]

--- Record 12 ---
Symptoms: This condition is managed with lifelong medication to replace missing hormones. With proper treatment, symptoms can be largely controlled, allowing most individuals to lead an active life with minimal limitations. However, many people experience episodes of fatigue and may develop related health conditions such as diabetes or an underactive thyroid. A significant risk associated with this condition is an adrenal crisis, a sudden worsening of symptoms that occurs when cortisol levels drop severely due to missed medication or another illness. Symptoms of an adrenal crisis include severe nausea, confusion, fever, headache, and weakness. If left untreated, it can be life-threatening. In case of severe symptoms, immediate medical attention is required.
Generated Response: Addison's disease

### Step 1: Identify the key features of the disease described in the question
The key features of the disease include fatigue, episodes of weakness, and the need for lifelong

Processing with RAG:   3%|▎         | 13/410 [00:39<19:22,  2.93s/it]

--- Record 13 ---
Symptoms: This condition occurs when the lining of the womb (endometrium) grows into the muscular wall of the uterus. It can affect the entire womb or only a specific part of it. While not life-threatening, the symptoms can significantly impact daily life, making it important to seek appropriate support and management.
Generated Response: Endometriosis

Endometriosis is a condition in which the lining of the womb (endometrium) grows into the muscular wall of the uterus.

Endometriosis is characterized by the presence of endometrial tissue in the per




Processing with RAG:   3%|▎         | 14/410 [00:41<19:11,  2.91s/it]

--- Record 14 ---
Symptoms: This condition refers to liver damage caused by excessive alcohol consumption, progressing through various stages of severity. In its early stages, there are often no noticeable symptoms. However, once significant liver damage occurs, symptoms may include nausea, weight loss, loss of appetite, yellowing of the eyes and skin (jaundice), swelling in the ankles and abdomen, confusion or drowsiness, and, in severe cases, vomiting blood or passing blood in stools.
Generated Response: Alcohol-induced liver disease

### Step 1: Identify the disease from the given symptoms
The disease described in the question is characterized by liver damage caused by excessive alcohol consumption, which progresses through various stages of severity.

### Step 2: Determine




Processing with RAG:   4%|▎         | 15/410 [00:44<19:11,  2.91s/it]

--- Record 15 ---
Symptoms: Mild cases of this condition can often be managed with self-care. While medication does not cure the underlying allergy, it can help alleviate symptoms. It is important to read the instructions provided with any medication and follow the recommended dosage. If symptoms persist beyond two weeks despite treatment, consulting a healthcare provider is advisable, as stronger medications, such as corticosteroid nasal sprays, may be required.
Generated Response: Pruritus

### Step 1: Analyze the symptoms described in the question
The symptoms described are itching, which is a generalised and localised sensation that compels a person to scratch the affected area.

### Step 2: Match




Processing with RAG:   4%|▍         | 16/410 [00:47<19:05,  2.91s/it]

--- Record 16 ---
Symptoms: Symptoms of an allergic reaction typically appear within minutes of exposure to an allergen, although in some cases, they may develop gradually over several hours. While most allergic reactions are mild and can be bothersome, they do not usually interfere significantly with daily activities. However, in rare cases, a severe reaction known as anaphylaxis can occur, requiring immediate medical attention.
Generated Response: Allergies
Symptoms of an allergic reaction typically appear within minutes of exposure to an allergen, although in some cases, they may develop gradually over several hours. While most allergic reactions are mild and can be bothersome, they do not usually




Processing with RAG:   4%|▍         | 17/410 [00:50<18:57,  2.89s/it]

--- Record 17 ---
Symptoms: This is a progressive condition, meaning that symptoms gradually worsen over time. The earliest signs often involve minor memory problems, such as a loss of interest and enjoyment in daily activities. It is also common to forget recent conversations, events, or place names. As the condition progresses, memory problems become more severe, impacting daily life more significantly.
Generated Response: Dementia

### Step 1: Identify the key symptoms described by the user
The user describes minor memory problems, such as a loss of interest and enjoyment in daily activities, forgetting recent conversations, events, or place names, and memory problems becoming




Processing with RAG:   4%|▍         | 18/410 [00:53<18:52,  2.89s/it]

--- Record 18 ---
Symptoms: The symptoms of this condition often resemble those of more common and less serious anal issues, such as hemorrhoids or anal fissures. Signs may include rectal bleeding, itching and pain around the anus, small lumps near the anal area, mucus discharge, and loss of bowel control (bowel incontinence).
Generated Response: Anal cancer

### Step 1: Identify the disease described in the user question.
The user question describes a condition with symptoms such as rectal bleeding, itching and pain around the anus, small lumps near the anal area, mucus discharge,




Processing with RAG:   5%|▍         | 19/410 [00:56<18:44,  2.88s/it]

--- Record 19 ---
Symptoms: Even after receiving adrenaline, individuals experiencing this condition should be taken to a hospital for observation, typically for 6 to 12 hours, as symptoms may occasionally reappear. During hospitalization, oxygen therapy may be provided to assist with breathing, and intravenous fluids may be administered to help stabilize blood pressure. In addition to adrenaline, other medications such as antihistamines and corticosteroids may be used to alleviate symptoms. Blood tests might also be conducted to confirm the condition. Discharge from the hospital is typically possible once symptoms are under control and unlikely to return quickly, though severe cases may require a longer stay. Patients may be advised to continue taking antihistamines and corticosteroids for 2 to 3 days after leaving the hospital to prevent recurrence.
Generated Response: Chronic pain

### Step 1: Identify key features of the condition described in the question.
The condition described 

Processing with RAG:   5%|▍         | 20/410 [00:59<18:41,  2.88s/it]

--- Record 20 ---
Symptoms: The primary symptom of this condition is chest pain, which may present as a dull ache, a heavy sensation, or a feeling of tightness in the chest. The pain can also spread to the arms, neck, jaw, or back and is often triggered by physical exertion. While this condition itself is not life-threatening, prolonged chest pain could indicate a more serious event, such as a heart attack.
Generated Response: Angina

### Step 1: Determine the primary symptom of the condition
The primary symptom of the condition is chest pain, which may present as a dull ache, a heavy sensation, or a feeling of tightness in the chest.

### Step




Processing with RAG: 100%|██████████| 410/410 [19:54<00:00,  2.91s/it]
