In [2]:
# Install Required Libraries in Terminal
# pip install transformers datasets accelerate peft bitsandbytes torch openpyxl bert_score
# Done

In [5]:
pip install huggingface_hub transformers datasets accelerate peft bitsandbytes torch openpyxl bert_score

Note: you may need to restart the kernel to use updated packages.


In [6]:
# disables tokenizer thread parallelism, 
# telling Hugging Face to run tokenization single-threaded, avoiding the risk of threading issues across forked processes.
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

Login into hugging face to get access to LLM

In [None]:
from huggingface_hub import login
login("") # USE YOUR OWN TOKEN

## Load the model

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM  # Import tokenizer and model loader
import torch  # PyTorch for handling tensors and model inference
import pandas as pd  # Pandas for reading and processing the dataset
import bert_score  # BERTScore for evaluating model-generated responses

# ========== 1. Prevent model from being downloaded repeatedly ==========
model_id = "meta-llama/Llama-3.2-3B-Instruct"  # Model name
cache_dir = "/workspace/model_cache"  # Set cache directory to store model locally

# Ensure the cache directory exists
import os
os.makedirs(cache_dir, exist_ok=True)

# Load tokenizer from cache (if available); otherwise, download and store it
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)

# Load model with reduced memory usage and automatic GPU allocation
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # Use bfloat16 to save memory while maintaining precision
    device_map="auto",  # Automatically assigns model to GPU (if available)
    cache_dir=cache_dir  # Store model in cache to avoid re-downloading
)

print("Model successfully loaded from cache:", cache_dir)

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

Model successfully loaded from cache: /workspace/model_cache


# FROM HERE, 
# after reload the NEW & Trained model, 
# you can re-run the codes below till the end of BLEU

In [19]:
# ========== 2. Load dataset ==========
data_path = "VerifiedQ_A.xlsx"  # Path to dataset file
df = pd.read_excel(data_path)  # Load the dataset into a DataFrame
print(df.head(10))  # Print the first few rows to verify correct loading


                      AnswerID    Unnamed: 2                    Unnamed: 3  \
0        ADAM_0002818_Sec1.txt  3-Incomplete         ADAM_0002818_Sec1.txt   
1        ADAM_0002818_Sec2.txt  3-Incomplete         ADAM_0002818_Sec2.txt   
2        GARD_0004450_Sec1.txt  3-Incomplete         GARD_0004450_Sec1.txt   
3        GARD_0004450_Sec2.txt  3-Incomplete         GARD_0004450_Sec2.txt   
4        GARD_0004450_Sec3.txt  3-Incomplete         GARD_0004450_Sec3.txt   
5         GHR_0000804_Sec1.txt  3-Incomplete          GHR_0000804_Sec1.txt   
6         GHR_0000804_Sec3.txt  3-Incomplete          GHR_0000804_Sec3.txt   
7         GHR_0000804_Sec4.txt  3-Incomplete          GHR_0000804_Sec4.txt   
8  MPlusDrugs_0001309_Sec2.txt  3-Incomplete   MPlusDrugs_0001309_Sec2.txt   
9  MPlusDrugs_0001309_Sec5.txt  3-Incomplete   MPlusDrugs_0001309_Sec5.txt   

                                            Question  \
0                    What is (are) Noonan syndrome ?   
1                      What c

In [20]:
# ========== 3. Generate answers using Llama 3.2 ==========

# generate_answer() with Post-Trim Cleanup
# ISSUE: Llama always generate its model answer along with the question itself, unnecessarily consuming the max_token space.
# ISSUE: it would be problematic if the question is very long!!!

#Revised generate_answer() with Improved Echo Trimming
def generate_answer(question):
    """Generates a clean model response from LLaMA using a direct instruction prompt."""

    if not isinstance(question, str):
        return "Skipped"

    # Instruction-style prompt to guide model toward answering
    prompt = f"Answer concisely:\n\n{question}\n\nAnswer:"

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=256,
            pad_token_id=tokenizer.eos_token_id
        )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True).strip()

    # Strip everything before "Answer:" if model includes it in output
    if "Answer:" in decoded:
        decoded = decoded.split("Answer:")[-1].strip()

    return decoded


# Apply model to generate answers only for valid text questions
df["Model_Answer"] = df["Question"].apply(generate_answer)

# Print skipped questions for debugging
skipped_count = (df["Model_Answer"] == "Skipped").sum()
print(f"⚠️ {skipped_count} questions were skipped due to invalid format.")

⚠️ 0 questions were skipped due to invalid format.


Save the data to make sure all evaluations are based on the same data

In [21]:
df.to_excel("generated_answers.xlsx", index=False)
print("DataFrame saved as generated_answers.xlsx")

DataFrame saved as generated_answers.xlsx


## For BertScore

In [22]:
import pandas as pd

# Load the saved Excel file
df = pd.read_excel("generated_answers.xlsx")

print("✅ DataFrame reloaded successfully!")
print(df.head())  # Display the first few rows to verify

✅ DataFrame reloaded successfully!
                AnswerID    Unnamed: 2              Unnamed: 3  \
0  ADAM_0002818_Sec1.txt  3-Incomplete   ADAM_0002818_Sec1.txt   
1  ADAM_0002818_Sec2.txt  3-Incomplete   ADAM_0002818_Sec2.txt   
2  GARD_0004450_Sec1.txt  3-Incomplete   GARD_0004450_Sec1.txt   
3  GARD_0004450_Sec2.txt  3-Incomplete   GARD_0004450_Sec2.txt   
4  GARD_0004450_Sec3.txt  3-Incomplete   GARD_0004450_Sec3.txt   

                                            Question  \
0                    What is (are) Noonan syndrome ?   
1                      What causes Noonan syndrome ?   
2  What is (are) Noonan syndrome ? (Also called: ...   
3  What are the symptoms of Noonan syndrome ? (Al...   
4  Is Noonan syndrome inherited ? (Also called: M...   

                                              Answer  \
0  Noonan syndrome is a disease that can be passe...   
1  Noonan syndrome is linked to defects in severa...   
2  Noonan syndrome is a genetic disorder that cau...   
3  What

In [23]:
# ========== 4. Compute BERTScore ==========
import bert_score
# Filter out skipped answers before evaluation
valid_rows = df[df["Model_Answer"] != "Skipped"].copy()  # Keep only valid responses

# Ensure that valid_rows has aligned Model Answers and Reference Answers
P, R, F1 = bert_score.score(
    valid_rows["Model_Answer"].tolist(),  # Only valid generated answers
    valid_rows["Answer"].tolist(),  # Corresponding reference answers
    lang="en"  # Specify language as English
)

# Store computed BERTScore metrics in the filtered DataFrame
valid_rows["BERT_Precision"] = P.tolist()
valid_rows["BERT_Recall"] = R.tolist()
valid_rows["BERT_F1"] = F1.tolist()

# ========== 5. Save results ==========
valid_rows.to_csv("bert_score_results.csv", index=False)  # Save only valid results
print(f"✅ Computation completed. {len(valid_rows)} valid responses evaluated.")
print("Results saved as `bert_score_results.csv`")

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Computation completed. 150 valid responses evaluated.
Results saved as `bert_score_results.csv`


## For DeepEval Scores

In [1]:
!pip install --upgrade pydantic typing_extensions deepeval

[0m

I RESTART the kernel due to unexpected issue.

This time, I will simply load the saved df file generated_answers.xlsx

if you see some red sign from BertScore, that is because I restart the kernel.

In [3]:
import pandas as pd

# Load the saved Excel file
df = pd.read_excel("generated_answers.xlsx")

print("✅ DataFrame reloaded successfully!")
print(df.head())  # Display the first few rows to verify

✅ DataFrame reloaded successfully!
                AnswerID    Unnamed: 2              Unnamed: 3  \
0  ADAM_0002818_Sec1.txt  3-Incomplete   ADAM_0002818_Sec1.txt   
1  ADAM_0002818_Sec2.txt  3-Incomplete   ADAM_0002818_Sec2.txt   
2  GARD_0004450_Sec1.txt  3-Incomplete   GARD_0004450_Sec1.txt   
3  GARD_0004450_Sec2.txt  3-Incomplete   GARD_0004450_Sec2.txt   
4  GARD_0004450_Sec3.txt  3-Incomplete   GARD_0004450_Sec3.txt   

                                            Question  \
0                    What is (are) Noonan syndrome ?   
1                      What causes Noonan syndrome ?   
2  What is (are) Noonan syndrome ? (Also called: ...   
3  What are the symptoms of Noonan syndrome ? (Al...   
4  Is Noonan syndrome inherited ? (Also called: M...   

                                              Answer  \
0  Noonan syndrome is a disease that can be passe...   
1  Noonan syndrome is linked to defects in severa...   
2  Noonan syndrome is a genetic disorder that cau...   
3  What

In [None]:
import os
os.environ["OPENAI_API_KEY"] = "" # USE YOUR OWN TOKEN!

In [5]:
# ========== 6. Compute DeepEval Correctness ==========
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCase, LLMTestCaseParams

# Initialize metric
correctness_metric = GEval(
    name="Correctness",
    criteria="Determine whether the actual output is factually correct based on the expected output. Provide 1 for correct and 0 for incorrect.",
    evaluation_steps=[
        "Check if 'actual output' contradicts 'expected output'",
        "Only penalize omission of important details"
    ],
    evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT],
    model="gpt-4o-mini"  # or "gpt-4", "gpt-3.5-turbo", etc.
)

# Filter valid rows again
valid_rows = df[
    (df["Model_Answer"].notna()) &
    (df["Answer"].notna()) &
    (df["Question"].notna()) &
    (df["Model_Answer"] != "Skipped")
].copy()

# Prepare to collect scores
correctness_scores = []

for i in range(len(valid_rows)):
    try:
        question = valid_rows["Question"].iloc[i]
        model_answer = valid_rows["Model_Answer"].iloc[i]
        reference_answer = valid_rows["Answer"].iloc[i]

        print(f"\n--- [Row {i}] ---")
        print("Question:", question[:100])
        print("Model_Answer:", model_answer[:100])
        print("Reference Answer:", reference_answer[:100])

        test_case = LLMTestCase(
            input=question,
            actual_output=model_answer,
            context=[reference_answer]
        )

        result = correctness_metric.measure(test_case)

        # ✅ THIS is the actual correctness score
        score = float(result)
        correctness_scores.append(score)

        print(f"[{i}] 🎯 Correctness Score (raw): {score}")

    except Exception as e:
        print(f"[{i}] ⚠️ Error: {e}")
        correctness_scores.append(0.0)
        
# Assign the scores
valid_rows["DeepEval_Correctness"] = correctness_scores

# ========== 7. Save Final Results ==========
valid_rows.to_excel("final_eval_results.xlsx", index=False)
print("✅ Final results saved to final_eval_results.xlsx")

Output()


--- [Row 0] ---
Question: What is (are) Noonan syndrome ?
Model_Answer: Noonan syndrome is a disease that can be passed down through families (inherited). It causes many pa
Reference Answer: Noonan syndrome is a disease that can be passed down through families (inherited). It causes many pa


Output()

[0] 🎯 Correctness Score (raw): 0.7810623892820897

--- [Row 1] ---
Question: What causes Noonan syndrome ?
Model_Answer: Noonan syndrome is linked to defects in several genes. Certain proteins involved in growth and devel
Reference Answer: Noonan syndrome is linked to defects in several genes. Certain proteins involved in growth and devel


Output()

[1] 🎯 Correctness Score (raw): 0.6326306077523007

--- [Row 2] ---
Question: What is (are) Noonan syndrome ? (Also called: Male Turner Syndrome; Noonan-Ehmke syndrome; Ullrich-N
Model_Answer: Noonan syndrome is a genetic disorder that causes abnormal development of multiple parts of the body
Reference Answer: Noonan syndrome is a genetic disorder that causes abnormal development of multiple parts of the body


Output()

[2] 🎯 Correctness Score (raw): 0.7260257600474922

--- [Row 3] ---
Question: What are the symptoms of Noonan syndrome ? (Also called: Male Turner Syndrome; Noonan-Ehmke syndrome
Model_Answer: What are the signs and symptoms of Noonan syndrome? The Human Phenotype Ontology provides the follow
Reference Answer: What are the signs and symptoms of Noonan syndrome? The Human Phenotype Ontology provides the follow


Output()

[3] 🎯 Correctness Score (raw): 0.7883687944857891

--- [Row 4] ---
Question: Is Noonan syndrome inherited ? (Also called: Male Turner Syndrome; Noonan-Ehmke syndrome; Ullrich-No
Model_Answer: How is Noonan syndrome inherited? Noonan syndrome is inherited in an autosomal dominant manner. This
Reference Answer: How is Noonan syndrome inherited? Noonan syndrome is inherited in an autosomal dominant manner. This


Output()

[4] 🎯 Correctness Score (raw): 0.8385195751186574

--- [Row 5] ---
Question: What is (are) polycystic kidney disease ? (Also called: PKD; polycystic renal disease)
Model_Answer: Polycystic kidney disease is a disorder that affects the kidneys and other organs. Clusters of fluid
Reference Answer: Polycystic kidney disease is a disorder that affects the kidneys and other organs. Clusters of fluid


Output()

[5] 🎯 Correctness Score (raw): 0.8140259235406587

--- [Row 6] ---
Question: What are the genetic changes related to polycystic kidney disease ? (Also called: PKD; polycystic re
Model_Answer: Mutations in the PKD1, PKD2, and PKHD1 genes cause polycystic kidney disease.  Mutations in either t
Reference Answer: Mutations in the PKD1, PKD2, and PKHD1 genes cause polycystic kidney disease.  Mutations in either t


Output()

[6] 🎯 Correctness Score (raw): 0.9056778884616244

--- [Row 7] ---
Question: Is polycystic kidney disease inherited ? (Also called: PKD; polycystic renal disease)
Model_Answer: Most cases of polycystic kidney disease have an autosomal dominant pattern of inheritance. People wi
Reference Answer: Most cases of polycystic kidney disease have an autosomal dominant pattern of inheritance. People wi


Output()

[7] 🎯 Correctness Score (raw): 0.87072710615627

--- [Row 8] ---
Question: How should Zolmitriptan be used and what is the dosage ?
Model_Answer: Zolmitriptan comes as a tablet and an orally disintegrating tablet (tablet that dissolves quickly in
Reference Answer: Zolmitriptan comes as a tablet and an orally disintegrating tablet (tablet that dissolves quickly in


Output()

[8] 🎯 Correctness Score (raw): 0.7381560904055384

--- [Row 9] ---
Question: What are the side effects or risks of Zolmitriptan ?
Model_Answer: Zolmitriptan may cause side effects. Tell your doctor if any of these symptoms are severe or do not 
Reference Answer: Zolmitriptan may cause side effects. Tell your doctor if any of these symptoms are severe or do not 


Output()

[9] 🎯 Correctness Score (raw): 0.9404931597807824

--- [Row 10] ---
Question: What causes Syphilis - primary ? (Also called: Primary syphilis; Secondary syphilis; Late syphilis; 
Model_Answer: Syphilis is a sexually transmitted, infectious disease caused by the spirochete bacterium Treponema 
Reference Answer: Syphilis is a sexually transmitted, infectious disease caused by the spirochete bacterium Treponema 


Output()

[10] 🎯 Correctness Score (raw): 0.6229214541465331

--- [Row 11] ---
Question: What are the symptoms of Syphilis - primary ? (Also called: Primary syphilis; Secondary syphilis; La
Model_Answer: Symptoms of primary syphilis are:  - Small, painless open sore or ulcer (called a chancre) on the ge
Reference Answer: Symptoms of primary syphilis are:  - Small, painless open sore or ulcer (called a chancre) on the ge


Output()

[11] 🎯 Correctness Score (raw): 0.49191051449297724

--- [Row 12] ---
Question: Do I need to see a doctor for Syphilis - primary ? (Also called: Primary syphilis; Secondary syphili
Model_Answer: Call for an appointment with your health care provider if you have symptoms of syphilis.  Contact yo
Reference Answer: Call for an appointment with your health care provider if you have symptoms of syphilis.  Contact yo


Output()

[12] 🎯 Correctness Score (raw): 0.7699007965342963

--- [Row 13] ---
Question: What is (are) Syphilis ?
Model_Answer: Syphilis is a sexually transmitted disease caused by bacteria. It infects the genital area, lips, mo
Reference Answer: Syphilis is a sexually transmitted disease caused by bacteria. It infects the genital area, lips, mo


Output()

[13] 🎯 Correctness Score (raw): 0.8467307326979547

--- [Row 14] ---
Question: How to diagnose Syphilis - primary ? (Also called: Primary syphilis; Secondary syphilis; Late syphil
Model_Answer: The doctor or nurse will examine you. Tests that may be done include:  - Examination of fluid from s
Reference Answer: The doctor or nurse will examine you. Tests that may be done include:  - Examination of fluid from s


Output()

[14] 🎯 Correctness Score (raw): 0.6890464638037613

--- [Row 15] ---
Question: What are the treatments for Syphilis - primary ? (Also called: Primary syphilis; Secondary syphilis;
Model_Answer: Syphilis can be treated with antibiotics, such as:   - Doxycycline  - Penicillin G benzathine  - Tet
Reference Answer: Syphilis can be treated with antibiotics, such as:   - Doxycycline  - Penicillin G benzathine  - Tet


Output()

[15] 🎯 Correctness Score (raw): 0.7504562260301669

--- [Row 16] ---
Question: How to prevent Syphilis - primary ? (Also called: Primary syphilis; Secondary syphilis; Late syphili
Model_Answer: If you are sexually active, practice safe sex and always use a condom.  All pregnant women should be
Reference Answer: If you are sexually active, practice safe sex and always use a condom.  All pregnant women should be


Output()

[16] 🎯 Correctness Score (raw): 0.5350873850643881

--- [Row 17] ---
Question: How should Glucagon be used and what is the dosage ?
Model_Answer: Glucagon is usually given by injection beneath the skin, in the muscle, or in the vein. It comes as 
Reference Answer: Glucagon is usually given by injection beneath the skin, in the muscle, or in the vein. It comes as 


Output()

[17] 🎯 Correctness Score (raw): 0.6524840545760398

--- [Row 18] ---
Question: What causes Laryngeal nerve damage ? (Also called: Vocal cord paralysis)
Model_Answer: Injury to the laryngeal nerves is uncommon.  When it does occur, it can be from:  - A complication o
Reference Answer: Injury to the laryngeal nerves is uncommon.  When it does occur, it can be from:  - A complication o


Output()

[18] 🎯 Correctness Score (raw): 0.6945822346029265

--- [Row 19] ---
Question: Do you have information about Birth control pill overdose
Model_Answer: Summary : Birth control pills, also called oral contraceptives, are prescription medicines used to p
Reference Answer: Summary : Birth control pills, also called oral contraceptives, are prescription medicines used to p


Output()

[19] 🎯 Correctness Score (raw): 0.8742497090128916

--- [Row 20] ---
Question: What causes Deep Vein Thrombosis ?
Model_Answer: Blood clots can form in your body's deep veins if:  - You have a blood clot in another part of your 
Reference Answer: Blood clots can form in your body's deep veins if:


Output()

[20] 🎯 Correctness Score (raw): 0.7484496439678527

--- [Row 21] ---
Question: What causes Post-streptococcal glomerulonephritis (GN) ? (Also called: Glomerulonephritis - post-str
Model_Answer: Post-streptococcal GN is a form of glomerulonephritis. It is caused by an infection with a type of s
Reference Answer: Post-streptococcal GN is a form of glomerulonephritis. It is caused by an infection with a type of s


Output()

[21] 🎯 Correctness Score (raw): 0.8146093445231287

--- [Row 22] ---
Question: What is (are) granulomatosis with polyangiitis ? (Also called: GPA; Wegener granulomatosis; Wegener'
Model_Answer: Granulomatosis with polyangiitis (GPA) is a condition that causes inflammation that primarily affect
Reference Answer: Granulomatosis with polyangiitis (GPA) is a condition that causes inflammation that primarily affect


Output()

[22] 🎯 Correctness Score (raw): 0.8130291509379906

--- [Row 23] ---
Question: What is (are) Streptococcal Infections ? (Also called: Strep)
Model_Answer: Streptococcal infections are caused by bacteria of the genus Streptococcus. There are two types: gro
Reference Answer: Strep is short for Streptococcus, a type of bacteria. There are two types: group A and group B.    G


Output()

[23] 🎯 Correctness Score (raw): 0.8396815511946926

--- [Row 24] ---
Question: What causes Granulomatosis with polyangiitis ? (Also called: Formerly: Wegener's granulomatosis)
Model_Answer: GPA mainly affects blood vessels in the nose, sinuses, ears, lungs, and kidneys. Other areas may als
Reference Answer: GPA mainly affects blood vessels in the nose, sinuses, ears, lungs, and kidneys. Other areas may als


Output()

[24] 🎯 Correctness Score (raw): 0.6095701560916656

--- [Row 25] ---
Question: What is (are) Streptococcal Infections ? (Also called: Strep)
Model_Answer: Streptococcal infections are caused by bacteria of the genus Streptococcus. There are two types: gro
Reference Answer: Strep is short for Streptococcus, a type of bacteria. There are two types: group A and group B.    G


Output()

[25] 🎯 Correctness Score (raw): 0.8332256232663042

--- [Row 26] ---
Question: What causes Macular degeneration - age-related ? (Also called: Age-related macular degeneration (ARM
Model_Answer: The retina is at the back of the eye. It changes light and images that enter the eye into nerve sign
Reference Answer: The retina is at the back of the eye. It changes light and images that enter the eye into nerve sign


Output()

[26] 🎯 Correctness Score (raw): 0.633419767651066

--- [Row 27] ---
Question: What are the genetic changes related to age-related macular degeneration ? (Also called: age-related
Model_Answer: Age-related macular degeneration results from a combination of genetic and environmental factors. Ma
Reference Answer: Age-related macular degeneration results from a combination of genetic and environmental factors. Ma


Output()

[27] 🎯 Correctness Score (raw): 0.8161093810940487

--- [Row 28] ---
Question: What causes Hydatidiform mole ? (Also called: Hydatid mole; Molar pregnancy)
Model_Answer: Hydatidiform mole, or molar pregnancy, results from too much production of the tissue that is suppos
Reference Answer: Hydatidiform mole, or molar pregnancy, results from too much production of the tissue that is suppos


Output()

[28] 🎯 Correctness Score (raw): 0.8057687920947483

--- [Row 29] ---
Model_Answer: Estradiol increases the risk that you will develop endometrial cancer (cancer of the lining of the u
Reference Answer: Estradiol increases the risk that you will develop endometrial cancer (cancer of the lining of the u


Output()

[29] 🎯 Correctness Score (raw): 0.8213732154326386

--- [Row 30] ---
Question: What is (are) Ear discharge ? (Also called: Drainage from the ear; Otorrhea; Ear bleeding; Bleeding 
Model_Answer: Ear discharge is drainage of blood, ear wax, pus, or fluid from the ear.)  Causes and risk factors i
Reference Answer: Ear discharge is drainage of blood, ear wax, pus, or fluid from the ear.)


Output()

[30] 🎯 Correctness Score (raw): 0.6354739125398013

--- [Row 31] ---
Question: What causes Ear discharge ? (Also called: Drainage from the ear; Otorrhea; Ear bleeding; Bleeding fr
Model_Answer: Most of the time, any fluid leaking out of an ear is ear wax.  A ruptured eardrum can cause a white,
Reference Answer: Most of the time, any fluid leaking out of an ear is ear wax.  A ruptured eardrum can cause a white,


Output()

[31] 🎯 Correctness Score (raw): 0.8263695912685307

--- [Row 32] ---
Question: What to do for Ear discharge ? (Also called: Drainage from the ear; Otorrhea; Ear bleeding; Bleeding
Model_Answer: Call for an appointment with your health care provider if you have any of the following symptoms:   
Reference Answer: Caring for ear discharge at home depends on the cause.)


Output()

[32] 🎯 Correctness Score (raw): 0.7402314230051891

--- [Row 33] ---
Question: Do you have information about Ear wax (Also called: Ear impaction; Cerumen impaction; Ear blockage)
Model_Answer: Summary : The ear canal is lined with hair follicles and glands that produce a waxy oil called cerum
Reference Answer: Summary : The ear canal is lined with hair follicles and glands that produce a waxy oil called cerum


Output()

[33] 🎯 Correctness Score (raw): 0.8

--- [Row 34] ---
Question: What is (are) Swimmer's ear ? (Also called: Ear infection - outer ear - acute; Otitis externa - acut
Model_Answer: Swimmer's ear is inflammation, irritation, or infection of the outer ear and ear canal. The medical 
Reference Answer: Swimmer's ear is inflammation, irritation, or infection of the outer ear and ear canal. The medical 


Output()

[34] 🎯 Correctness Score (raw): 0.6066355050397734

--- [Row 35] ---
Question: What is (are) Beckwith-Wiedemann syndrome ?
Model_Answer: Beckwith-Wiedemann syndrome is a growth disorder that causes large body size, large organs, and othe
Reference Answer: Beckwith-Wiedemann syndrome is a growth disorder that causes large body size, large organs, and othe


Output()

[35] 🎯 Correctness Score (raw): 0.7697964331225113

--- [Row 36] ---
Question: What causes Beckwith-Wiedemann syndrome ?
Model_Answer: Beckwith-Wiedemann syndrome is caused by a defect in the genes on chromosome 11. About 10% of cases 
Reference Answer: Beckwith-Wiedemann syndrome is caused by a defect in the genes on chromosome 11. About 10% of cases 


Output()

[36] 🎯 Correctness Score (raw): 0.7871625039584003

--- [Row 37] ---
Question: What are the symptoms of Beckwith-Wiedemann syndrome ?
Model_Answer: - Large size for a newborn  - Red birth mark on forehead or eyelids (Nevus flammeus)  - Creases in e
Reference Answer: Signs and symptoms of Beckwith-Wiedemann syndrome include:  - Large size for a newborn  - Red birth 


Output()

[37] 🎯 Correctness Score (raw): 0.5691474115544838

--- [Row 38] ---
Question: What are the complications of Beckwith-Wiedemann syndrome ?
Model_Answer: These complications can occur:  - Development of tumors  - Feeding problems due to enlarged tongue  
Reference Answer: These complications can occur:  - Development of tumors  - Feeding problems due to enlarged tongue  


Output()

[38] 🎯 Correctness Score (raw): 0.6215521191930051

--- [Row 39] ---
Question: Do I need to see a doctor for Beckwith-Wiedemann syndrome ?
Model_Answer: No, Beckwith-Wiedemann syndrome is a genetic condition that cannot be treated with medication. Do no
Reference Answer: If you have a child with Beckwith-Wiedemann syndrome and worrisome symptoms develop, call your pedia


Output()

[39] 🎯 Correctness Score (raw): 0.4230894790313428

--- [Row 40] ---
Question: How to prevent Beckwith-Wiedemann syndrome ?
Model_Answer: There is no known prevention for Beckwith-Wiedemann syndrome. Genetic counseling may be of value for
Reference Answer: There is no known prevention for Beckwith-Wiedemann syndrome. Genetic counseling may be of value for


Output()

[40] 🎯 Correctness Score (raw): 0.4967660837775131

--- [Row 41] ---
Question: What is (are) Beckwith-Wiedemann syndrome ? (Also called: BWS; Wiedemann-Beckwith syndrome (WBS))
Model_Answer: Beckwith-Wiedemann syndrome is a condition that affects many parts of the body. It is classified as 
Reference Answer: Beckwith-Wiedemann syndrome is a condition that affects many parts of the body. It is classified as 


Output()

[41] 🎯 Correctness Score (raw): 0.7370279375413435

--- [Row 42] ---
Question: How many people are affected by Beckwith-Wiedemann syndrome ? (Also called: BWS; Wiedemann-Beckwith 
Model_Answer: Beckwith-Wiedemann syndrome affects an estimated 1 in 13,700 newborns worldwide. The condition may a
Reference Answer: Beckwith-Wiedemann syndrome affects an estimated 1 in 13,700 newborns worldwide. The condition may a


Output()

[42] 🎯 Correctness Score (raw): 0.8015385826209481

--- [Row 43] ---
Question: What are the genetic changes related to Beckwith-Wiedemann syndrome ? (Also called: BWS; Wiedemann-B
Model_Answer: The genetic causes of Beckwith-Wiedemann syndrome are complex. The condition usually results from th
Reference Answer: The genetic causes of Beckwith-Wiedemann syndrome are complex. The condition usually results from th


Output()

[43] 🎯 Correctness Score (raw): 0.8157212614930215

--- [Row 44] ---
Question: Is Beckwith-Wiedemann syndrome inherited ? (Also called: BWS; Wiedemann-Beckwith syndrome (WBS))
Model_Answer: In about 85 percent of cases of Beckwith-Wiedemann syndrome, only one person in a family has been di
Reference Answer: In about 85 percent of cases of Beckwith-Wiedemann syndrome, only one person in a family has been di


Output()

[44] 🎯 Correctness Score (raw): 0.8194742658680798

--- [Row 45] ---
Question: What are the treatments for Beckwith-Wiedemann syndrome ? (Also called: BWS; Wiedemann-Beckwith synd
Model_Answer: These resources address the diagnosis or management of Beckwith-Wiedemann syndrome:  - Gene Review: 
Reference Answer: These resources address the diagnosis or management of Beckwith-Wiedemann syndrome:  - Gene Review: 


Output()

[45] 🎯 Correctness Score (raw): 0.37000538800565724

--- [Row 46] ---
Question: Do you have information about Uvulopalatopharyngoplasty (UPPP) (Also called: Palate surgery; Uvulopa
Model_Answer: Summary : Uvulopalatopharyngoplasty (UPPP) is surgery to open the upper airways by taking out extra 
Reference Answer: Summary : Uvulopalatopharyngoplasty (UPPP) is surgery to open the upper airways by taking out extra 


Output()

[46] 🎯 Correctness Score (raw): 0.702553982709156

--- [Row 47] ---
Question: What causes Uveitis ? (Also called: Iritis; Pars planitis; Choroiditis; Chorioretinitis; Anterior uv
Model_Answer: Uveitis can be caused by autoimmune disorders, including rheumatoid arthritis or ankylosing spondyli
Reference Answer: Uveitis can be caused by autoimmune disorders, including rheumatoid arthritis or ankylosing spondyli


Output()

[47] 🎯 Correctness Score (raw): 0.7444510631386045

--- [Row 48] ---
Question: Do you have information about Urine and Urination
Model_Answer: Summary : Your kidneys make urine by filtering wastes and extra water from your blood. The waste is 
Reference Answer: Summary : Your kidneys make urine by filtering wastes and extra water from your blood. The waste is 


Output()

[48] 🎯 Correctness Score (raw): 0.8127975463072297

--- [Row 49] ---
Question: What causes Diaphragmatic hernia ? (Also called: Hernia - diaphragmatic; Congenital hernia of the di
Model_Answer: A diaphragmatic hernia is a rare defect. It occurs while the baby is developing in the womb. Because
Reference Answer: A diaphragmatic hernia is a rare defect. It occurs while the baby is developing in the womb. Because


Output()

[49] 🎯 Correctness Score (raw): 0.785983705311766

--- [Row 50] ---
Question: What is the outlook for Diaphragmatic hernia ? (Also called: Hernia - diaphragmatic; Congenital hern
Model_Answer: The outcome of surgery depends on how well the baby's lungs have developed. It also depends on wheth
Reference Answer: The outcome of surgery depends on how well the baby's lungs have developed. It also depends on wheth


Output()

[50] 🎯 Correctness Score (raw): 0.7127826565608706

--- [Row 51] ---
Question: What are the genetic changes related to congenital diaphragmatic hernia ? (Also called: congenital d
Model_Answer: Congenital diaphragmatic hernia has many different causes. In 10 to 15 percent of affected individua
Reference Answer: Congenital diaphragmatic hernia has many different causes. In 10 to 15 percent of affected individua


Output()

[51] 🎯 Correctness Score (raw): 0.7076529152142177

--- [Row 52] ---
Question: Is congenital diaphragmatic hernia inherited ? (Also called: congenital diaphragmatic defect)
Model_Answer: Isolated congenital diaphragmatic hernia is rarely inherited. In almost all cases, there is only one
Reference Answer: Isolated congenital diaphragmatic hernia is rarely inherited. In almost all cases, there is only one


Output()

[52] 🎯 Correctness Score (raw): 0.7209850522183221

--- [Row 53] ---
Question: What is (are) Shingles ?
Model_Answer: Shingles is a painful rash that develops on one side of the face or body. The rash forms blisters th
Reference Answer: Shingles is a painful rash that develops on one side of the face or body. The rash forms blisters th


Output()

[53] 🎯 Correctness Score (raw): 0.7810623898207898

--- [Row 54] ---
Question: What is (are) antiphospholipid syndrome ? (Also called: anti-phospholipid syndrome; antiphospholipid
Model_Answer: Antiphospholipid syndrome is a disorder characterized by an increased tendency to form abnormal bloo
Reference Answer: Antiphospholipid syndrome is a disorder characterized by an increased tendency to form abnormal bloo


Output()

[54] 🎯 Correctness Score (raw): 0.7838906190046948

--- [Row 55] ---
Question: What are the genetic changes related to antiphospholipid syndrome ? (Also called: anti-phospholipid 
Model_Answer: The genetic cause of antiphospholipid syndrome is unknown. This condition is associated with the pre
Reference Answer: The genetic cause of antiphospholipid syndrome is unknown. This condition is associated with the pre


Output()

[55] 🎯 Correctness Score (raw): 0.6013754930866344

--- [Row 56] ---
Question: Is antiphospholipid syndrome inherited ? (Also called: anti-phospholipid syndrome; antiphospholipid 
Model_Answer: Most cases of antiphospholipid syndrome are sporadic, which means they occur in people with no histo
Reference Answer: Most cases of antiphospholipid syndrome are sporadic, which means they occur in people with no histo


Output()

[56] 🎯 Correctness Score (raw): 0.7035636230868172

--- [Row 57] ---
Question: What are the treatments for antiphospholipid syndrome ? (Also called: anti-phospholipid syndrome; an
Model_Answer: These resources address the diagnosis or management of antiphospholipid syndrome:  - Genetic Testing
Reference Answer: These resources address the diagnosis or management of antiphospholipid syndrome:  - Genetic Testing


Output()

[57] 🎯 Correctness Score (raw): 0.34104155611655496

--- [Row 58] ---
Question: What is (are) Antiphospholipid Antibody Syndrome ?
Model_Answer: Antiphospholipid (AN-te-fos-fo-LIP-id) antibody syndrome (APS) is an autoimmune disorder. Autoimmune
Reference Answer: Antiphospholipid (AN-te-fos-fo-LIP-id) antibody syndrome (APS) is an autoimmune disorder. Autoimmune


Output()

[58] 🎯 Correctness Score (raw): 0.7901940540589265

--- [Row 59] ---
Question: What causes Antiphospholipid Antibody Syndrome ?
Model_Answer: Antiphospholipid antibody syndrome (APS) occurs if the body's immune system makes antibodies (protei
Reference Answer: Antiphospholipid antibody syndrome (APS) occurs if the body's immune system makes antibodies (protei


Output()

[59] 🎯 Correctness Score (raw): 0.7399706895032085

--- [Row 60] ---
Question: What is (are) Antiphospholipid Syndrome ?
Model_Answer: Antiphospholipid syndrome (APS) is an autoimmune disorder caused when antibodies -- immune system ce
Reference Answer: Antiphospholipid syndrome (APS) is an autoimmune disorder caused when antibodies -- immune system ce


Output()

[60] 🎯 Correctness Score (raw): 0.7957979675611797

--- [Row 61] ---
Question: What are the treatments for Antiphospholipid Syndrome ?
Model_Answer: The main goal of treatment is to thin the blood to reduce clotting. At present, the recommended trea
Reference Answer: The main goal of treatment is to thin the blood to reduce clotting. At present, the recommended trea


Output()

[61] 🎯 Correctness Score (raw): 0.7734112665842027

--- [Row 62] ---
Question: what research (or clinical trials) is being done for Antiphospholipid Syndrome ?
Model_Answer: The National Institute of Neurological Disorders and Stroke (NINDS) and other institutes of the Nati
Reference Answer: The National Institute of Neurological Disorders and Stroke (NINDS) and other institutes of the Nati


Output()

[62] 🎯 Correctness Score (raw): 0.7344664199549906

--- [Row 63] ---
Question: What are the side effects or risks of Methadone ?
Model_Answer: Methadone may cause side effects. Tell your doctor if any of these symptoms are severe or do not go 
Reference Answer: Methadone may cause side effects. Tell your doctor if any of these symptoms are severe or do not go 


Output()

[63] 🎯 Correctness Score (raw): 0.8372144726847832

--- [Row 64] ---
Question: What are the brand names of Methylprednisolone Oral ?
Model_Answer: - Medrol)  - Solu-Medrol)  - Pred)  - Deltasone)  - Prednisone)  - Oral Pred)  - Methylpred)  - Solu
Reference Answer: - Medrol)


Output()

[64] 🎯 Correctness Score (raw): 0.39448436555396665

--- [Row 65] ---
Question: Who should get Methylprednisolone Oral and why is it prescribed ?
Model_Answer: Methylprednisolone, a corticosteroid, is similar to a natural hormone produced by your adrenal gland
Reference Answer: Methylprednisolone, a corticosteroid, is similar to a natural hormone produced by your adrenal gland


Output()

[65] 🎯 Correctness Score (raw): 0.7646855689454772

--- [Row 66] ---
Question: How should Methylprednisolone Oral be used and what is the dosage ?
Model_Answer: Methylprednisolone comes as a tablet to take by mouth. Your doctor will prescribe a dosing schedule 
Reference Answer: Methylprednisolone comes as a tablet to take by mouth. Your doctor will prescribe a dosing schedule 


Output()

[66] 🎯 Correctness Score (raw): 0.666341068190933

--- [Row 67] ---
Question: Are there safety concerns or special precautions about Methylprednisolone Oral ?
Model_Answer: Before taking methylprednisolone,  - tell your doctor and pharmacist if you are allergic to methylpr
Reference Answer: Before taking methylprednisolone,  - tell your doctor and pharmacist if you are allergic to methylpr


Output()

[67] 🎯 Correctness Score (raw): 0.5983558438382264

--- [Row 68] ---
Question: What special dietary instructions should I follow with Methylprednisolone Oral ?
Model_Answer: Your doctor may instruct you to follow a low-sodium, low-salt, potassium-rich, or high-protein diet.
Reference Answer: Your doctor may instruct you to follow a low-sodium, low-salt, potassium-rich, or high-protein diet.


Output()

[68] 🎯 Correctness Score (raw): 0.7666306494362735

--- [Row 69] ---
Question: What should I do if I forget a dose of Methylprednisolone Oral ?
Model_Answer: When you start to take methylprednisolone, ask your doctor what to do if you forget a dose. Write do
Reference Answer: When you start to take methylprednisolone, ask your doctor what to do if you forget a dose. Write do


Output()

[69] 🎯 Correctness Score (raw): 0.7146506745547642

--- [Row 70] ---
Question: What are the side effects or risks of Methylprednisolone Oral ?
Model_Answer: Methylprednisolone may cause side effects. Tell your doctor if any of these symptoms are severe or d
Reference Answer: Methylprednisolone may cause side effects. Tell your doctor if any of these symptoms are severe or d


Output()

[70] 🎯 Correctness Score (raw): 0.7936248665359089

--- [Row 71] ---
Question: What to do in case of emergency or overdose of Methylprednisolone Oral ?
Model_Answer: In case of overdose, call your local poison control center at 1-800-222-1222. If the victim has coll
Reference Answer: In case of overdose, call your local poison control center at 1-800-222-1222. If the victim has coll


Output()

[71] 🎯 Correctness Score (raw): 0.7279316484335918

--- [Row 72] ---
Question: What other information should I know about Methylprednisolone Oral ?
Model_Answer: Your doctor has ordered methylprednisolone, a corticosteroid, to relieve inflammation (swelling, hea
Reference Answer: Keep all appointments with your doctor and the laboratory. Your doctor will order certain lab tests 


Output()

[72] 🎯 Correctness Score (raw): 0.7342340168000077

--- [Row 73] ---
Question: What other information should I know about Methylprednisolone Sodium Succinate Injection ?
Model_Answer: Your doctor has ordered methylprednisolone, a corticosteroid, to relieve inflammation (swelling, hea
Reference Answer: Your doctor has ordered methylprednisolone, a corticosteroid, to relieve inflammation (swelling, hea


Output()

[73] 🎯 Correctness Score (raw): 0.7834265174798893

--- [Row 74] ---
Question: What are the brand names of Methylprednisolone Sodium Succinate Injection ?
Model_Answer: - A-methaPred  - Depo-Medrol  - Solu-Medrol)  - SoluPred)  - Pred  - Prednisone)  - Deltasone)  - Me
Reference Answer: - A-methaPred  - Depo-Medrol  - Solu-Medrol)


Output()

[74] 🎯 Correctness Score (raw): 0.34248823860682814

--- [Row 75] ---
Question: What are the side effects or risks of Prednisone ?
Model_Answer: Prednisone may cause side effects. Tell your doctor if any of these symptoms are severe or do not go
Reference Answer: Prednisone may cause side effects. Tell your doctor if any of these symptoms are severe or do not go


Output()

[75] 🎯 Correctness Score (raw): 0.8492965167239357

--- [Row 76] ---
Question: What are the symptoms of Electrical injury ? (Also called: Electrical shock)
Model_Answer: Symptoms depend on many things, including:  - Type and strength of voltage  - How long you were in c
Reference Answer: Symptoms depend on many things, including:  - Type and strength of voltage  - How long you were in c


Output()

[76] 🎯 Correctness Score (raw): 0.7921525650636394

--- [Row 77] ---
Question: What should I know about storage and disposal of Calcitonin Salmon Nasal Spray ?
Model_Answer: Keep this medication in the container it came in, tightly closed, and out of reach of children. Stor
Reference Answer: Keep this medication in the container it came in, tightly closed, and out of reach of children. Stor


Output()

[77] 🎯 Correctness Score (raw): 0.8778280113909765

--- [Row 78] ---
Question: What is (are) Isolated sleep paralysis ? (Also called: Sleep paralysis - isolated; Parasomnia - isol
Model_Answer: Isolated sleep paralysis is a type of paralysis that occurs when a person just goes to sleep or upon
Reference Answer: Isolated sleep paralysis is a type of paralysis that occurs when a person just goes to sleep or upon


Output()

[78] 🎯 Correctness Score (raw): 0.6371203211338086

--- [Row 79] ---
Question: What are the symptoms of Isolated sleep paralysis ? (Also called: Sleep paralysis - isolated; Paraso
Model_Answer: Episodes of isolated sleep paralysis last from a few seconds to 1 or 2 minutes. During these episode
Reference Answer: Episodes of isolated sleep paralysis last from a few seconds to 1 or 2 minutes. During these episode


Output()

[79] 🎯 Correctness Score (raw): 0.6556922939073799

--- [Row 80] ---
Question: How to diagnose Isolated sleep paralysis ? (Also called: Sleep paralysis - isolated; Parasomnia - is
Model_Answer: Sleep paralysis can be a symptom of narcolepsy. But if you do not have other symptoms of narcolepsy,
Reference Answer: Sleep paralysis can be a symptom of narcolepsy. But if you do not have other symptoms of narcolepsy,


Output()

[80] 🎯 Correctness Score (raw): 0.7313145065734498

--- [Row 81] ---
Question: What are the treatments for Isolated sleep paralysis ? (Also called: Sleep paralysis - isolated; Par
Model_Answer: In most cases, isolated sleep paralysis occurs so rarely that treatment is not needed. If the cause 
Reference Answer: In most cases, isolated sleep paralysis occurs so rarely that treatment is not needed. If the cause 


Output()

[81] 🎯 Correctness Score (raw): 0.7415472937806484

--- [Row 82] ---
Question: What is (are) progressive familial heart block ? (Also called: bundle branch block; HBBD; hereditary
Model_Answer: Progressive familial heart block is a genetic condition that alters the normal beating of the heart.
Reference Answer: Progressive familial heart block is a genetic condition that alters the normal beating of the heart.


Output()

[82] 🎯 Correctness Score (raw): 0.6084868386738032

--- [Row 83] ---
Question: Are there safety concerns or special precautions about Cephalexin ?
Model_Answer: Before taking cephalexin,  - tell your doctor and pharmacist if you are allergic to cephalexin or an
Reference Answer: Before taking cephalexin,  - tell your doctor and pharmacist if you are allergic to cephalexin or an


Output()

[83] 🎯 Correctness Score (raw): 0.7629466310385692

--- [Row 84] ---
Question: Do you have information about Cardiac ablation procedures (Also called: Catheter ablation; Radiofreq
Model_Answer: Summary : Cardiac ablation is a procedure that is used to scar small areas in your heart that may be
Reference Answer: Summary : Cardiac ablation is a procedure that is used to scar small areas in your heart that may be


Output()

[84] 🎯 Correctness Score (raw): 0.8186927112713397

--- [Row 85] ---
Question: What causes Hepatitis A ? (Also called: Viral hepatitis; Infectious hepatitis)
Model_Answer: The hepatitis A virus is found mostly in the stools and blood of an infected person. The virus is pr
Reference Answer: The hepatitis A virus is found mostly in the stools and blood of an infected person. The virus is pr


Output()

[85] 🎯 Correctness Score (raw): 0.7779469389099989

--- [Row 86] ---
Question: How to prevent Hepatitis A ? (Also called: Viral hepatitis; Infectious hepatitis)
Model_Answer: The following tips can help reduce your risk of spreading or catching the virus:  - Always wash your
Reference Answer: The following tips can help reduce your risk of spreading or catching the virus:  - Always wash your


Output()

[86] 🎯 Correctness Score (raw): 0.8066010406365061

--- [Row 87] ---
Question: What causes Hepatitis C ? (Also called: Sustained virologic response - hepatitis C; SVR - hepatitis 
Model_Answer: Hepatitis C infection is caused by the hepatitis C virus (HCV).    You can catch hepatitis C if the 
Reference Answer: Hepatitis C infection is caused by the hepatitis C virus (HCV).    You can catch hepatitis C if the 


Output()

[87] 🎯 Correctness Score (raw): 0.8670297978426671

--- [Row 88] ---
Question: What causes Ribcage pain ? (Also called: Pain - ribcage)
Model_Answer: Ribcage pain may be caused by any of the following:  - Bruised, cracked, or fractured rib  - Inflamm
Reference Answer: Ribcage pain may be caused by any of the following:  - Bruised, cracked, or fractured rib  - Inflamm


Output()

[88] 🎯 Correctness Score (raw): 0.7343253255979856

--- [Row 89] ---
Question: What is (are) Hantavirus ? (Also called: Hantavirus pulmonary syndrome; Hemorrhagic fever with renal
Model_Answer: Hantavirus is a life-threatening viral infection spread to humans by rodents. It has symptoms simila
Reference Answer: Hantavirus is a life-threatening viral infection spread to humans by rodents. It has symptoms simila


Output()

[89] 🎯 Correctness Score (raw): 0.5690875263414433

--- [Row 90] ---
Question: What is the outlook for Hantavirus ? (Also called: Hantavirus pulmonary syndrome; Hemorrhagic fever 
Model_Answer: Hantavirus is a serious infection that gets worse quickly. Lung failure can occur and may lead to de
Reference Answer: Hantavirus is a serious infection that gets worse quickly. Lung failure can occur and may lead to de


Output()

[90] 🎯 Correctness Score (raw): 0.6399008180044523

--- [Row 91] ---
Question: What are the complications of Hantavirus ? (Also called: Hantavirus pulmonary syndrome; Hemorrhagic 
Model_Answer: Complications of hantavirus may include:  - Kidney failure  - Heart and lung failure   These complic
Reference Answer: Complications of hantavirus may include:  - Kidney failure  - Heart and lung failure   These complic


Output()

[91] 🎯 Correctness Score (raw): 0.7097498492427341

--- [Row 92] ---
Question: what are the symptoms for Hantavirus ?
Model_Answer: Due to the small number of HPS cases, the "incubation time" is not positively known. However, on the
Reference Answer: Due to the small number of HPS cases, the "incubation time" is not positively known. However, on the


Output()

[92] 🎯 Correctness Score (raw): 0.7869276258372759

--- [Row 93] ---
Question: what is the history of hps for Hantavirus ?
Model_Answer: The "First"OutbreaksIn 1969, a large outbreak of hantavirus occurred in Seoul, South Korea. The dise
Reference Answer: The "First"Outbreak


Output()

[93] 🎯 Correctness Score (raw): 0.6302257252645417

--- [Row 94] ---
Question: What are the symptoms of Appendicitis ?
Model_Answer: Pain around the belly button, pain that moves into the right lower part of your belly, nausea, vomit
Reference Answer: The symptoms of appendicitis can vary. It can be hard to diagnose appendicitis in young children, th


Output()

[94] 🎯 Correctness Score (raw): 0.6387982376729338

--- [Row 95] ---
Question: What is (are) Appendicitis ?
Model_Answer: The appendix is a small, tube-like organ attached to the first part of the large intestine. It is lo
Reference Answer: The appendix is a small, tube-like organ attached to the first part of the large intestine. It is lo


Output()

[95] 🎯 Correctness Score (raw): 0.8013982944653011

--- [Row 96] ---
Question: How should Oxybutynin be used and what is the dosage ?
Model_Answer: Oxybutynin comes as a tablet, a syrup, and an extended-release (long-acting) tablet to take by mouth
Reference Answer: Oxybutynin comes as a tablet, a syrup, and an extended-release (long-acting) tablet to take by mouth


Output()

[96] 🎯 Correctness Score (raw): 0.7813072870536277

--- [Row 97] ---
Question: What is the outlook for Trisomy 13 ? (Also called: Patau syndrome)
Model_Answer: More than 90% of children with trisomy 13 die in the first year.)  If you have a child with trisomy 
Reference Answer: More than 90% of children with trisomy 13 die in the first year.)


Output()

[97] 🎯 Correctness Score (raw): 0.7823957448903217

--- [Row 98] ---
Question: What are the complications of Trisomy 13 ? (Also called: Patau syndrome)
Model_Answer: Complications begin almost immediately. Most infants with trisomy 13 have congenital heart disease. 
Reference Answer: Complications begin almost immediately. Most infants with trisomy 13 have congenital heart disease. 


Output()

[98] 🎯 Correctness Score (raw): 0.7739529675193622

--- [Row 99] ---
Question: What is (are) trisomy 13 ? (Also called: Bartholin-Patau syndrome; complete trisomy 13 syndrome; Pat
Model_Answer: Trisomy 13, also called Patau syndrome, is a chromosomal condition associated with severe intellectu
Reference Answer: Trisomy 13, also called Patau syndrome, is a chromosomal condition associated with severe intellectu


Output()

[99] 🎯 Correctness Score (raw): 0.77417470319549

--- [Row 100] ---
Question: What are the treatments for trisomy 13 ? (Also called: Bartholin-Patau syndrome; complete trisomy 13
Model_Answer: These resources address the diagnosis or management of trisomy 13:  - Genetic Testing Registry: Comp
Reference Answer: These resources address the diagnosis or management of trisomy 13:  - Genetic Testing Registry: Comp


Output()

[100] 🎯 Correctness Score (raw): 0.36622830543713003

--- [Row 101] ---
Question: What is (are) Alcohol use and safe drinking ? (Also called: Beer consumption; Wine consumption; Hard
Model_Answer: Alcohol is one of the most widely used drug substances in the world.  TEEN DRINKING  Alcohol use is 
Reference Answer: Alcohol is one of the most widely used drug substances in the world.  TEEN DRINKING  Alcohol use is 


Output()

[101] 🎯 Correctness Score (raw): 0.54820836456856

--- [Row 102] ---
Question: What causes Low blood sugar - newborns ? (Also called: Neonatal hypoglycemia)
Model_Answer: Glucose level can drop if:  - The baby is not producing enough insulin  - The baby's body is using m
Reference Answer: Babies need blood sugar (glucose) for energy. Most of that glucose is used by the brain.  The baby g


Output()

[102] 🎯 Correctness Score (raw): 0.47397250293273857

--- [Row 103] ---
Question: What are the complications of Low blood sugar - newborns ? (Also called: Neonatal hypoglycemia)
Model_Answer: Severe or persistent low blood sugar level may affect the baby's mental function. In rare cases, hea
Reference Answer: Severe or persistent low blood sugar level may affect the baby's mental function. In rare cases, hea


Output()

[103] 🎯 Correctness Score (raw): 0.6899308671002534

--- [Row 104] ---
Question: How to prevent Low blood sugar - newborns ? (Also called: Neonatal hypoglycemia)
Model_Answer: If you have diabetes during pregnancy, work with your health care provider to control your blood sug
Reference Answer: If you have diabetes during pregnancy, work with your health care provider to control your blood sug


Output()

[104] 🎯 Correctness Score (raw): 0.43639604680364863

--- [Row 105] ---
Question: How to diagnose Diverticulosis ? (Also called: Diverticula - diverticulosis; Diverticulitis - divert
Model_Answer: Diverticulosis is often found during an exam for another health problem. For example, it is often di
Reference Answer: Diverticulosis is often found during an exam for another health problem. For example, it is often di


Output()

[105] 🎯 Correctness Score (raw): 0.6983119592164735

--- [Row 106] ---
Question: What is (are) Diverticulosis and Diverticulitis ? (Also called: Diverticular disease)
Model_Answer: Diverticula are small pouches that bulge outward through the colon, or large intestine. If you have 
Reference Answer: Diverticula are small pouches that bulge outward through the colon, or large intestine. If you have 


Output()

[106] 🎯 Correctness Score (raw): 0.8066010401550396

--- [Row 107] ---
Question: What causes Common cold ? (Also called: Upper respiratory infection - viral; Cold)
Model_Answer: It is called the common cold for good reason. There are over one billion colds in the United States 
Reference Answer: It is called the common cold for good reason. There are over one billion colds in the United States 


Output()

[107] 🎯 Correctness Score (raw): 0.5180225317054808

--- [Row 108] ---
Question: What is (are) hereditary hypophosphatemic rickets ? (Also called: hypophosphatemia; VDRR; vitamin D-
Model_Answer: Hereditary hypophosphatemic rickets is a disorder related to low levels of phosphate in the blood (h
Reference Answer: Hereditary hypophosphatemic rickets is a disorder related to low levels of phosphate in the blood (h


Output()

[108] 🎯 Correctness Score (raw): 0.7501047931303843

--- [Row 109] ---
Question: How many people are affected by hereditary hypophosphatemic rickets ? (Also called: hypophosphatemia
Model_Answer: Hereditary hypophosphatemic rickets affects an estimated 1 in 20,000 newborns worldwide. The conditi
Reference Answer: X-linked hypophosphatemic rickets is the most common form of rickets that runs in families. It affec


Output()

[109] 🎯 Correctness Score (raw): 0.7805312163527927

--- [Row 110] ---
Question: What are the genetic changes related to hereditary hypophosphatemic rickets ? (Also called: hypophos
Model_Answer: Hereditary hypophosphatemic rickets can result from mutations in several genes. Mutations in the PHE
Reference Answer: Hereditary hypophosphatemic rickets can result from mutations in several genes. Mutations in the PHE


Output()

[110] 🎯 Correctness Score (raw): 0.7802113293291624

--- [Row 111] ---
Question: Is hereditary hypophosphatemic rickets inherited ? (Also called: hypophosphatemia; VDRR; vitamin D-r
Model_Answer: Hereditary hypophosphatemic rickets can have several patterns of inheritance. When the condition res
Reference Answer: Hereditary hypophosphatemic rickets can have several patterns of inheritance. When the condition res


Output()

[111] 🎯 Correctness Score (raw): 0.8057687926513871

--- [Row 112] ---
Question: How to prevent Shingles ? (Also called: Herpes zoster)
Model_Answer: Do not touch the rash and blisters on persons with shingles or chickenpox if you have never had chic
Reference Answer: Do not touch the rash and blisters on persons with shingles or chickenpox if you have never had chic


Output()

[112] 🎯 Correctness Score (raw): 0.6016701907604529

--- [Row 113] ---
Question: How to prevent Shingles ?
Model_Answer: A Vaccine for Adults 60 and Older In May 2006, the U.S. Food and Drug Administration approved a vacc
Reference Answer: Yes. In May 2006, the U.S. Food and Drug Administration approved a vaccine to prevent shingles in pe


Output()

[113] 🎯 Correctness Score (raw): 0.7615351683708523

--- [Row 114] ---
Question: What is (are) Shingles ?
Model_Answer: Shingles is a painful rash that develops on one side of the face or body. The rash forms blisters th
Reference Answer: Shingles is a painful rash that develops on one side of the face or body. The rash forms blisters th


Output()

[114] 🎯 Correctness Score (raw): 0.7929730708614788

--- [Row 115] ---
Question: How to prevent Shingles ?
Model_Answer: A Vaccine for Adults 60 and Older In May 2006, the U.S. Food and Drug Administration approved a vacc
Reference Answer: A Vaccine for Adults 60 and Older In May 2006, the U.S. Food and Drug Administration approved a vacc


Output()

[115] 🎯 Correctness Score (raw): 0.7894527569120561

--- [Row 116] ---
Question: What to do for Leg pain ? (Also called: Pain - leg; Aches - leg; Cramps - leg)
Model_Answer: If you have leg pain from cramps or overuse, take these steps first:  - Rest as much as possible.  -
Reference Answer: If you have leg pain from cramps or overuse, take these steps first:  - Rest as much as possible.  -


Output()

[116] 🎯 Correctness Score (raw): 0.7951587007225774

--- [Row 117] ---
Question: What is (are) Diabetes ? (Also called: Diabetes - type 1; Diabetes - type 2; Diabetes - gestational;
Model_Answer: Diabetes is a chronic disease in which the body cannot regulate the amount of sugar in the blood.)  
Reference Answer: Diabetes is a chronic disease in which the body cannot regulate the amount of sugar in the blood.)


Output()

[117] 🎯 Correctness Score (raw): 0.6713259125545354

--- [Row 118] ---
Question: What causes Diabetes ? (Also called: Diabetes - type 1; Diabetes - type 2; Diabetes - gestational; T
Model_Answer: Insulin is a hormone produced by the pancreas to control blood sugar. Diabetes can be caused by too 
Reference Answer: Insulin is a hormone produced by the pancreas to control blood sugar. Diabetes can be caused by too 


Output()

[118] 🎯 Correctness Score (raw): 0.6954439302022452

--- [Row 119] ---
Question: What is (are) Diabetes ? (Also called: Diabetes mellitus; DM)
Model_Answer: Diabetes is a chronic disease in which the body cannot regulate the amount of sugar in the blood.)  
Reference Answer: Diabetes is a disease in which your blood glucose, or blood sugar, levels are too high. Glucose come


Output()

[119] 🎯 Correctness Score (raw): 0.4502595229761443

--- [Row 120] ---
Question: What is (are) Diabetes Type 1 ? (Also called: Insulin-dependent diabetes; Juvenile diabetes; Type I 
Model_Answer: Diabetes means your blood glucose, or blood sugar, levels are too high. With type 1 diabetes, your p
Reference Answer: Diabetes means your blood glucose, or blood sugar, levels are too high. With type 1 diabetes, your p


Output()

[120] 🎯 Correctness Score (raw): 0.747577203013324

--- [Row 121] ---
Question: What is (are) Diabetes ?
Model_Answer: Diabetes means your blood glucose (often called blood sugar) is too high. Your blood always has some
Reference Answer: Heart disease and stroke are the leading causes of death for people with diabetes. Controlling the A


Output()

[121] 🎯 Correctness Score (raw): 0.7466039005984545

--- [Row 122] ---
Question: What is (are) Diabetes ?
Model_Answer: Diabetes means your blood glucose (often called blood sugar) is too high. Your blood always has some
Reference Answer: People with diabetes should - do aerobic activities,  such as brisk walking, which use the bodys lar


Output()

[122] 🎯 Correctness Score (raw): 0.7602033980321827

--- [Row 123] ---
Question: What is (are) Diabetes ?
Model_Answer: Diabetes means your blood glucose (often called blood sugar) is too high. Your blood always has some
Reference Answer: Too Much Glucose in the Blood Diabetes means your blood glucose (often called blood sugar) is too hi


Output()

[123] 🎯 Correctness Score (raw): 0.79706375454162

--- [Row 124] ---
Question: What is (are) Diabetes ?
Model_Answer: Diabetes means your blood glucose (often called blood sugar) is too high. Your blood always has some
Reference Answer: Diabetes means your blood glucose (often called blood sugar) is too high. Your blood always has some


Output()

[124] 🎯 Correctness Score (raw): 0.813612212583015

--- [Row 125] ---
Question: What is (are) Diabetes ?
Model_Answer: Diabetes means your blood glucose (often called blood sugar) is too high. Your blood always has some
Reference Answer: Type 1 diabetes, which used to be called called juvenile diabetes or insulin-dependent diabetes, dev


Output()

[125] 🎯 Correctness Score (raw): 0.7381164163438557

--- [Row 126] ---
Question: What is (are) Diabetes ?
Model_Answer: Diabetes means your blood glucose (often called blood sugar) is too high. Your blood always has some
Reference Answer: Prediabetes means your blood glucose levels are higher than normal but not high enough for a diagnos


Output()

[126] 🎯 Correctness Score (raw): 0.7023892207552818

--- [Row 127] ---
Question: What is (are) Diabetes ?
Model_Answer: Diabetes means your blood glucose (often called blood sugar) is too high. Your blood always has some
Reference Answer: Studies have shown that most people with prediabetes develop type 2 diabetes within a few years, unl


Output()

[127] 🎯 Correctness Score (raw): 0.7919454931870832

--- [Row 128] ---
Question: Do you have information about Calcium in diet (Also called: Diet - calcium)
Model_Answer: Summary : Calcium is the most plentiful mineral found in the human body. The teeth and bones contain
Reference Answer: Summary : Calcium is the most plentiful mineral found in the human body. The teeth and bones contain


Output()

[128] 🎯 Correctness Score (raw): 0.7898667187900764

--- [Row 129] ---
Question: Do you have information about Calcium supplements
Model_Answer: Summary : WHO SHOULD TAKE CALCIUM SUPPLEMENTS?  Calcium is an important mineral for the human body. 
Reference Answer: Summary : WHO SHOULD TAKE CALCIUM SUPPLEMENTS?  Calcium is an important mineral for the human body. 


Output()

[129] 🎯 Correctness Score (raw): 0.8191021647511822

--- [Row 130] ---
Question: What are the complications of Cervicitis ? (Also called: Cervical inflammation; Inflammation - cervi
Model_Answer: Cervicitis may last for months to years. Cervicitis may lead to pain with intercourse (dyspareunia).
Reference Answer: Cervicitis may last for months to years. Cervicitis may lead to pain with intercourse (dyspareunia).


Output()

[130] 🎯 Correctness Score (raw): 0.7679497711553012

--- [Row 131] ---
Question: Do you have information about Nicotine and tobacco (Also called: Withdrawal from nicotine; Smoking -
Model_Answer: Summary : The nicotine in tobacco can be addictive like alcohol, cocaine, and morphine.  Withdrawal 
Reference Answer: Summary : The nicotine in tobacco can be addictive like alcohol, cocaine, and morphine.


Output()

[131] 🎯 Correctness Score (raw): 0.7165014359455159

--- [Row 132] ---
Question: What are the side effects or risks of Nicotine Gum ?
Model_Answer: If you experience any of the following symptoms, stop using nicotine gum and call your doctor immedi
Reference Answer: If you experience any of the following symptoms, stop using nicotine gum and call your doctor immedi


Output()

[132] 🎯 Correctness Score (raw): 0.7539150736173139

--- [Row 133] ---
Question: What is (are) Foot, leg, and ankle swelling ? (Also called: Swelling of the ankles - feet - legs; An
Model_Answer: Painless swelling of the feet and ankles is a common problem, especially among older people.  Abnorm
Reference Answer: Painless swelling of the feet and ankles is a common problem, especially among older people.  Abnorm


Output()

[133] 🎯 Correctness Score (raw): 0.7745988153477591

--- [Row 134] ---
Question: What to do for Foot, leg, and ankle swelling ? (Also called: Swelling of the ankles - feet - legs; A
Model_Answer: Some tips that may help reduce swelling:  - Put your legs on pillows to raise them above your heart 
Reference Answer: Painless swelling may affect both legs and may include the calves or even the thighs. The effect of 


Output()

[134] 🎯 Correctness Score (raw): 0.7542276312300756

--- [Row 135] ---
Question: What causes Foot, leg, and ankle swelling ? (Also called: Swelling of the ankles - feet - legs; Ankl
Model_Answer: Foot, leg, and ankle swelling is common when the person also:  - Is overweight  - Has a blood clot i
Reference Answer: Foot, leg, and ankle swelling is common when the person also:  - Is overweight  - Has a blood clot i


Output()

[135] 🎯 Correctness Score (raw): 0.8233963240524256

--- [Row 136] ---
Question: What to do for Foot, leg, and ankle swelling ? (Also called: Swelling of the ankles - feet - legs; A
Model_Answer: Some tips that may help reduce swelling:  - Put your legs on pillows to raise them above your heart 
Reference Answer: Some tips that may help reduce swelling:  - Put your legs on pillows to raise them above your heart 


Output()

[136] 🎯 Correctness Score (raw): 0.8080129895323622

--- [Row 137] ---
Question: What is the outlook for Foot, leg, and ankle swelling ? (Also called: Swelling of the ankles - feet 
Model_Answer: Your provider will take a medical history and do a thorough physical examination, paying special att
Reference Answer: Your provider will take a medical history and do a thorough physical examination, paying special att


Output()

[137] 🎯 Correctness Score (raw): 0.7227131741401334

--- [Row 138] ---
Question: What causes Plantar fasciitis ?
Model_Answer: Plantar fasciitis occurs when the thick band of tissue on the bottom of the foot is overstretched or
Reference Answer: Plantar fasciitis occurs when the thick band of tissue on the bottom of the foot is overstretched or


Output()

[138] 🎯 Correctness Score (raw): 0.8455840136059376

--- [Row 139] ---
Question: How to prevent Plantar fasciitis ?
Model_Answer: Stretch your plantar fascia in the morning before you get out of bed. Doing activities in moderation
Reference Answer: Making sure your ankle, Achilles tendon, and calf muscles are flexible can help prevent plantar fasc


Output()

[139] 🎯 Correctness Score (raw): 0.7735727665349318

--- [Row 140] ---
Question: How to diagnose Liver cancer - hepatocellular carcinoma ? (Also called: Primary liver cell carcinoma
Model_Answer: Thedoctor will perform a physical exam and ask about your symptoms. The physical exam may show an en
Reference Answer: Thedoctor will perform a physical exam and ask about your symptoms. The physical exam may show an en


Output()

[140] 🎯 Correctness Score (raw): 0.7258884965465083

--- [Row 141] ---
Question: How to diagnose Liver metastases ? (Also called: Metastases to the liver; Metastatic liver cancer; L
Model_Answer: Tests that may be done to diagnose liver metastases include:  - CT scan of the abdomen  - Liver func
Reference Answer: Tests that may be done to diagnose liver metastases include:  - CT scan of the abdomen  - Liver func


Output()

[141] 🎯 Correctness Score (raw): 0.4879087074600175

--- [Row 142] ---
Question: Do you have information about Upper GI and small bowel series (Also called: GI series; Barium swallo
Model_Answer: Summary : An upper GI and small bowel series is a set of x-rays taken to examine the esophagus, stom
Reference Answer: Summary : An upper GI and small bowel series is a set of x-rays taken to examine the esophagus, stom


Output()

[142] 🎯 Correctness Score (raw): 0.7244352875274072

--- [Row 143] ---
Question: What is (are) Heart Attack ?
Model_Answer: Espaol: El corazón ataque significa que el corazón ha parado o no ha comenzado a latir. Esto ocurre 
Reference Answer: Espaol


Output()

[143] 🎯 Correctness Score (raw): 0.58956148788891

--- [Row 144] ---
Question: What causes Heart Attack ?
Model_Answer: Most heart attacks are caused by a blood clot that blocks one of the coronary arteries, the blood ve
Reference Answer: Coronary Heart Disease


Output()

[144] 🎯 Correctness Score (raw): 0.8116312063168776

--- [Row 145] ---
Question: What causes Heart Attack ?
Model_Answer: Most heart attacks are caused by a blood clot that blocks one of the coronary arteries, the blood ve
Reference Answer: Most heart attacks are caused by a blood clot that blocks one of the coronary arteries, the blood ve


Output()

[145] 🎯 Correctness Score (raw): 0.8150026815395343

--- [Row 146] ---
Question: What causes Heart Attack ?
Model_Answer: Most heart attacks are caused by a blood clot that blocks one of the coronary arteries, the blood ve
Reference Answer: Coronary heart disease, or CHD, is the most common underlying cause of a heart attack. Coronary arte


Output()

[146] 🎯 Correctness Score (raw): 0.8187763430597546

--- [Row 147] ---
Question: How to diagnose Stroke ? (Also called: Cerebrovascular disease; CVA; Cerebral infarction; Cerebral h
Model_Answer: The doctor will do a physical exam to:   - Check for problems with vision, movement, feeling, reflex
Reference Answer: The doctor will do a physical exam to:   - Check for problems with vision, movement, feeling, reflex


Output()

[147] 🎯 Correctness Score (raw): 0.7782141599535661

--- [Row 148] ---
Question: How to diagnose Autoimmune disorders ?
Model_Answer: The health care provider will do a physical exam. Signs depend on the type of disease.  Tests that m
Reference Answer: The health care provider will do a physical exam. Signs depend on the type of disease.  Tests that m


Output()

[148] 🎯 Correctness Score (raw): 0.7344664206323175

--- [Row 149] ---
Question: What should I know about storage and disposal of Glimepiride ?
Model_Answer: Keep this medication in the container it came in, tightly closed, and out of reach of children. Stor
Reference Answer: Keep this medication in the container it came in, tightly closed, and out of reach of children. Stor


[149] 🎯 Correctness Score (raw): 0.8077238220745988
✅ Final results saved to final_eval_results.xlsx


## For BLEU Score

In [6]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [7]:
import pandas as pd

# Load the saved Excel file
df = pd.read_excel("generated_answers.xlsx")

print("✅ DataFrame reloaded successfully!")
print(df.head())  # Display the first few rows to verify

✅ DataFrame reloaded successfully!
                AnswerID    Unnamed: 2              Unnamed: 3  \
0  ADAM_0002818_Sec1.txt  3-Incomplete   ADAM_0002818_Sec1.txt   
1  ADAM_0002818_Sec2.txt  3-Incomplete   ADAM_0002818_Sec2.txt   
2  GARD_0004450_Sec1.txt  3-Incomplete   GARD_0004450_Sec1.txt   
3  GARD_0004450_Sec2.txt  3-Incomplete   GARD_0004450_Sec2.txt   
4  GARD_0004450_Sec3.txt  3-Incomplete   GARD_0004450_Sec3.txt   

                                            Question  \
0                    What is (are) Noonan syndrome ?   
1                      What causes Noonan syndrome ?   
2  What is (are) Noonan syndrome ? (Also called: ...   
3  What are the symptoms of Noonan syndrome ? (Al...   
4  Is Noonan syndrome inherited ? (Also called: M...   

                                              Answer  \
0  Noonan syndrome is a disease that can be passe...   
1  Noonan syndrome is linked to defects in severa...   
2  Noonan syndrome is a genetic disorder that cau...   
3  What

In [8]:
# Filter valid rows again
valid_rows = df[
    (df["Model_Answer"].notna()) &
    (df["Answer"].notna()) &
    (df["Question"].notna()) &
    (df["Model_Answer"] != "Skipped")
].copy()

print(valid_rows.head())

                AnswerID    Unnamed: 2              Unnamed: 3  \
0  ADAM_0002818_Sec1.txt  3-Incomplete   ADAM_0002818_Sec1.txt   
1  ADAM_0002818_Sec2.txt  3-Incomplete   ADAM_0002818_Sec2.txt   
2  GARD_0004450_Sec1.txt  3-Incomplete   GARD_0004450_Sec1.txt   
3  GARD_0004450_Sec2.txt  3-Incomplete   GARD_0004450_Sec2.txt   
4  GARD_0004450_Sec3.txt  3-Incomplete   GARD_0004450_Sec3.txt   

                                            Question  \
0                    What is (are) Noonan syndrome ?   
1                      What causes Noonan syndrome ?   
2  What is (are) Noonan syndrome ? (Also called: ...   
3  What are the symptoms of Noonan syndrome ? (Al...   
4  Is Noonan syndrome inherited ? (Also called: M...   

                                              Answer  \
0  Noonan syndrome is a disease that can be passe...   
1  Noonan syndrome is linked to defects in severa...   
2  Noonan syndrome is a genetic disorder that cau...   
3  What are the signs and symptoms of Noon

In [9]:
# ========== 8. Compute BLEU Score (No punkt needed) ==========
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

smoothie = SmoothingFunction().method4  # Smoothing for short sentences

# BLEU score calculation helper using simple whitespace split
def compute_bleu(reference, hypothesis):
    if not reference or not hypothesis:
        return 0.0
    try:
        reference_tokens = reference.split()
        hypothesis_tokens = hypothesis.split()

        return sentence_bleu(
            [reference_tokens], hypothesis_tokens,
            weights=(0.25, 0.25, 0.25, 0.25),  # BLEU-4
            smoothing_function=smoothie
        )
    except Exception as e:
        print(f"⚠️ BLEU computation error: {e}")
        return 0.0

# Store BLEU scores
bleu_scores = []

# Loop through rows and print BLEU computation info
for i, row in valid_rows.iterrows():
    reference = row["Answer"]
    hypothesis = row["Model_Answer"]
    score = compute_bleu(reference, hypothesis)
    bleu_scores.append(score)

# Assign scores to DataFrame
valid_rows["BLEU_Score"] = bleu_scores

# ========== 9. Save BLEU Results ==========
# Create a subset of columns to save
results_to_save = valid_rows[[
    "Question",
    "Answer",           # Reference Answer
    "Model_Answer",
    "BLEU_Score"
]].copy()

# Rename for clarity
results_to_save.rename(columns={
    "Answer": "Reference Answer"
}, inplace=True)

# Save to CSV
results_to_save.to_csv("bleu_score_results.csv", index=False)
print("📄 BLEU results saved to bleu_score_results.csv")

📄 BLEU results saved to bleu_score_results.csv


# THE END OF RE-RUN SECTION

## Tune the model (LoRA)

In [9]:
# ===============================
# LoRA Fine-Tuning Script for LLaMA 3.2 3B/7B on Medquad
# ===============================

import os
import torch
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training

# ===============================
# Step 1: Define Paths & Model ID
# ===============================
model_id = "meta-llama/Llama-3.2-3B-Instruct"  # You can also use the 3B model here
cache_dir = "/workspace/model_cache"
data_path = "VerifiedQ_A.xlsx"
output_dir = "./llama3_lora_medquad"

os.makedirs(cache_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

In [10]:
# ===============================
# Step 2: Load Dataset
# ===============================
df = pd.read_excel(data_path)
df = df.dropna(subset=["Question", "Answer"])

def format_prompt(example):
    return {
        "text": f"### Question:\n{example['Question']}\n\n### Answer:\n{example['Answer']}"
    }

raw_dataset = Dataset.from_pandas(df)
formatted_dataset = raw_dataset.map(format_prompt)

Map:   0%|          | 0/150 [00:00<?, ? examples/s]

In [11]:
# ===============================
# Step 3: Tokenizer
# ===============================
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

def tokenize(example):
    return tokenizer(
        example["text"], truncation=True, padding="max_length", max_length=512
    )

tokenized_dataset = formatted_dataset.map(tokenize, batched=True)


Map:   0%|          | 0/150 [00:00<?, ? examples/s]

In [12]:
# ===============================
# Step 4: Load Model (Full Precision) + Inject LoRA
# ===============================
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    cache_dir=cache_dir
)

lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
# ===============================
# Step 5: Training Configuration (Updated)
# ===============================
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,  # Run optimizer step after every batch
    num_train_epochs=10,  # More epochs for small dataset
    learning_rate=2e-4,
    bf16=True,
    logging_dir=f"{output_dir}/logs",
    save_strategy="epoch",
    report_to="none",
    logging_steps=1,  # Optional: more frequent logging
)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


In [14]:
# ===============================
# Step 6: Trainer
# ===============================
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

trainer.train()

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
1,2.0744
2,2.2467
3,1.7695
4,2.1574
5,1.8403
6,1.949
7,2.0056
8,1.4984
9,1.7692
10,1.9554


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

TrainOutput(global_step=380, training_loss=1.1147158563921327, metrics={'train_runtime': 124.7368, 'train_samples_per_second': 12.025, 'train_steps_per_second': 3.046, 'total_flos': 1.3073345150976e+16, 'train_loss': 1.1147158563921327, 'epoch': 10.0})

In [15]:
# ===============================
# Step 7: Save Model & Tokenizer
# ===============================
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"Fine-tuning complete! Model saved to {output_dir}")

Fine-tuning complete! Model saved to ./llama3_lora_medquad


## Load the NEW LoRA model

In [16]:
# ===============================
# Step 8: Load Fine-Tuned LoRA Model for Evaluation
# ===============================
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
import os

# Define the model directory where LoRA was saved
lora_model_dir = "./llama3_lora_medquad"
base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
cache_dir = "/workspace/model_cache"

# Load base LLaMA model (required for LoRA base)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    cache_dir=cache_dir
)

# Load the LoRA adapters
model = PeftModel.from_pretrained(base_model, lora_model_dir)
model.eval()  # Set to eval mode

# Load tokenizer from fine-tuned folder (same tokenizer)
tokenizer = AutoTokenizer.from_pretrained(lora_model_dir)

print("Fine-tuned LoRA model and tokenizer successfully loaded!")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Fine-tuned LoRA model and tokenizer successfully loaded!


In [25]:
# ===============================
# 🔍 Optional: Confirm it works with a test question
# ===============================
prompt = "Answer concisely:\n\nWhat are the symptoms of diabetes?\n\nAnswer:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=256,
        pad_token_id=tokenizer.eos_token_id
    )

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("🧪 Sample Output:\n", response)

🧪 Sample Output:
 Answer concisely:

What are the symptoms of diabetes?

Answer: 
Diabetes symptoms can vary depending on the type and severity of the condition. Common symptoms include:

* Increased thirst and hunger
* Frequent urination
* Fatigue
* Blurred vision
* Slow healing of cuts and wounds
* Tingling or numbness in hands and feet
* Unexplained weight loss
* Recurring skin, gum, or bladder infections

In type 1 diabetes, symptoms appear suddenly and can include:
* Severe vomiting
* Abdominal pain
* Diarrhea
* Nausea
* Headache

In type 2 diabetes, symptoms often develop gradually and may include:
* Increased appetite
* Weight gain
* Increased urination
* Fatigue
* Blurred vision

Note: Some people with diabetes may not exhibit any symptoms at all, especially in the early stages of the condition. Regular blood glucose monitoring and check-ups with a healthcare provider can help detect diabetes before symptoms appear.


## Fine-Tuning

In [8]:
# ===============================
# Clean Fine-Tuning Script for LLaMA 3.2 3B (No Benchmarking, From Scratch)
# ===============================

import os
import torch
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer,
    DataCollatorForLanguageModeling
)

# ===============================
# Step 1: Load model
# ===============================
model_id = "meta-llama/Llama-3.2-3B-Instruct"  # Model name
cache_dir = "/workspace/model_cache"  # Set cache directory to store model locally

# Ensure the cache directory exists
os.makedirs(cache_dir, exist_ok=True)

# Load tokenizer from cache (if available); otherwise, download and store it
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

# Load model with reduced memory usage and automatic GPU allocation
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    cache_dir=cache_dir
)

print("✅ Model successfully loaded from cache:", cache_dir)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Model successfully loaded from cache: /workspace/model_cache


In [10]:
# ===============================
# Step 2: Define Dataset
# ===============================
data_path = "VerifiedQ_A.xlsx"
output_dir = "./llama3_finetune_medquad"
os.makedirs(output_dir, exist_ok=True)

# ===============================
# Step 3: Load Dataset
# ===============================
df = pd.read_excel(data_path)
df = df.dropna(subset=["Question", "Answer"])
print(df.head()) # Quick check

def format_prompt(example): # mimics instruction-following prompts which models like LLaMA are tuned for
    return {
        "text": f"### Question:\n{example['Question']}\n\n### Answer:\n{example['Answer']}"
    }

raw_dataset = Dataset.from_pandas(df) # Convert DataFrame to Hugging Face Dataset
formatted_dataset = raw_dataset.map(format_prompt) # Apply Prompt Formatting

                AnswerID    Unnamed: 2              Unnamed: 3  \
0  ADAM_0002818_Sec1.txt  3-Incomplete   ADAM_0002818_Sec1.txt   
1  ADAM_0002818_Sec2.txt  3-Incomplete   ADAM_0002818_Sec2.txt   
2  GARD_0004450_Sec1.txt  3-Incomplete   GARD_0004450_Sec1.txt   
3  GARD_0004450_Sec2.txt  3-Incomplete   GARD_0004450_Sec2.txt   
4  GARD_0004450_Sec3.txt  3-Incomplete   GARD_0004450_Sec3.txt   

                                            Question  \
0                    What is (are) Noonan syndrome ?   
1                      What causes Noonan syndrome ?   
2  What is (are) Noonan syndrome ? (Also called: ...   
3  What are the symptoms of Noonan syndrome ? (Al...   
4  Is Noonan syndrome inherited ? (Also called: M...   

                                              Answer  
0  Noonan syndrome is a disease that can be passe...  
1  Noonan syndrome is linked to defects in severa...  
2  Noonan syndrome is a genetic disorder that cau...  
3  What are the signs and symptoms of Noonan s

Map:   0%|          | 0/150 [00:00<?, ? examples/s]

In [13]:
# ===============================
# Step 4: Tokenize Dataset
# ===============================
# Tokenize the formatted text prompts with max length and padding
# Returns token ids and attention masks used during training

def tokenize(example):
    return tokenizer(
        example["text"], truncation=True, padding="max_length", max_length=512
    )

# Apply tokenization to the entire dataset in batches for speed
tokenized_dataset = formatted_dataset.map(tokenize, batched=True)

Map:   0%|          | 0/150 [00:00<?, ? examples/s]

In [14]:
# ===============================
# Step 5: Training Configuration
# ===============================
# Define all hyperparameters and runtime configs for training
training_args = TrainingArguments(
    output_dir=output_dir,  # Folder to save model checkpoints
    per_device_train_batch_size=4,  # Batch size per GPU
    gradient_accumulation_steps=1,  # Gradient steps before backward/update
    num_train_epochs=10,  # Number of passes over the full training data
    learning_rate=5e-5,  # Initial learning rate for AdamW optimizer
    bf16=True,  # Use bfloat16 if supported by GPU (e.g., A100)
    logging_dir=f"{output_dir}/logs",  # Save training logs here
    save_strategy="epoch",  # Save model once per epoch
    report_to="none",  # Disable integration with wandb/huggingface
    logging_steps=10,  # Log loss etc. every N steps
    save_total_limit=2,  # Keep only last 2 checkpoints to save disk
    warmup_steps=100  # Gradually ramp up LR to avoid instability
)

# Create a data collator that dynamically pads batches during training
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [15]:
# ===============================
# Step 6: Trainer
# ===============================
# The Trainer class handles the training loop and saves the best model
trainer = Trainer(
    model=model,  # The model to fine-tune
    args=training_args,  # Training configuration
    train_dataset=tokenized_dataset,  # Input data
    tokenizer=tokenizer,  # Tokenizer for saving and decoding
    data_collator=data_collator  # Handles padding during training
)

# Start training
trainer.train()

  trainer = Trainer(


Step,Training Loss
10,1.9706
20,1.6846
30,1.5128
40,1.3568
50,1.1673
60,1.0768
70,1.032
80,0.8554
90,0.5748
100,0.506


TrainOutput(global_step=380, training_loss=0.4065922520662609, metrics={'train_runtime': 545.3352, 'train_samples_per_second': 2.751, 'train_steps_per_second': 0.697, 'total_flos': 1.2988787982336e+16, 'train_loss': 0.4065922520662609, 'epoch': 10.0})

In [16]:
# ===============================
# Step 7: Save Model & Tokenizer
# ===============================
# Save the final fine-tuned model and tokenizer for inference or reuse
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"\n✅ Full fine-tuning complete! Model saved to {output_dir}")


✅ Full fine-tuning complete! Model saved to ./llama3_finetune_medquad


## Load the NEW Fine-Tuned Model

In [18]:
# ===============================
# Step 8: Load Fine-Tuned Full Model for Inference
# ===============================
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Define paths to the saved model and cache
finetuned_model_dir = "./llama3_finetune_medquad"  # Your saved model directory
cache_dir = "/workspace/model_cache"  # Cache path

# Load the fine-tuned model
model = AutoModelForCausalLM.from_pretrained(
    finetuned_model_dir,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    cache_dir=cache_dir
)
model.eval()  # Set to eval mode to disable dropout etc.

# Load the tokenizer (from same fine-tuned folder)
tokenizer = AutoTokenizer.from_pretrained(finetuned_model_dir)
tokenizer.pad_token = tokenizer.eos_token  # Ensure pad token is defined

print("✅ Fine-tuned full model and tokenizer successfully loaded!")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Fine-tuned full model and tokenizer successfully loaded!


## Now go back the top and START from the "# FROM HERE, after reload the NEW & Trained model, you can re-run the codes below till the end of BLEU"