# Preparing The dataset


In [None]:
import os
import json
import re

# Define paths
base_path = os.getcwd()
kb_folder_path = os.path.join(base_path, "medicalKnowledgeBase", "Diagnosis_flowchart")
patient_cases_folder = os.path.join(base_path, "Finished")
output_file = os.path.join(base_path, "combined_rag_data.json")

print(f"Knowledge Base Path: {kb_folder_path}")
print(f"Patient Cases Path: {patient_cases_folder}")

def clean_text(text):
    if not isinstance(text, str):
        return text
    cleaned = re.sub(r'\s*___\s*', ' ', text)

    cleaned = re.sub(r' +', ' ', cleaned)
    cleaned = re.sub(r' +([,.:;])', r'\1', cleaned)
    return cleaned.strip()

# Counter for generating unique IDs
kb_id_counter = 0        

def Convertorfunction(data, parent_key=""):
    global kb_id_counter
    items = []
    for key, value in data.items():
        new_key = f"{parent_key}/{key}" if parent_key else key

        if isinstance(value, str):
            kb_id_counter += 1
            items.append({
                "id": f"KB_{kb_id_counter:04d}",
                "medicalKB": f"{new_key}: {value}"
            })
        elif isinstance(value, list):
            kb_id_counter += 1
            items.append({
                "id": f"KB_{kb_id_counter:04d}",
                "medicalKB": f"{new_key}: []"
            })
        elif isinstance(value, dict):
            items.extend(Convertorfunction(value, new_key))
    return items

knowledge_base_entries = []

if os.path.exists(kb_folder_path):
    for file_name in os.listdir(kb_folder_path):
        if file_name.endswith(".json"):
            file_path = os.path.join(kb_folder_path, file_name)
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            knowledge_base_entries.extend(Convertorfunction(data))
    print(f"Processed {len(knowledge_base_entries)} knowledge base entries")
else:
    print(f"Warning: Knowledge base folder not found at {kb_folder_path}")
# here I am converting multi line files into one line thus flatten
def flatten_reasoning(obj, prefix=""):
    parts = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            new_prefix = f"{prefix} -> {key}" if prefix else key
            if isinstance(value, dict) and value:  # Non-empty dict
                parts.append(flatten_reasoning(value, new_prefix))
            else:
                parts.append(new_prefix)
    return " | ".join(filter(None, parts))

def process_patient_case(file_path, file_name, disease_group, specific_disease=None):
    with open(file_path, 'r', encoding='utf-8') as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError:
            print(f"Error decoding JSON: {file_path}")
            return None

    inputs = {}
    reasoning = {}
    for key, value in data.items():
        if key.lower().startswith("input"):
            inputs[key] = value
        else:
            reasoning[key] = value

    cleaned_inputs = {}
    for i in range(1, 7):
        key = f"input{i}"
        val = None
        for k in inputs:
            if k.lower() == key:
                val = inputs[k]
                break
        
        if not val or (isinstance(val, str) and val.strip() == ""):
            val = "NA"
        
        if isinstance(val, str):
            cleaned_inputs[key] = clean_text(val.strip())
        else:
            cleaned_inputs[key] = val

    reasoning_string = flatten_reasoning(reasoning)
    # here i am using file name as id 
    case_id = file_name.replace(".json", "")
    
    case_entry = {
        "id": case_id,
        "patient_case": {
            "disease_group": disease_group,
            "specific_disease": specific_disease if specific_disease else "NA",
            "reasoning": reasoning_string,
            "inputs": cleaned_inputs
        }
    }
    
    return case_entry

patient_cases_entries = []

if os.path.exists(patient_cases_folder):
    for root, dirs, files in os.walk(patient_cases_folder):
        for file in files:
            if file.endswith(".json"):
                file_path = os.path.join(root, file)
                
                rel_path = os.path.relpath(root, patient_cases_folder)
                path_parts = rel_path.split(os.sep)
                
                # Get disease group and specific disease from folder structure
                if len(path_parts) >= 1 and path_parts[0] != ".":
                    disease_group = path_parts[0]
                    specific_disease = path_parts[1] if len(path_parts) > 1 else None
                    
                    case_data = process_patient_case(file_path, file, disease_group, specific_disease)
                    if case_data:
                        patient_cases_entries.append(case_data)
    print(f"Processed {len(patient_cases_entries)} patient cases")
else:
    print(f"Warning: Patient cases folder not found at {patient_cases_folder}")

# Combine knowledge base entries (upper part) with patient cases (lower part)
combined_data = knowledge_base_entries + patient_cases_entries

# Save the combined output
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(combined_data, f, indent=2, ensure_ascii=False)

print(f"\nSummary")
print(f"Total Knowledge Base entries: {len(knowledge_base_entries)} (IDs: KB_0001 to KB_{kb_id_counter:04d})")
print(f"Total Patient Cases: {len(patient_cases_entries)}")
print(f"Combined entries: {len(combined_data)}")
print(f"Saved to: {output_file}")

# Setting up Env

In [2]:
import json
import os
import pickle
import numpy as np
from typing import List, Dict
from sentence_transformers import SentenceTransformer, util

In [3]:
!pip install transformers accelerate huggingface-hub sentence-transformers faiss-cpu streamlit python-dotenv --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m97.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m109.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m105.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m110.4 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m88.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m39.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6

In [4]:
# using this lightweight sentence transformer
import faiss
TOP_K=3

# RAG Pipeline

### Dataloading and Spliting

In [5]:
def load_data(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

kb_items = load_data("/kaggle/input/medicaldataset-nlp-a04/combined_rag_data.json")

medical_kb_texts = []
patient_case_texts = []
medical_kb_items = []
patient_case_items = []

for item in kb_items:
    if "medicalKB" in item:
        medical_kb_texts.append(item["medicalKB"])
        medical_kb_items.append(item)
    elif "patient_case" in item:
        txt = " ".join(item["patient_case"]["inputs"].values())
        patient_case_texts.append(txt)
        patient_case_items.append(item)

In [6]:
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
def embed(texts):
    return model.encode(
        texts,
        batch_size=16,
        show_progress_bar=True,
        convert_to_numpy=True,
        normalize_embeddings=True
    ).astype("float32")

kb_emb = embed(medical_kb_texts)
case_emb = embed(patient_case_texts)

def build_index(emb):
    dim = emb.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(emb)
    return index

kb_index = build_index(kb_emb)
case_index = build_index(case_emb)

modules.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration_hf_nomic_bert.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- configuration_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_hf_nomic_bert.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- modeling_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/547M [00:00<?, ?B/s]



tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

Batches:   0%|          | 0/12 [00:00<?, ?it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

In [7]:
faiss.write_index(kb_index, "kb_index.faiss")
faiss.write_index(case_index, "patient_index.faiss")

with open("kb_items.pkl", "wb") as f:
    pickle.dump(medical_kb_items, f)

with open("patient_items.pkl", "wb") as f:
    pickle.dump(patient_case_items, f)

In [8]:
def search_top_k(query, k=3):
    q = model.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
    faiss.normalize_L2(q)  # normalize to match IndexFlatIP
    kb_scores, kb_idx = kb_index.search(q, k)
    case_scores, case_idx = case_index.search(q, k)
    kb_res = [(medical_kb_items[i], float(kb_scores[0][j])) for j, i in enumerate(kb_idx[0])]
    case_res = [(patient_case_items[i], float(case_scores[0][j])) for j, i in enumerate(case_idx[0])]
    return kb_res, case_res


In [9]:
def construct_prompt(query, kb_res, case_res):
    ctx = "Relevant Medical Knowledge:\n"
    for item, score in kb_res:
        ctx += f"- {item['medicalKB'][:300]}... (score {score:.2f})\n"

    ctx += "\nRelevant Patient Cases:\n"
    for item, score in case_res:
        txt = " ".join(item["patient_case"]["inputs"].values())
        ctx += f"- {txt[:300]}... (score {score:.2f})\n"

    prompt = f"""
You are a medical expert.
Use the given retrieved information to answer.

{ctx}

Patient Query:
{query}

Explain briefly, then give final answer.
"""
    return prompt


# Preparing HuggingFace LLM

In [10]:
from transformers import AutoTokenizer,AutoModelForCausalLM
import torch
LLM_Model="Qwen/Qwen3-4B-base"
tokenizer=AutoTokenizer.from_pretrained(LLM_Model,use_fast=False)
llm=AutoModelForCausalLM.from_pretrained(LLM_Model,torch_dtype=torch.float16,device_map="auto")

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/99.6M [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [11]:
def llm_generate(prompt, max_new_tokens=256):
    inputs = tokenizer(prompt, return_tensors="pt").to(llm.device)
    output = llm.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)


In [12]:
def run_rag(patient_case):
    query = patient_case["input_text"]
    kb_res, case_res = search_top_k(query)
    prompt = construct_prompt(query, kb_res, case_res)
    answer = llm_generate(prompt)

    return answer, kb_res, case_res, prompt

In [13]:
def display_retrieved(kb_res, case_res):

    print("\n Retrieved Medical Knowledge ")
    for item, score in kb_res:
        print("\nScore:", score)
        print(item["medicalKB"][:350])

    print("\n Retrieved Patient Cases")
    for item, score in case_res:
        print("\nScore:", score)
        txt = " ".join(item["patient_case"]["inputs"].values())
        print(txt[:350])


# Model Inference

In [14]:
patient_case = {
    "input_text": "A 65-year-old male with aggressive behavior and recent falls..."
}

answer, kb_res, case_res, prompt = run_rag(patient_case)

print("\nRAG System final response \n")
print(answer)

display_retrieved(kb_res, case_res)


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



RAG System final response 


You are a medical expert.
Use the given retrieved information to answer.

Relevant Medical Knowledge:
- knowledge/Suspected Alzheimer/Symptoms: Cognitive decline, including memory loss and deterioration of other functions; impairment in at least two cognitive areas; progressive decline in memory and cognition without loss of consciousness; typically occurs between ages 40 and 90, with higher frequency... (score 0.54)
- knowledge/Massive PE: The patient develops hemodynamic instability, such as sustained hypotension, shock, or cardiac arrest. These patients are high-risk PE and require immediate thrombolytic treatment or surgical intervention.... (score 0.53)
- knowledge/Suspected Stroke/Signs: facial drooping, arm weakness, speech difficulties, vision problems, severe headache, dizziness or loss of balance, confusion, difficulty walking, numbness or paralysis on one side of the body, sudden behavioral change, and loss of consciousness... (score 0.52)

Rele

# Using API Models

In [None]:
!pip install google-generativeai python-dotenv --quiet

import os
from dotenv import load_dotenv
import google.generativeai as genai

# Get API key
api_key = "key"


genai.configure(api_key=api_key)

# Example prompt
prompt = """
First explain what it diesase could be then give all the reasons
Explain the symptoms of a 60-year-old male with sudden chest pain radiating to the back.
Use headings or bullet points where needed.
Give response in plain text without using any steric '*' Behave like medical assistant
"""

# Generate response
model = genai.GenerativeModel("gemini-2.0-flash")
response = model.generate_content(prompt)

print(response.text)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m319.9/319.9 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
google-cloud-translate 3.12.1 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.29.5 which is incompatible.
ray 2.51.1 requires click!=8.3.0,>=7.0, but you have click 8.3.0 which is incompatible.
bigframes 2.12.0 requires rich<14,>=12.4.4, but you have rich 14.2.0 which is incompatible.
pydrive2 1.21.3 requires cryptography<44, but you have cryptography 46.0.3 which is incompatible.
pydrive2 1.21.3 requires pyOpenSSL<=24.2.1,>=19.1.0, but you have pyopenssl 25.3.0 which is incompatible.
gcsfs

# Testing Deepseek

In [16]:
import json
from sentence_transformers import SentenceTransformer
import torch
import faiss
from transformers import AutoTokenizer, AutoModelForCausalLM

# ---------------------------
# Paths
DATA_PATH = "/kaggle/input/medicaldataset-nlp-a04/combined_rag_data.json"
KB_FAISS_PATH = "/kaggle/working/kb_index.faiss"
PATIENT_FAISS_PATH = "/kaggle/working/patient_index.faiss"

# ---------------------------
# Load data
with open(DATA_PATH, "r") as f:
    data = json.load(f)

kb_items = [item for item in data if "medicalKB" in item]
patient_case_items = [item for item in data if "patient_case" in item]

# ---------------------------
# Load embedding model
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)

# Load FAISS indexes
kb_index = faiss.read_index(KB_FAISS_PATH)
patient_index = faiss.read_index(PATIENT_FAISS_PATH)

# ---------------------------
# Retrieval function
def retrieve(query, k=2):
    q_emb = model.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
    faiss.normalize_L2(q_emb)

    kb_scores, kb_idx = kb_index.search(q_emb, k)
    kb_results = [(kb_items[i], float(kb_scores[0][j])) for j, i in enumerate(kb_idx[0])]

    patient_scores, patient_idx = patient_index.search(q_emb, k)
    patient_results = [(patient_case_items[i], float(patient_scores[0][j])) for j, i in enumerate(patient_idx[0])]

    return kb_results, patient_results

# ---------------------------
# Load LLM
LLM_Model = "Qwen/Qwen3-4B-Base"
tokenizer = AutoTokenizer.from_pretrained(LLM_Model, use_fast=False)
llm = AutoModelForCausalLM.from_pretrained(LLM_Model, torch_dtype=torch.float16, device_map="auto")

def llm_generate(prompt, max_new_tokens=256):
    inp = tokenizer(prompt, return_tensors="pt").to(llm.device)
    out = llm.generate(**inp, max_new_tokens=max_new_tokens)
    return tokenizer.decode(out[0], skip_special_tokens=True)

# ---------------------------
# Full RAG + LLM pipeline
def run_rag(patient_case_text):
    kb_res, patient_res = retrieve(patient_case_text, k=2)

    context = "Relevant KB:\n"
    for item, score in kb_res:
        context += f"- {item['medicalKB'][:300]}... (score {score:.2f})\n"

    context += "\nRelevant Patient Cases:\n"
    for item, score in patient_res:
        txt = " ".join(item["patient_case"]["inputs"].values())
        context += f"- {txt[:300]}... (score {score:.2f})\n"

    prompt = f"""
You are a medical expert.
Use the retrieved knowledge base information to answer the patient case.

Context:
{context}

Patient Case:
{patient_case_text}

Explain first, then give final diagnosis.
"""
    answer = llm_generate(prompt)
    return answer, kb_res, patient_res

# ---------------------------
# Test cases
test_cases = [
    "A 60-year-old male with chest pain radiating to the left arm, shortness of breath, nausea, and sweating.",
    "A 65-year-old female with leg swelling, fatigue, and difficulty breathing when lying down.",
    "What lifestyle changes can prevent heart disease in elderly patients?",
    "What medication do you suggest when person is suffering from "
]

for i, case in enumerate(test_cases, 1):
    print(f"\n=== Test Case {i} ===")
    answer, kb_res, patient_res = run_rag(case)

    print("\nRetrieved KB Entries:")
    for item, score in kb_res:
        print(f"- {item['medicalKB'][:150]}... (score {score:.2f})")

    print("\nRetrieved Patient Cases:")
    for item, score in patient_res:
        txt = " ".join(item["patient_case"]["inputs"].values())
        print(f"- {txt[:150]}... (score {score:.2f})")

    print("\nLLM Answer:")
    print(answer)
    print("="*50)




tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/99.6M [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



=== Test Case 1 ===


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Retrieved KB Entries:
- knowledge/Suspected ACS/Symptoms: Chest pain, sweating, nausea, vomiting, palpitations, dyspnea, arrhythmia with weakness, dizziness or syncope, hypot... (score 0.69)
- knowledge/Suspected Cardiomyopathy/Symptoms: Fatigue and weakness, Shortness of breath, Swelling of the legs and ankles, Arrhythmias, Chest pain; etc.... (score 0.67)

Retrieved Patient Cases:
- Chest pain Mr. Mike is a man with PMH significant for COPD, HTN, HLD who presents with progressive shortness of breath and chest pain.
﻿
The patient r... (score 0.76)
- Chest Pain He is a gentleman with history of HTN, HLD, PVD, tobacco use presenting with two weeks of intermittent chest pain worsened with exertion.  ... (score 0.74)

LLM Answer:

You are a medical expert.
Use the retrieved knowledge base information to answer the patient case.

Context:
Relevant KB:
- knowledge/Suspected ACS/Symptoms: Chest pain, sweating, nausea, vomiting, palpitations, dyspnea, arrhythmia with weakness, dizziness or s

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Retrieved KB Entries:
- knowledge/Suspected Cardiomyopathy/Symptoms: Fatigue and weakness, Shortness of breath, Swelling of the legs and ankles, Arrhythmias, Chest pain; etc.... (score 0.66)
- knowledge/Suspected Heart Failure/Symptoms: Typical: Breathlessness, Orthopnoea, Paroxysmal nocturnal dyspnoea, Reduced exercise tolerance, Fatigue, t... (score 0.65)

Retrieved Patient Cases:
- Fatigue, weakness Patient is an ___ year old female with a history of HTN, DM type II and a recent admission for altered mental status that resolved o... (score 0.72)
- Dyspnea States that she has been feeling more DOE,and is now feeling short of breath at rest. Also reports acute on chronic weakness including difficu... (score 0.71)

LLM Answer:

You are a medical expert.
Use the retrieved knowledge base information to answer the patient case.

Context:
Relevant KB:
- knowledge/Suspected Cardiomyopathy/Symptoms: Fatigue and weakness, Shortness of breath, Swelling of the legs and ankles, Arrhythmias, Che

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Retrieved KB Entries:
- knowledge/Suspected Cardiomyopathy/Risk Factors: Genetic predisposition, Long-standing high blood pressure, Viral infections, Alcohol and toxins, Meta... (score 0.65)
- knowledge/Suspected Heart Failure/Risk Factors: CAD, Hypertension, Valve disease, Arrhythmias, CMPs, Congenital heart disease, Infective, Drug-induced... (score 0.64)

Retrieved Patient Cases:
- dyspnea on exertion with h/o hypertension, hyperlipidemia, heart failure preserved ejection fraction and MVP with severe MR and TR who has had at leas... (score 0.60)
- Shortness of breath. Mr. ___ is a ___ year old man with a history of systolic CHF who presents with shortness of breath which has been worsening for 4... (score 0.59)

LLM Answer:

You are a medical expert.
Use the retrieved knowledge base information to answer the patient case.

Context:
Relevant KB:
- knowledge/Suspected Cardiomyopathy/Risk Factors: Genetic predisposition, Long-standing high blood pressure, Viral infections, Alcohol and