# Preparing the data 

In [1]:
# Knowledge base for the retrieval system, keyed by sickness name.
# Every entry is a dict with the same four keys:
#   "Description" - one-sentence summary (str)
#   "Symptoms"    - list of short symptom phrases (list of str)
#   "Causes"      - free-text explanation of the cause (str)
#   "Medications" - list of treatment / medication notes (list of str)
# Only the "Symptoms" lists are joined and embedded later in this notebook;
# the other fields are kept here as reference data.
sicknesses = {
    "Common Cold": {
        "Description": "A viral infection of the upper respiratory tract, primarily affecting the nose and throat.",
        "Symptoms": [
            "Runny or stuffy nose",
            "Sneezing",
            "Sore throat",
            "Coughing",
            "Mild headache",
            "Fatigue",
            "Low-grade fever"
        ],
        "Causes": "Caused by various viruses, most commonly rhinoviruses. Spreads through respiratory droplets or contact with contaminated surfaces.",
        "Medications": [
            "Decongestants (e.g., pseudoephedrine) for nasal congestion",
            "Pain relievers (e.g., ibuprofen or acetaminophen) for fever and headaches",
            "Cough suppressants (e.g., dextromethorphan)",
            "Rest, hydration, and soothing throat lozenges"
        ]
    },
    "Influenza (Flu)": {
        "Description": "A highly contagious viral infection that affects the respiratory system.",
        "Symptoms": [
            "High fever",
            "Chills",
            "Muscle aches",
            "Fatigue",
            "Sore throat",
            "Dry cough",
            "Nasal congestion"
        ],
        "Causes": "Caused by influenza viruses transmitted via respiratory droplets or contaminated surfaces.",
        "Medications": [
            "Antiviral drugs (e.g., oseltamivir or zanamivir) if taken early",
            "Pain relievers (e.g., acetaminophen or ibuprofen)",
            "Rest, hydration, and over-the-counter medications for specific symptoms"
        ]
    },
    "Gastroenteritis (Stomach Flu)": {
        "Description": "An inflammation of the stomach and intestines causing digestive distress.",
        "Symptoms": [
            "Nausea",
            "Vomiting",
            "Diarrhea",
            "Abdominal cramps",
            "Fever",
            "Dehydration"
        ],
        "Causes": "Viral (e.g., norovirus, rotavirus), bacterial (e.g., E. coli, Salmonella), or parasitic infections; consuming contaminated food or water.",
        "Medications": [
            "Oral rehydration solutions (ORS) for dehydration",
            "Antidiarrheal drugs (e.g., loperamide, but not recommended for bacterial causes)",
            "Antiemetics (e.g., ondansetron) for nausea",
            "Probiotics to restore gut flora"
        ]
    },
    "Urinary Tract Infection (UTI)": {
        "Description": "An infection in any part of the urinary system, including the bladder, urethra, or kidneys.",
        "Symptoms": [
            "Pain or burning during urination",
            "Frequent urge to urinate",
            "Cloudy or foul-smelling urine",
            "Pelvic pain",
            "Sometimes fever"
        ],
        "Causes": "Bacterial infection, often Escherichia coli (E. coli), which enters through the urethra. Poor hygiene and dehydration increase risk.",
        "Medications": [
            "Antibiotics (e.g., nitrofurantoin, trimethoprim-sulfamethoxazole)",
            "Pain relievers (e.g., phenazopyridine) for urinary discomfort",
            "Increased water intake to flush out bacteria"
        ]
    },
    "Allergic Rhinitis (Hay Fever)": {
        "Description": "An allergic reaction to airborne allergens like pollen, dust mites, or pet dander.",
        "Symptoms": [
            "Sneezing",
            "Runny or congested nose",
            "Itchy eyes, throat, or nose",
            "Watery eyes",
            "Fatigue"
        ],
        "Causes": "Allergens trigger the immune system to release histamines, causing symptoms.",
        "Medications": [
            "Antihistamines (e.g., loratadine, cetirizine)",
            "Nasal corticosteroids (e.g., fluticasone, mometasone)",
            "Decongestants (e.g., pseudoephedrine) for temporary relief",
            "Allergen avoidance and air purifiers to reduce exposure"
        ]
    }
}


# Setting up the API key for Gemini

In [None]:
import os
from getpass import getpass

import google.generativeai as genai

# Keep the secret out of the notebook source: read the key from the
# GOOGLE_API_KEY environment variable and, if it is not set, ask for it with a
# hidden prompt so it never appears in a saved cell or its output.
api_key = os.environ.get("GOOGLE_API_KEY") or getpass("Gemini API key: ")
if not api_key:
    raise RuntimeError(
        "No Gemini API key provided; set GOOGLE_API_KEY or enter one at the prompt."
    )
genai.configure(api_key=api_key)

# Embedding

In [4]:
import faiss
import numpy as np

# Two parallel lists in the dict's insertion order: the sickness names, and
# for each sickness its symptom phrases joined into a single "; "-separated
# string (the text that gets embedded).
sickness_names = [*sicknesses]
symptoms_texts = ['; '.join(entry['Symptoms']) for entry in sicknesses.values()]

def get_embedding(text, model='models/embedding-001', task_type=None):
    """Embed ``text`` with ``genai.embed_content`` and return a float32 vector.

    Parameters
    ----------
    text : str
        Content to embed; forwarded unchanged as the ``content`` argument.
    model : str, optional
        Name of the embedding model. Defaults to ``'models/embedding-001'``,
        the model this notebook was written against.
    task_type : str or None, optional
        Forwarded to ``genai.embed_content`` only when it is not ``None``
        (for example ``"retrieval_document"`` or ``"retrieval_query"``).
        With the default the request is exactly the two-argument call.

    Returns
    -------
    numpy.ndarray
        ``float32`` array built from the response's ``'embedding'`` field.

    Raises
    ------
    ValueError
        If the response contains no ``'embedding'`` field.
    """
    request = {'model': model, 'content': text}
    if task_type is not None:
        # Only send task_type when the caller asked for one, so default calls
        # stay identical to the plain model + content request.
        request['task_type'] = task_type

    embedding_result = genai.embed_content(**request)

    if 'embedding' not in embedding_result:
        raise ValueError(f"No 'embedding' field found for document: {text}")
    return np.array(embedding_result['embedding'], dtype=np.float32)

# One embedding request per joined symptom string, kept in the same order as
# symptoms_texts so position i still refers to sickness_names[i].
embeddings = list(map(get_embedding, symptoms_texts))

# Checking

In [5]:
# Sanity check: how many vectors came back, and the length of the first one.
n_docs = len(embeddings)
print(f"Number of embedded documents: {n_docs}")
n_dims = len(embeddings[0])
print(f"Embedding dimension: {n_dims}")

Number of embedded documents: 5
Embedding dimension: 768


# Saving

In [None]:
import os
import json

# Stack the per-document vectors into one float32 matrix (one row per document).
embedded_docs_np = np.array(embeddings, dtype=np.float32)

# Fail early with a readable message instead of an IndexError on shape[1]
# when the embedding cell produced nothing.
if embedded_docs_np.ndim != 2 or embedded_docs_np.shape[0] == 0:
    raise ValueError(
        f"Expected a non-empty 2-D embedding matrix, got shape {embedded_docs_np.shape}"
    )

# Row i of the index must describe sickness_names[i]; stale kernel state (for
# example an embedding cell that failed part-way) would otherwise write an
# index whose ids silently disagree with documents.json.
if not (len(sickness_names) == len(symptoms_texts) == embedded_docs_np.shape[0]):
    raise ValueError(
        "Mismatch between documents and embeddings: "
        f"{len(sickness_names)} names, {len(symptoms_texts)} texts, "
        f"{embedded_docs_np.shape[0]} vectors. Re-run the embedding cell."
    )

# Normalize vectors (in place) so that inner product equals cosine similarity
faiss.normalize_L2(embedded_docs_np)

# Create FAISS index: exact (flat) inner-product search over the unit vectors
embedding_dim = embedded_docs_np.shape[1]
index = faiss.IndexFlatIP(embedding_dim)
index.add(embedded_docs_np)

# Create save directory
save_folder = "rag_system"
os.makedirs(save_folder, exist_ok=True)

# Save FAISS index
index_path = os.path.join(save_folder, "index.faiss")
faiss.write_index(index, index_path)

# Save documents mapping: string row id -> the name and text stored at that row
docs_mapping = {
    str(row_id): {
        "sickness_name": name,
        "symptoms_text": symptoms,
    }
    for row_id, (name, symptoms) in enumerate(zip(sickness_names, symptoms_texts))
}

# Explicit encoding so the file is written the same way on every platform.
with open(os.path.join(save_folder, "documents.json"), "w", encoding="utf-8") as f:
    json.dump(docs_mapping, f)