# 🧬 MEXAR Nano: SympScan Integration


**Dataset:** SympScan (Symptoms, Descriptions, Diets, Medications, Precautions, Workouts)

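This notebook trains the **MEXAR Nano** model on the SympScan dataset: a larger teacher network is trained first, and its knowledge is then distilled into the compact Nano student that gets exported.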

In [None]:
# Mount Google Drive at /content/drive (Colab-only); the dataset paths used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1. Setup & Library Imports

In [None]:
import os
import zipfile
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle


# Device configuration: use the GPU when CUDA is available, otherwise fall back to the CPU
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Distillation hyperparameters (consumed by the data-loading, teacher-training and distillation cells)
TEMPERATURE = 4.0   # Divides both teacher and student logits before softmax in the distillation loss
ALPHA = 0.7         # Weight of the distillation (soft-target) loss; the label cross-entropy gets 1 - ALPHA = 0.3
BATCH_SIZE = 32     # Mini-batch size for the train and test DataLoaders
EPOCHS_TEACHER = 40 # Number of teacher training epochs
EPOCHS_STUDENT = 40 # Number of distillation epochs (currently the same budget as the teacher)

## 2. Data Loading

We load the main training data (`Diseases_and_Symptoms_dataset.csv`) and the supplementary knowledge base files.

**Note:** The CSV files are read from `/content/drive/MyDrive/Dataset/SympScan/` on Google Drive, so make sure the contents of `archive.zip` have been extracted into that folder.

In [None]:
# --- DATA LOADING: read the main CSV, encode the labels, and split into train/test ---

print("\n[1/7] Processing Data...")
# Extract a local archive.zip into the working directory when one is present.
# NOTE(review): the CSV below is read from Google Drive, not from the extraction
# target ".", so this step does not appear to feed the load — confirm it is needed.
if os.path.exists("archive.zip"):
    with zipfile.ZipFile("archive.zip", 'r') as zip_ref:
        zip_ref.extractall(".")
    print("   - Extracted archive.zip")

# Load the main dataset from Drive; a missing file is re-raised with a shorter message.
try:
    df = pd.read_csv('/content/drive/MyDrive/Dataset/SympScan/Diseases_and_Symptoms_dataset.csv')
    # Cleanup: replace every NaN with 0.
    # NOTE(review): this also applies to the label column (column 0), so a missing
    # disease name would become the label 0 — verify the label column has no NaNs.
    df = df.fillna(0)
    print(f"   - Main Dataset Loaded: {df.shape}")
except FileNotFoundError:
    raise FileNotFoundError("CRITICAL: 'Diseases_and_Symptoms_dataset.csv' not found.")

# Separate features (symptoms) and target (disease):
# column 0 holds the disease label, columns 1..N hold the symptom indicators.
X_raw = df.iloc[:, 1:].values.astype(float)
y_raw = df.iloc[:, 0].values

# Column order defines the feature order the models are trained on.
symptom_names = list(df.columns[1:])
print(f"   - Detected {len(symptom_names)} Symptoms")

# Encode disease names as integer class ids; `le.classes_` maps an id back to its name.
le = LabelEncoder()
y_encoded = le.fit_transform(y_raw)
disease_names = le.classes_
print(f"   - Detected {len(disease_names)} Diseases")

# 80/20 train/test split with a fixed seed.
# NOTE(review): the split is not stratified by disease — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(X_raw, y_encoded, test_size=0.2, random_state=42)

# PyTorch Dataset
class MedicalDataset(Dataset):
    """In-memory dataset pairing symptom feature rows with integer disease labels.

    Args:
        features: Array-like of per-sample feature rows; stored as a float32 tensor.
        labels: Array-like of integer class ids; stored as an int64 tensor.
    """

    def __init__(self, features, labels):
        # Convert once up front so item access is a plain tensor index.
        self.labels = torch.LongTensor(labels)
        self.features = torch.FloatTensor(features)

    def __len__(self):
        # The sample count is the number of feature rows.
        return self.features.size(0)

    def __getitem__(self, idx):
        sample_features = self.features[idx]
        sample_label = self.labels[idx]
        return sample_features, sample_label

# Batch the two splits: training batches are reshuffled every epoch, test batches keep their order.
train_loader = DataLoader(MedicalDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(MedicalDataset(X_test, y_test), batch_size=BATCH_SIZE, shuffle=False)



[1/7] Processing Data...
   - Main Dataset Loaded: (96088, 231)
   - Detected 230 Symptoms
   - Detected 100 Diseases


## 3. CONSTRUCTING KNOWLEDGE BASE



In [None]:
# Merge every helper CSV into one nested dictionary for the app:
#   full_knowledge_base[topic][<value of the CSV's first column>] -> {column: value, ...}
print("\n[2/7] Building Knowledge Base...")

kb_files = {
    'description': '/content/drive/MyDrive/Dataset/SympScan/description.csv',
    'diets': '/content/drive/MyDrive/Dataset/SympScan/diets.csv',
    'medications': '/content/drive/MyDrive/Dataset/SympScan/medications.csv',
    'precautions': '/content/drive/MyDrive/Dataset/SympScan/precautions.csv',
    'workout': '/content/drive/MyDrive/Dataset/SympScan/workout.csv'
}

full_knowledge_base = {}

for key, fname in kb_files.items():
    # A missing file is reported and skipped; the remaining topics are still merged.
    if not os.path.exists(fname):
        print(f"   - ‚ö†Ô∏è Missing {fname}")
        continue

    sub_df = pd.read_csv(fname)

    # Normalise the header: trim surrounding whitespace and lowercase each column name.
    sub_df.columns = [c.strip().lower() for c in sub_df.columns]

    # The first column is assumed to hold the disease name and becomes the lookup key.
    key_col = sub_df.columns[0]
    indexed_by_disease = sub_df.set_index(key_col)

    # { "DiseaseName": {column: value, ...} }
    full_knowledge_base[key] = indexed_by_disease.to_dict(orient='index')
    print(f"   - Merged {key} ({len(sub_df)} entries)")



[2/7] Building Knowledge Base...
   - Merged description (100 entries)
   - Merged diets (100 entries)
   - Merged medications (100 entries)
   - Merged precautions (100 entries)
   - Merged workout (100 entries)


## 4. MODEL ARCHITECTURES

In [None]:

# --- TEACHER MODEL (Large, Powerful) ---
class MexarTeacher(nn.Module):
    """Teacher network: a wide MLP that maps symptom vectors to disease logits.

    Three hidden stages (512 -> 256 -> 128 units), each Linear + BatchNorm + ReLU,
    with dropout (0.4, then 0.3) after the first two stages, followed by a linear
    classification head. The layers live in ``self.net`` in the order they are listed.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        # (hidden width, dropout probability or None) for each hidden stage
        stage_specs = [(512, 0.4), (256, 0.3), (128, None)]

        layers = []
        in_features = input_size
        for width, drop_p in stage_specs:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.BatchNorm1d(width))
            layers.append(nn.ReLU())
            if drop_p is not None:
                layers.append(nn.Dropout(drop_p))
            in_features = width

        # Classification head producing raw logits
        layers.append(nn.Linear(in_features, num_classes))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class logits for a batch ``x``."""
        logits = self.net(x)
        return logits

# --- STUDENT MODEL (Nano - The one we export) ---
class MexarNano(nn.Module):
    """Student ("Nano") network: the compact MLP that is exported.

    Two narrow hidden layers (128 -> 64 units) with ReLU activations and a linear
    classification head. There is deliberately no BatchNorm or Dropout, to keep the
    network small and simple.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        hidden_widths = (128, 64)

        layers = []
        in_features = input_size
        for width in hidden_widths:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width

        # Classification head producing raw logits
        layers.append(nn.Linear(in_features, num_classes))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class logits for a batch ``x``."""
        logits = self.net(x)
        return logits

# Instantiate both networks with dimensions taken from the loaded dataset
input_dim, num_classes = len(symptom_names), len(disease_names)

# The teacher is built first, then the student; both are moved to the selected device.
teacher = MexarTeacher(input_dim, num_classes).to(device)
student = MexarNano(input_dim, num_classes).to(device)

# Report the number of parameter elements in each model for a size comparison
teacher_param_count = sum(p.numel() for p in teacher.parameters())
student_param_count = sum(p.numel() for p in student.parameters())

print("\n[3/7] Architectures Initialized")
print(f"   - Teacher Params: {teacher_param_count}")
print(f"   - Student Params: {student_param_count} (Compressed Version)")


[3/7] Architectures Initialized
   - Teacher Params: 297188
   - Student Params: 44324 (Compressed Version)


## 5. TRAINING THE TEACHER

In [None]:
print(f"\n[4/7] Training Teacher Model ({EPOCHS_TEACHER} Epochs)...")

criterion_T = nn.CrossEntropyLoss()
optimizer_T = optim.Adam(teacher.parameters(), lr=0.001)

for epoch_num in range(1, EPOCHS_TEACHER + 1):
    teacher.train()

    # Per-epoch running totals over the training batches
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard supervised step: clear grads, forward, loss, backward, update
        optimizer_T.zero_grad()
        outputs = teacher(inputs)
        loss = criterion_T(outputs, labels)
        loss.backward()
        optimizer_T.step()

        # Bookkeeping on the same training batch (model is in train mode here)
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Progress report every 5th epoch: mean batch loss and *training* accuracy
    # (no held-out data is evaluated in this loop).
    if epoch_num % 5 == 0:
        mean_batch_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        print(f"   Epoch {epoch_num}: Loss {mean_batch_loss:.4f} | Acc {train_accuracy:.2f}%")

print("   ‚úÖ Teacher Training Complete.")


[4/7] Training Teacher Model (40 Epochs)...
   Epoch 5: Loss 0.3067 | Acc 87.94%
   Epoch 10: Loss 0.2687 | Acc 88.51%
   Epoch 15: Loss 0.2503 | Acc 88.69%
   Epoch 20: Loss 0.2375 | Acc 89.03%
   Epoch 25: Loss 0.2328 | Acc 89.14%
   Epoch 30: Loss 0.2262 | Acc 89.14%
   Epoch 35: Loss 0.2211 | Acc 89.39%
   Epoch 40: Loss 0.2165 | Acc 89.44%
   ‚úÖ Teacher Training Complete.


## 6. KNOWLEDGE DISTILLATION (The Important Part)

In [None]:
# Announce the distillation stage and echo the two hyperparameters that shape the loss.
print(f"\n[5/7] Distilling Knowledge to Student (Mexar Nano)...")
print(f"   - Temperature: {TEMPERATURE}, Alpha: {ALPHA}")

# Only the student's parameters are optimised during distillation.
optimizer_S = optim.Adam(student.parameters(), lr=0.001)

def distillation_loss(student_logits, teacher_logits, labels, T, alpha):
    """Blend a soft-target distillation loss with the ordinary label loss.

    Args:
        student_logits: Raw student outputs; class scores along dim 1.
        teacher_logits: Raw teacher outputs with the same layout.
        labels: Integer class targets for the hard loss.
        T: Temperature that divides both sets of logits before the softmax.
        alpha: Weight of the soft (distillation) term; the hard term gets ``1 - alpha``.

    Returns:
        Scalar tensor ``alpha * soft + (1 - alpha) * hard``.
    """
    # Soft term: KL divergence between the temperature-scaled teacher probabilities
    # and student log-probabilities. kl_div takes log-probs as input and probs as
    # target; the result is scaled by T^2.
    teacher_probs = F.softmax(teacher_logits / T, dim=1)
    student_log_probs = F.log_softmax(student_logits / T, dim=1)
    soft_term = F.kl_div(student_log_probs, teacher_probs, reduction='batchmean') * (T * T)

    # Hard term: cross-entropy of the unscaled student logits against the true labels.
    hard_term = F.cross_entropy(student_logits, labels)

    return alpha * soft_term + (1.0 - alpha) * hard_term

# The teacher is only queried from here on: eval mode, and its forward pass runs under no_grad.
teacher.eval()
student.train()

for epoch_num in range(1, EPOCHS_STUDENT + 1):
    # Per-epoch running totals over the training batches
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Teacher's soft targets for this batch (no gradient tracking)
        with torch.no_grad():
            teacher_logits = teacher(inputs)

        # Student step against the blended distillation + label loss
        optimizer_S.zero_grad()
        student_logits = student(inputs)
        loss = distillation_loss(student_logits, teacher_logits, labels, TEMPERATURE, ALPHA)
        loss.backward()
        optimizer_S.step()

        # Bookkeeping: the student's accuracy is measured against the true labels
        running_loss += loss.item()
        predicted = student_logits.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Progress report every 5th epoch: mean batch loss and training accuracy
    if epoch_num % 5 == 0:
        mean_batch_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        print(f"   Epoch {epoch_num}: Loss {mean_batch_loss:.4f} | Acc {train_accuracy:.2f}%")

print("   ‚úÖ Distillation Complete.")


[5/7] Distilling Knowledge to Student (Mexar Nano)...
   - Temperature: 4.0, Alpha: 0.7
   Epoch 5: Loss 0.3949 | Acc 89.35%
   Epoch 10: Loss 0.2685 | Acc 89.58%
   Epoch 15: Loss 0.2348 | Acc 89.65%
   Epoch 20: Loss 0.2180 | Acc 89.76%
   Epoch 25: Loss 0.2066 | Acc 89.83%
   Epoch 30: Loss 0.1987 | Acc 89.94%
   Epoch 35: Loss 0.1929 | Acc 89.96%
   Epoch 40: Loss 0.1887 | Acc 89.90%
   ‚úÖ Distillation Complete.


## 7. FINAL EVALUATION

In [None]:
# Stage banner for the held-out test-set comparison.
print("\n[6/7] Comparing Models on Test Data...")

def evaluate_model(model, loader, name):
    """Print the top-1 accuracy of ``model`` over every batch in ``loader``.

    The model is switched to eval mode and run without gradient tracking. Batches
    are moved to the device the model's parameters live on, so the function does
    not rely on the notebook-wide ``device`` matching the model.

    Args:
        model: Module returning class logits with classes along dim 1.
        loader: Iterable of ``(inputs, labels)`` tensor batches.
        name: Label used in the printed report line.

    Returns:
        None. The result is printed only.
    """
    model.eval()

    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        # Parameter-free model: fall back to the notebook-wide device.
        model_device = device

    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(model_device), labels.to(model_device)
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader would otherwise divide by zero below.
    if total == 0:
        print(f"   - {name} Accuracy: n/a (no samples)")
        return

    print(f"   - {name} Accuracy: {100 * correct / total:.2f}%")

# Score both networks on the same held-out test batches so the two numbers are directly comparable.
evaluate_model(teacher, test_loader, "Teacher (Heavy)")
evaluate_model(student, test_loader, "Student (Nano)")


[6/7] Comparing Models on Test Data...
   - Teacher (Heavy) Accuracy: 88.71%
   - Student (Nano) Accuracy: 88.64%


## 🧪 MODEL TEST

In [None]:
import pandas as pd
import torch
import torch.nn.functional as F

# ==========================================
# 1. DEFINE TREATMENT ENGINE
# ==========================================
class TreatmentEngine:
    """Looks up diet, medication, workout and precaution advice for a disease name.

    Args:
        diets_path: CSV with two columns (disease, diet).
        meds_path: CSV with two columns (disease, medication).
        precautions_path: CSV with five columns (disease, then four precautions).
        workout_path: CSV with two columns (disease, workout).

    Raises:
        FileNotFoundError: If any of the four CSV files does not exist.
        ValueError: If a file does not have the expected number of columns
            (raised by pandas when the column names are assigned).
    """

    def __init__(self, diets_path, meds_path, precautions_path, workout_path):
        # Load from the paths the caller passed in (previously these were ignored
        # in favour of hard-coded Drive paths).
        self.diets = pd.read_csv(diets_path)
        self.meds = pd.read_csv(meds_path)
        self.precautions = pd.read_csv(precautions_path)
        self.workout = pd.read_csv(workout_path)

        # Rename columns positionally so lookups do not depend on the files' own
        # header spelling; the first column is assumed to be the disease name.
        self.diets.columns = ['Disease', 'Diet']
        self.meds.columns = ['Disease', 'Medication']
        self.precautions.columns = ['Disease', 'Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']
        self.workout.columns = ['Disease', 'Workout']

    @staticmethod
    def _rows_for(df, disease_name):
        """Return the rows of ``df`` whose disease matches, ignoring case and surrounding whitespace."""
        wanted = str(disease_name).strip().lower()
        normalised = df['Disease'].astype(str).str.strip().str.lower()
        return df[normalised == wanted]

    def get_plan(self, disease_name):
        """Fetch the full care plan for a predicted disease.

        Args:
            disease_name: Disease to look up; matching ignores case and
                leading/trailing whitespace.

        Returns:
            dict with keys ``'Diet'``, ``'Meds'`` and ``'Workout'`` (the first matching
            value, or ``"Not specified."`` when the disease is absent from that file)
            and ``'Precautions'`` (the four precaution values of the first matching
            row, which may include NaN for blank cells, or ``[]`` when absent).
        """
        plan = {}

        def get_val(df, col):
            # First matching row wins; a disease missing from this file yields the placeholder.
            row = self._rows_for(df, disease_name)
            return row[col].values[0] if not row.empty else "Not specified."

        plan['Diet'] = get_val(self.diets, 'Diet')
        plan['Meds'] = get_val(self.meds, 'Medication')
        plan['Workout'] = get_val(self.workout, 'Workout')

        # Precautions are spread across four columns of a single row.
        p_row = self._rows_for(self.precautions, disease_name)
        if not p_row.empty:
            plan['Precautions'] = [
                p_row[col].values[0]
                for col in ('Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4')
            ]
        else:
            plan['Precautions'] = []

        return plan

# ==========================================
# 2. INITIALIZE THE DOCTOR AI
# ==========================================
# Build the engine from the Drive paths already registered in `kb_files`
# (the knowledge-base cell), so the file locations are defined in one place.
# The previous try/except "fallback" retried exactly the same four paths, so it
# could never recover from a missing file; a FileNotFoundError now surfaces directly.
doctor_ai = TreatmentEngine(
    kb_files['diets'],
    kb_files['medications'],
    kb_files['precautions'],
    kb_files['workout']
)
print("‚úÖ Doctor AI (Treatment Engine) initialized successfully.")

# ==========================================
# 3. DEFINE PREDICTION FUNCTIONS
# ==========================================

def predict_and_explain_patient(model, symptom_list, symptom_names, disease_names):
    """
    Predict the most likely disease for a list of symptom strings.

    Each entry of ``symptom_list`` is lower-cased and stripped, then matched exactly
    against the equally normalised ``symptom_names``. Every match sets the
    corresponding position of a single-row input vector to 1.0; unknown symptoms
    are reported and ignored. The diagnosis and its softmax confidence are printed.
    Despite the function's name, no per-symptom attribution is computed.

    Args:
        model: Module returning class logits (or a tuple whose first item is the logits).
        symptom_list: Symptom strings supplied for the patient.
        symptom_names: Feature names in the order the model's input expects them.
        disease_names: Class names indexed by the model's output class id.

    Returns:
        ``disease_names[top_class]``, or ``None`` when no supplied symptom matched.
    """
    model.eval()

    # Build the input on the device the model actually lives on.
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        # Parameter-free model: fall back to the notebook-wide device.
        model_device = device

    # 1. Create a blank feature vector
    input_vector = torch.zeros(1, len(symptom_names), device=model_device)

    # 2. Map normalised symptom name -> feature position. On duplicate names the
    #    first position wins, matching what list.index() would return.
    symptom_index = {}
    for position, raw_name in enumerate(symptom_names):
        symptom_index.setdefault(str(raw_name).lower().strip(), position)
    found_symptoms = []

    print(f"\n--- Analyzing Symptoms: {symptom_list} ---")
    for s in symptom_list:
        s_clean = s.lower().strip()
        idx = symptom_index.get(s_clean)

        # exact match check
        if idx is not None:
            input_vector[0, idx] = 1.0
            found_symptoms.append(s_clean)
        else:
            print(f"  [!] Warning: Symptom '{s}' not found in dataset. Ignoring.")

    if not found_symptoms:
        print("  Error: No valid symptoms provided.")
        return None

    # 3. Forward pass — inference only, so skip building the autograd graph
    with torch.no_grad():
        outputs = model(input_vector)
    logits = outputs[0] if isinstance(outputs, tuple) else outputs

    probs = torch.softmax(logits, dim=1)

    # Get Top-1 Prediction
    top_prob, top_idx = probs.topk(1)
    predicted_disease = disease_names[top_idx.item()]
    confidence = top_prob.item() * 100

    print(f"  >> Diagnosis: {predicted_disease.upper()} ({confidence:.2f}%)")
    return predicted_disease

def diagnose_and_prescribe(model, symptom_list):
    """Diagnose a patient from symptom strings and print the matching care plan.

    The prediction uses the notebook-level ``symptom_names`` and ``disease_names``
    produced by the data-loading cell, i.e. the exact feature order and label
    encoding the models were trained with. (It previously read them from
    ``train_ds``, which is created in a later cell with a separately fitted encoder.)

    Args:
        model: Trained classifier passed to ``predict_and_explain_patient``.
        symptom_list: Symptom strings describing the patient.

    Returns:
        None. The diagnosis and plan are printed; nothing is printed for the plan
        when no diagnosis could be made.
    """
    # 1. Diagnose
    disease_name = predict_and_explain_patient(model, symptom_list, symptom_names, disease_names)

    if disease_name:
        # 2. Prescribe
        plan = doctor_ai.get_plan(disease_name)
        print(f"\n--- üíä TREATMENT PLAN FOR: {disease_name.upper()} ---")
        print(f"ü•ó Diet:        {plan['Diet']}")
        print(f"üíä Medication:  {plan['Meds']}")
        print(f"üèÉ Workout:     {plan['Workout']}")

        # Drop blank precaution cells (they arrive as NaN) before joining
        precautions = [str(p) for p in plan['Precautions'] if str(p) != 'nan']
        print(f"‚ö†Ô∏è Precautions: {', '.join(precautions)}")

‚úÖ Doctor AI (Treatment Engine) initialized successfully.


In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from sklearn.preprocessing import LabelEncoder

# 1. Define the Dataset Class (So Python knows how to read your file)
class SymptomDiseaseDataset(Dataset):
    """Dataset over a SympScan-style CSV: column 0 is the disease, columns 1+ are symptoms.

    Args:
        csv_path: Path of the CSV file to read.
        label_encoder: Optional fitted encoder exposing ``transform`` and ``classes_``.
            When omitted, a new ``LabelEncoder`` is fitted on this file's labels.

    Attributes set on the instance:
        X: float32 array of the symptom columns.
        y: integer class id per row.
        symptom_names: list of the symptom column names, in column order.
        disease_names: list of the encoder's classes, indexed by class id.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist. Other read or parse errors
            from pandas also propagate, rather than leaving a half-initialised object.
    """

    def __init__(self, csv_path, label_encoder=None):
        # Read the file the caller asked for; errors propagate so the caller can handle them.
        data = pd.read_csv(csv_path)

        # In SympScan: Column 0 is Disease, Col 1+ are Symptoms
        self.raw_labels = data.iloc[:, 0].astype(str)
        self.features_df = data.iloc[:, 1:]

        # Convert 0/1 features to Float32
        self.X = self.features_df.values.astype(np.float32)

        # Encode Labels (Disease Names -> 0, 1, 2...)
        if label_encoder is None:
            self.label_encoder = LabelEncoder()
            self.y = self.label_encoder.fit_transform(self.raw_labels)
        else:
            self.label_encoder = label_encoder
            self.y = self.label_encoder.transform(self.raw_labels)

        self.symptom_names = list(self.features_df.columns)
        self.disease_names = list(self.label_encoder.classes_)

    def __len__(self):
        """Number of rows (samples) in the file."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as a float32 and an int64 tensor."""
        return torch.tensor(self.X[idx]), torch.tensor(self.y[idx], dtype=torch.long)

# 2. Build `train_ds` from the main SympScan CSV on Google Drive.
#    The instance exposes `symptom_names` and `disease_names`, whose sizes are printed below.
csv_path = "/content/drive/MyDrive/Dataset/SympScan/Diseases_and_Symptoms_dataset.csv"

print(f"Loading dataset from: {csv_path} ...")
# NOTE(review): only FileNotFoundError is handled, and it is reported rather than
# re-raised, so `train_ds` stays undefined on failure and later cells that use it
# would then fail with a NameError — confirm this best-effort behaviour is intended.
try:
    train_ds = SymptomDiseaseDataset(csv_path)
    print("‚úÖ Success! 'train_ds' is now defined.")
    print(f"   - Symptoms found: {len(train_ds.symptom_names)}")
    print(f"   - Diseases found: {len(train_ds.disease_names)}")
except FileNotFoundError:
    print(f"‚ùå Error: Could not find file at {csv_path}. Please check the path.")

Loading dataset from: /content/drive/MyDrive/Dataset/SympScan/Diseases_and_Symptoms_dataset.csv ...
‚úÖ Success! 'train_ds' is now defined.
   - Symptoms found: 230
   - Diseases found: 100


In [None]:
# ==========================================
# üß™ EXECUTE LIVE TESTS
# ==========================================

def test_scenario(model, symptoms, expected_disease_hint):
    """Print a banner naming the scenario, then run diagnosis and treatment lookup.

    Args:
        model: Classifier handed to ``diagnose_and_prescribe``.
        symptoms: Symptom strings for the scenario.
        expected_disease_hint: Free-text label shown in the banner; it is only
            displayed and is not compared with the prediction.
    """
    # Three-line banner: rule, title, rule
    print(
        "\n==================================================",
        f"üßê TESTING SCENARIO: {expected_disease_hint}",
        "==================================================",
        sep="\n",
    )

    # Run diagnosis
    diagnose_and_prescribe(model, symptoms)

# Scenarios 1-3, run in order against the distilled student model.
# Each entry is (symptom strings, banner hint naming the expected diagnosis).
scenarios = [
    # --- Scenario 1: Mental Health Check ---
    (
        ['anxiety and nervousness', 'shortness of breath', 'palpitations', 'chest tightness'],
        "Expected: Panic Disorder",
    ),
    # --- Scenario 2: Common Infection ---
    (
        ['sore throat', 'fever', 'difficulty in swallowing', 'swollen or red tonsils'],
        "Expected: Strep Throat",
    ),
    # --- Scenario 3: Physical Condition ---
    (
        ['back pain', 'neck pain', 'weakness', 'leg pain'],
        "Expected: Herniated Disk / Spondylosis",
    ),
]

for scenario_symptoms, scenario_hint in scenarios:
    test_scenario(student, scenario_symptoms, scenario_hint)


üßê TESTING SCENARIO: Expected: Panic Disorder

--- Analyzing Symptoms: ['anxiety and nervousness', 'shortness of breath', 'palpitations', 'chest tightness'] ---
  >> Diagnosis: PANIC DISORDER (97.67%)

--- üíä TREATMENT PLAN FOR: PANIC DISORDER ---
ü•ó Diet:        ['Magnesium-rich foods (spinach, pumpkin seeds, almonds)', 'Omega-3 fatty acids (salmon, flaxseeds, walnuts)', 'Complex carbs (oats, quinoa)', 'Green tea (L-theanine)', 'Limit caffeine and sugar']
üíä Medication:  ['SSRIs (e.g., Sertraline, Fluoxetine)', 'Benzodiazepines (e.g., Clonazepam, Alprazolam)', 'SNRIs (e.g., Venlafaxine)', 'Beta-blockers', 'Cognitive Behavioral Therapy (CBT)']
üèÉ Workout:     ["Deep breathing exercises: Calm your mind by focusing on slow, deep breaths", "Yoga: Combines breathing and movement for relaxation", "Mindfulness meditation: Helps reduce anxiety by staying present", "Regular aerobic exercise: Boosts mood and reduces stress"]
‚ö†Ô∏è Precautions: Practice deep breathing, Avoid caffeine

In [None]:
# --- Scenario 4: Respiratory Condition ---
# Symptom set chosen to probe for Asthma specifically
asthma_symptoms = ['wheezing', 'cough', 'shortness of breath', 'chest tightness']
asthma_hint = "Expected: Asthma"

test_scenario(student, asthma_symptoms, asthma_hint)


üßê TESTING SCENARIO: Expected: Asthma

--- Analyzing Symptoms: ['wheezing', 'cough', 'shortness of breath', 'chest tightness'] ---
  >> Diagnosis: CHRONIC OBSTRUCTIVE PULMONARY DISEASE (COPD) (41.46%)

--- üíä TREATMENT PLAN FOR: CHRONIC OBSTRUCTIVE PULMONARY DISEASE (COPD) ---
ü•ó Diet:        ['Anti-inflammatory foods (turmeric, ginger, berries)', 'Omega-3 fatty acids (wild salmon, walnuts)', 'High-protein foods (chicken, beans)', 'Vitamin C-rich foods (oranges, broccoli)', 'Hydration']
üíä Medication:  ['Bronchodilators (e.g., Salbutamol)', 'Inhaled corticosteroids', 'Phosphodiesterase-4 inhibitors (e.g., Roflumilast)', 'Oxygen therapy', 'Antibiotics during exacerbations']
üèÉ Workout:     ["Pursed-lip breathing: Improve oxygen use", "Walking: Build endurance safely", "Stationary biking: Low strain on lungs", "Pulmonary rehabilitation exercises: Doctor-guided regimens"]
‚ö†Ô∏è Precautions: 


## 8. EXPORTING ARTIFACTS

In [None]:
print("\n[7/7] Saving Artifacts...")

# 1. Save Student Weights (The Nano Model)
# Store CPU copies of the tensors so the checkpoint also loads on machines
# without CUDA, without the consumer having to pass `map_location`.
student_state_cpu = {key: tensor.detach().cpu() for key, tensor in student.state_dict().items()}
torch.save(student_state_cpu, "mexar_nano_student.pth")

# 2. Save Complete Metadata
# This is crucial for the app. It connects the 0/1 inputs to real symptom names,
# and connects the output integers 0,1,2 to real disease names + advice.
# Both name collections are stored as plain Python lists (the encoder's classes
# are a NumPy array), so the pickle matches the documented "list of strings" shape.
metadata = {
    "model_type": "Knowledge Distillation (Teacher-Student)",
    "input_dim": input_dim,
    "num_classes": num_classes,
    "symptom_names": list(symptom_names),                  # List of strings [symptom1, symptom2...]
    "disease_names": np.asarray(disease_names).tolist(),   # List of strings [disease1, disease2...] (LabelEncoder classes)
    "knowledge_base": full_knowledge_base                  # The merged CSV data
}

with open("mexar_metadata.pkl", "wb") as f:
    pickle.dump(metadata, f)

print("üíæ Saved: 'mexar_nano_student.pth'")
print("üíæ Saved: 'mexar_metadata.pkl'")
print("\nüî• MEXAR Nano Update Complete.")


[7/7] Saving Artifacts...
üíæ Saved: 'mexar_nano_student.pth'
üíæ Saved: 'mexar_metadata.pkl'

üî• MEXAR Nano Update Complete.
