In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report

In [2]:
# Toy dataset: each row pairs a comma-separated symptom description with its diagnosis
data = pd.DataFrame(
    [
        ("fever, cough, sore throat", 'disease A'),
        ("fever, body aches, fatigue", 'disease A'),
        ("runny nose, sore throat, fatigue", 'disease B'),
        ("cough, shortness of breath", 'disease A'),
        ("fever, chills, body aches", 'disease A'),
        ("headache, nausea, vomiting", 'disease B'),
        ("fever, cough, body aches", 'disease A'),
        ("nausea, stomach pain, vomiting", 'disease B'),
        ("fatigue, cough, sore throat", 'disease A'),
        ("fatigue, fever, chills", 'disease A'),
    ],
    columns=['Symptoms', 'Disease'],
)

In [3]:
# Encode labels (disease A = 1, disease B = 0).
# A plain `.map(dict)` turns every value that is not a key of the dict into NaN,
# so running this cell a second time (when the column already holds 0/1) would
# wipe out all labels. Falling back to the current value keeps the cell safe to
# re-run.
label_encoding = {'disease A': 1, 'disease B': 0}
data['Disease'] = data['Disease'].map(lambda label: label_encoding.get(label, label))

# Fail here, rather than later at model fitting, if an unexpected label is present.
assert data['Disease'].isin([0, 1]).all(), "unexpected value in 'Disease' column"

In [4]:
# Model input (raw symptom text) and target (encoded disease label)
X, y = data['Symptoms'], data['Disease']

In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
# No `stratify` argument is passed, so the class mix of each part depends on the seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [6]:
# Convert symptoms text into feature vectors
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [7]:
# Fit a multinomial Naive Bayes classifier on the training word counts
model = MultinomialNB()
model.fit(X=X_train_vectorized, y=y_train)

In [8]:
# Label the held-out samples with the fitted classifier
y_pred = model.predict(X=X_test_vectorized)

In [9]:
# Evaluate the model.
# `labels=[0, 1]` pins the report rows to the label encoding (0 = disease B,
# 1 = disease A) so that `target_names` always lines up with them. Without it,
# classification_report derives the classes from y_test/y_pred and raises a
# ValueError whenever a split leaves only one class present, because the class
# count no longer matches the two target names.
# `zero_division=0` reports 0.0 (instead of emitting a warning) for a class that
# has no true or predicted samples.
print("\nClassification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=[0, 1],
    target_names=['disease B', 'disease A'],
    zero_division=0,
))



Classification Report:
              precision    recall  f1-score   support

   disease B       1.00      1.00      1.00         1
   disease A       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

