# **Naive Bayes Classifier for Malaria Prediction**
This notebook demonstrates how to apply the Naive Bayes algorithm to predict malaria using symptom data.

In [15]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

In [4]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [17]:
# Step 2: Load the Dataset
df = pd.read_csv('malaria_symptoms_dataset.csv')
df.head()

Unnamed: 0,malaria,fever,chills,headache,nausea,sweating
0,yes,no,yes,yes,yes,yes
1,yes,yes,yes,yes,yes,no
2,no,no,yes,yes,no,no
3,yes,yes,yes,yes,yes,yes
4,no,no,yes,no,no,yes


In [18]:
# Step 3: Encode Categorical Variables
le = LabelEncoder()
for col in df.columns:
    df[col] = le.fit_transform(df[col])

df.head()

Unnamed: 0,malaria,fever,chills,headache,nausea,sweating
0,1,0,1,1,1,1
1,1,1,1,1,1,0
2,0,0,1,1,0,0
3,1,1,1,1,1,1
4,0,0,1,0,0,1


In [19]:
# Step 4: Split the Data
X = df.drop('malaria', axis=1)
y = df['malaria']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [20]:
# Step 5: Train Naive Bayes Classifier
model = CategoricalNB()
model.fit(X_train, y_train)

In [21]:
# Step 6: Make Predictions and Evaluate
y_pred = model.predict(X_test)



In [22]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9222222222222223

Confusion Matrix:
 [[48  3]
 [ 4 35]]

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.94      0.93        51
           1       0.92      0.90      0.91        39

    accuracy                           0.92        90
   macro avg       0.92      0.92      0.92        90
weighted avg       0.92      0.92      0.92        90

