In [None]:
# Import necessary libraries
import numpy as np  # For numerical operations
import pandas as pd  # For data manipulation and analysis
from sklearn.model_selection import train_test_split  # For splitting data into training and testing sets
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report  # For model evaluation metrics

# Read the diabetes data from disk.
# By the convention assumed here, every column but the last is a medical
# measurement (feature) and the last column is the label:
# 1 = patient has diabetes, 0 = patient does not.
data = pd.read_csv('/content/diabetes.csv')
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Naive Bayes Classifier Implementation
class NaiveBayes:
    """Gaussian Naive Bayes classifier for continuous features.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated in ``fit``.
    ``predict`` picks the class with the largest log-posterior
    (log prior + sum of per-feature log-likelihoods).
    """

    # Added to the variance-derived denominators so a zero-variance feature
    # does not cause a division by zero.
    _EPS = 1e-9

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of shape (n_samples,) holding the class labels.

        Raises:
            ValueError: If ``X`` and ``y`` have a different number of rows.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(X)} and {len(y)}"
            )

        self.classes = np.unique(y)
        # Dictionaries to store class-specific statistics
        self.mean = {}       # Mean for each feature per class
        self.variance = {}   # Variance for each feature per class
        self.priors = {}     # Prior probabilities for each class

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = np.mean(X_c, axis=0)
            self.variance[c] = np.var(X_c, axis=0)
            self.priors[c] = X_c.shape[0] / X.shape[0]

    def gaussian_pdf(self, x, mean, var):
        """Return the Gaussian probability density of ``x``.

        Kept for callers that want the raw density. ``predict`` does not use
        it, because the density underflows to 0 for points far from the mean.
        """
        eps = self._EPS  # To avoid division by zero
        coef = 1 / np.sqrt(2 * np.pi * var + eps)
        exponent = np.exp(-((x - mean) ** 2) / (2 * var + eps))
        return coef * exponent

    def _log_gaussian_pdf(self, x, mean, var):
        """Return the log of ``gaussian_pdf`` computed directly in log space.

        Uses the same eps placement as ``gaussian_pdf`` so the two agree
        wherever the density is representable, but stays finite when the
        density itself would underflow to 0 (and its log to -inf).
        """
        eps = self._EPS
        log_coef = -0.5 * np.log(2 * np.pi * var + eps)
        log_exponent = -((x - mean) ** 2) / (2 * var + eps)
        return log_coef + log_exponent

    def predict(self, X):
        """Predict a class label for every sample in ``X``.

        Args:
            X: Iterable of samples, each with one value per feature.

        Returns:
            numpy array with one predicted label per sample.
        """
        # The log-priors do not depend on the sample; compute them once.
        log_priors = {c: np.log(self.priors[c]) for c in self.classes}

        predictions = []
        for x in X:
            posteriors = [
                log_priors[c]
                + np.sum(self._log_gaussian_pdf(x, self.mean[c], self.variance[c]))
                for c in self.classes
            ]
            predictions.append(self.classes[np.argmax(posteriors)])
        return np.array(predictions)

# Fit the classifier on the training split, then label the held-out rows.
model = NaiveBayes()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report each evaluation metric in turn, comparing true vs. predicted labels.
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(label, metric(y_test, y_pred))

Accuracy: 0.7662337662337663
Confusion Matrix:
 [[79 20]
 [16 39]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.80      0.81        99
           1       0.66      0.71      0.68        55

    accuracy                           0.77       154
   macro avg       0.75      0.75      0.75       154
weighted avg       0.77      0.77      0.77       154

