<a href="https://colab.research.google.com/github/aditisingh2912/Data-Mining-/blob/main/NaiveBayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, cohen_kappa_score, roc_curve
from sklearn.preprocessing import LabelEncoder

# Iris: every column except the last is taken as a feature; the last column
# holds the class label and is label-encoded into integer ids.
# NOTE(review): if this CSV carries a row-id column, the `:-1` slice keeps it
# as a feature, which could leak the label ordering -- confirm against the file.
iris_df = pd.read_csv("/content/drive/MyDrive/Iris.csv")
X_iris = iris_df.iloc[:, :-1].values
y_iris = LabelEncoder().fit_transform(iris_df.iloc[:, -1].values)

# SPECT: the file is read without a header row; column 0 is used as the target
# and all remaining columns as features.
spect_df = pd.read_csv("/content/drive/MyDrive/SPECT.train", header=None)
X_spect = spect_df.iloc[:, 1:].values
y_spect = spect_df.iloc[:, 0].values

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled per class as an independent normal distribution.
    All scoring is done in log space so that samples far from a class mean do
    not underflow to a zero likelihood (which would yield ``log(0) = -inf``
    for every class and make the arg-max and the probabilities meaningless).

    Attributes set by ``fit``:
        classes:  sorted array of the distinct labels seen in training.
        mean:     dict mapping class label -> per-feature mean vector.
        variance: dict mapping class label -> per-feature variance vector
                  (with a small constant added for numerical stability).
        prior:    dict mapping class label -> empirical class frequency.
    """

    # Added to every variance so constant features never divide by zero.
    _VARIANCE_FLOOR = 1e-6

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: 2-D array-like of shape (n_samples, n_features), numeric.
            y: 1-D array-like of n_samples class labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.mean = {}
        self.variance = {}
        self.prior = {}

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = np.mean(X_c, axis=0)
            self.variance[c] = np.var(X_c, axis=0) + self._VARIANCE_FLOOR
            self.prior[c] = len(X_c) / len(X)

    def gaussian_probability(self, x, mean, variance):
        """Return the normal density of ``x`` for the given mean and variance.

        Kept for callers that want the raw density. It can underflow to 0.0
        for points far from ``mean``; ``predict`` and ``predict_proba`` do not
        use it for that reason and work with log-densities instead.
        """
        exponent = np.exp(-((x - mean) ** 2) / (2 * variance))
        return (1 / np.sqrt(2 * np.pi * variance)) * exponent

    def _joint_log_likelihood(self, X):
        """Return ``log P(c) + sum_j log N(x_j | mean_cj, var_cj)``.

        The result has shape (n_samples, n_classes), with columns ordered like
        ``self.classes``. The Gaussian log-density is evaluated analytically
        rather than as ``log(pdf(x))`` to avoid floating-point underflow.
        """
        X = np.asarray(X, dtype=float)
        scores = np.empty((X.shape[0], len(self.classes)))
        for j, c in enumerate(self.classes):
            var = self.variance[c]
            log_density = -0.5 * (
                np.log(2 * np.pi * var) + (X - self.mean[c]) ** 2 / var
            )
            scores[:, j] = np.log(self.prior[c]) + log_density.sum(axis=1)
        return scores

    def predict(self, X):
        """Return the most probable class label for each row of ``X``."""
        scores = self._joint_log_likelihood(X)
        return self.classes[np.argmax(scores, axis=1)]

    def predict_proba(self, X):
        """Return posterior class probabilities for each row of ``X``.

        The output has shape (n_samples, n_classes); each row sums to 1 and
        columns follow the order of ``self.classes``.
        """
        scores = self._joint_log_likelihood(X)
        # Subtract the row maximum before exponentiating (stable softmax).
        shifted = np.exp(scores - scores.max(axis=1, keepdims=True))
        return shifted / shifted.sum(axis=1, keepdims=True)


def evaluate_naive_bayes(X, y, dataset_name):
    """Run 10-fold cross-validation of ``NaiveBayes`` and print the metrics.

    For every fold the function prints the number of correctly/incorrectly
    classified instances, accuracy, weighted precision/recall/F1, Cohen's
    kappa and the confusion matrix. After the last fold it prints the
    per-metric averages over all folds.

    For two-class problems the true-positive and false-positive rates are
    derived from the fold's confusion matrix, treating the larger label as the
    positive class. For problems with more than two classes they are reported
    as NaN.

    Args:
        X: 2-D numpy array of features, shape (n_samples, n_features).
        y: 1-D numpy array of class labels, length n_samples.
        dataset_name: label used in the printed header.
    """
    labels = np.unique(y)
    is_binary = len(labels) == 2
    kf = KFold(n_splits=10, shuffle=True, random_state=42)
    metrics = []

    print(f"\nEvaluating {dataset_name} dataset:\n")
    for fold, (train_idx, test_idx) in enumerate(kf.split(X)):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model = NaiveBayes()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        # zero_division=0 on all three keeps folds with an unpredicted or
        # absent class from emitting warnings inconsistently.
        precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
        # Fixing the label set keeps the matrix shape identical across folds
        # even when a class is missing from a particular test split.
        conf_matrix = confusion_matrix(y_test, y_pred, labels=labels)
        kappa = cohen_kappa_score(y_test, y_pred)

        if is_binary:
            # Rates of the actual predictions, not an arbitrary point on the
            # ROC curve. Undefined (NaN) when the fold has no positives or
            # no negatives.
            tn, fp, fn, tp = conf_matrix.ravel()
            tp_rate = tp / (tp + fn) if (tp + fn) else np.nan
            fp_rate = fp / (fp + tn) if (fp + tn) else np.nan
        else:
            fp_rate, tp_rate = np.nan, np.nan

        correct_instances = np.sum(y_test == y_pred)
        incorrect_instances = np.sum(y_test != y_pred)

        metrics.append([accuracy, precision, recall, f1, kappa, tp_rate, fp_rate])

        print(f"Fold {fold+1}:")
        print(f"Correctly Classified Instances: {correct_instances}")
        print(f"Incorrectly Classified Instances: {incorrect_instances}")
        print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}, Kappa: {kappa:.4f}")
        print(f"Confusion Matrix:\n{conf_matrix}\n")

    metrics = np.array(metrics, dtype=float)
    avg_metrics = np.mean(metrics, axis=0)
    # Average the rates only over folds where they are defined, so a single
    # degenerate fold does not turn the overall figure into NaN.
    for col in (5, 6):
        defined = metrics[~np.isnan(metrics[:, col]), col]
        avg_metrics[col] = defined.mean() if defined.size else np.nan

    print("Final Average Metrics:")
    print(f"Accuracy: {avg_metrics[0]:.4f}, Precision: {avg_metrics[1]:.4f}, Recall: {avg_metrics[2]:.4f}, F1-score: {avg_metrics[3]:.4f}, Kappa: {avg_metrics[4]:.4f}")
    print(f"True Positive Rate: {avg_metrics[5]:.4f}, False Positive Rate: {avg_metrics[6]:.4f}\n")


# Run the cross-validated evaluation on both datasets loaded above; each call
# prints per-fold metrics followed by the averages.
evaluate_naive_bayes(X_iris, y_iris, "IRIS")
evaluate_naive_bayes(X_spect, y_spect, "HEART (SPECT)")



Evaluating IRIS dataset:

Fold 1:
Correctly Classified Instances: 15
Incorrectly Classified Instances: 0
Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000, Kappa: 1.0000
Confusion Matrix:
[[6 0 0]
 [0 6 0]
 [0 0 3]]

Fold 2:
Correctly Classified Instances: 15
Incorrectly Classified Instances: 0
Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000, Kappa: 1.0000
Confusion Matrix:
[[4 0 0]
 [0 3 0]
 [0 0 8]]

Fold 3:
Correctly Classified Instances: 15
Incorrectly Classified Instances: 0
Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000, Kappa: 1.0000
Confusion Matrix:
[[9 0 0]
 [0 4 0]
 [0 0 2]]

Fold 4:
Correctly Classified Instances: 15
Incorrectly Classified Instances: 0
Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000, Kappa: 1.0000
Confusion Matrix:
[[4 0 0]
 [0 6 0]
 [0 0 5]]

Fold 5:
Correctly Classified Instances: 15
Incorrectly Classified Instances: 0
Accuracy: 1.0000, Precision: 1.0000, Recall: 1.000

  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.variance[c])))
  likelihood = np.sum(np.log(self.gaussian_probability(x, self.mean[c], self.var