<a href="https://colab.research.google.com/github/hrushikeshsahu19/ML_algorithm_custom_code/blob/main/NaiveBayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per
    class. Prediction picks the class with the largest log-posterior,
    ``log P(c) + sum_j log N(x_j | mean_cj, variance_cj)``.

    Parameters:
    var_smoothing (float): Fraction of the largest per-feature variance of
        the training data that is added to every variance when densities
        are evaluated. This keeps the computation finite when a feature is
        constant within a class. Set to 0 to disable.

    Attributes (populated by ``fit``):
    classes (numpy.ndarray): Sorted unique class labels, shape (n_classes,).
    mean (numpy.ndarray): Per-class feature means, (n_classes, n_features).
    variance (numpy.ndarray): Per-class (unsmoothed) feature variances,
        (n_classes, n_features).
    priors (numpy.ndarray): Class prior probabilities, shape (n_classes,).
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        self.classes = None
        self.mean = None
        self.variance = None
        self.priors = None
        # Absolute amount added to the variances; derived from the data in fit().
        self._epsilon = 0.0

    def fit(self, X, y):
        """
        Fit the Naive Bayes model to the training data.

        Parameters:
        X (array-like): Feature matrix of shape (n_samples, n_features).
        y (array-like): Target vector of shape (n_samples,).

        Returns:
        NaiveBayes: The fitted estimator (``self``), to allow call chaining.
        """
        # Accept lists / integer arrays as well as float ndarrays.
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)

        n_samples, n_features = X.shape
        self.classes = np.unique(y)  # Sorted unique class labels
        n_classes = len(self.classes)

        # Initialize mean, variance, and priors
        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.variance = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]  # Samples belonging to class c
            self.mean[idx, :] = X_c.mean(axis=0)
            self.variance[idx, :] = X_c.var(axis=0)
            self.priors[idx] = X_c.shape[0] / n_samples  # P(c)

        # Scale the smoothing term with the data so it is negligible for
        # well-behaved features but still prevents division by zero.
        max_variance = X.var(axis=0).max() if X.size else 0.0
        epsilon = self.var_smoothing * max_variance
        if epsilon <= 0:
            # Every feature is constant over the whole training set; fall
            # back to the raw smoothing value so variances stay positive.
            epsilon = self.var_smoothing
        self._epsilon = epsilon

        return self

    def _log_gaussian(self, class_idx, x):
        """
        Compute the per-feature Gaussian log-density for one class.

        Working in log space avoids the underflow of ``exp`` to 0 (and the
        resulting ``log(0) = -inf``) for points far away from the class mean.

        Parameters:
        class_idx (int): Index of the class.
        x (numpy.ndarray): Feature vector(s); the last axis is n_features.

        Returns:
        numpy.ndarray: Log-densities with the same shape as ``x``.
        """
        mean = self.mean[class_idx]
        variance = self.variance[class_idx] + self._epsilon
        return -0.5 * (np.log(2.0 * np.pi * variance) + (x - mean) ** 2 / variance)

    def _gaussian_probability(self, class_idx, x):
        """
        Compute the Gaussian probability for a feature vector.

        Parameters:
        class_idx (int): Index of the class.
        x (numpy.ndarray): Feature vector of shape (n_features,).

        Returns:
        numpy.ndarray: Probabilities of each feature for the class.
        """
        return np.exp(self._log_gaussian(class_idx, x))

    def predict(self, X):
        """
        Predict class labels for the input data.

        Parameters:
        X (array-like): Feature matrix of shape (n_samples, n_features).

        Returns:
        numpy.ndarray: Predicted class labels of shape (n_samples,).

        Raises:
        RuntimeError: If called before ``fit``.
        """
        if self.classes is None:
            raise RuntimeError("NaiveBayes instance is not fitted yet; call fit() first.")

        X = np.asarray(X, dtype=np.float64)
        n_samples = X.shape[0]
        n_classes = len(self.classes)
        posteriors = np.zeros((n_samples, n_classes))

        for idx in range(n_classes):
            prior = np.log(self.priors[idx])  # Log of prior probability
            # Independence assumption: the joint log-likelihood is the sum
            # of the per-feature log-densities.
            class_conditional = np.sum(self._log_gaussian(idx, X), axis=1)
            posteriors[:, idx] = prior + class_conditional

        return self.classes[np.argmax(posteriors, axis=1)]

# Demo: train and score the classifier on a synthetic binary problem.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split

    # Build a reproducible toy dataset: 1000 samples, 10 features, 2 classes.
    X, y = make_classification(
        n_samples=1000,
        n_features=10,
        n_classes=2,
        random_state=42,
    )

    # Hold out 20% of the samples for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    # Estimate the per-class statistics from the training split.
    model = NaiveBayes()
    model.fit(X_train, y_train)

    # Label the held-out samples.
    predictions = model.predict(X_test)

    # Report the fraction of held-out samples classified correctly.
    print("Accuracy:", accuracy_score(y_test, predictions))


Accuracy: 0.81
