## Naive Bayes Classification Algorithm

### Categorical Naive Bayes

In [1]:
class CategoricalNaiveBayes:
    """Naive Bayes classifier for categorical features with additive smoothing.

    Attributes populated by ``fit``:
        classes_: Class labels, in order of first appearance in ``y``.
        class_log_prior_: Dict mapping class label -> log prior probability.
        feature_log_prob_: Dict mapping class label -> list with one dict per
            feature column; each dict maps every category observed for that
            column (in any class) to its smoothed log-likelihood.
    """

    def __init__(self, alpha=1.0):
        """ Create an unfitted classifier.
        Args:
            alpha (float): Additive (Laplace) smoothing strength; must be >= 0.
        """
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha
        self.class_log_prior_ = None
        self.feature_log_prob_ = None
        self.classes_ = None

    def fit(self, X, y):
        """ Fit the Naive Bayes classifier to the training data.
        Args:
            X (list of list): The features of the training data.
            y (list): The target labels.
        Raises:
            ValueError: If X is empty, rows have differing lengths, or X and y
                differ in length.
        """
        from collections import Counter
        import numpy as np

        rows = [list(xi) for xi in X]
        targets = list(y)
        if not rows:
            raise ValueError("X must contain at least one sample")
        if len(rows) != len(targets):
            raise ValueError("X and y must have the same number of samples")
        n_features = len(rows[0])
        if any(len(row) != n_features for row in rows):
            raise ValueError("all rows of X must have the same number of features")

        # Counter keeps first-appearance order, so classes_ (and therefore
        # tie-breaking in predict) is deterministic, unlike iterating a set.
        class_count = Counter(targets)
        total_samples = len(targets)
        self.classes_ = list(class_count)
        self.class_log_prior_ = {
            cls: np.log(count / total_samples) for cls, count in class_count.items()
        }

        # Per-class category counts, plus the global vocabulary of each column.
        feature_counts = {
            cls: [Counter() for _ in range(n_features)] for cls in self.classes_
        }
        categories = [set() for _ in range(n_features)]
        for xi, yi in zip(rows, targets):
            for idx, value in enumerate(xi):
                feature_counts[yi][idx][value] += 1
                categories[idx].add(value)

        # Smooth over the column's full vocabulary so that every class assigns
        # a finite probability to every known category and each table sums to 1.
        alpha = self.alpha
        self.feature_log_prob_ = {
            cls: [
                {
                    value: np.log(
                        (feature_counts[cls][idx][value] + alpha)
                        / (class_count[cls] + alpha * len(categories[idx]))
                    )
                    for value in categories[idx]
                }
                for idx in range(n_features)
            ]
            for cls in self.classes_
        }

    def predict(self, X):
        """ Predict class labels for the input features.
        Args:
            X (list of list): The features to predict.
        Returns:
            list: One predicted class label per row of X. Ties go to the class
                that appeared first in the training labels.
        """
        predictions = []
        for xi in X:
            class_scores = {}
            for cls in self.classes_:
                tables = self.feature_log_prob_[cls]
                score = self.class_log_prior_[cls]
                for idx, value in enumerate(xi):
                    # A category never seen in training is missing from every
                    # class's table, so it carries no evidence: add 0 for all
                    # classes instead of collapsing every score to -inf.
                    score += tables[idx].get(value, 0.0)
                class_scores[cls] = score
            predictions.append(max(class_scores, key=class_scores.get))
        return predictions

# Demo: train on five labelled rows, then classify two unseen rows.
labels = ['no', 'no', 'yes', 'yes', 'no']
data = [
    ['sunny', 'hot', 'high', False],
    ['sunny', 'hot', 'high', True],
    ['overcast', 'hot', 'high', False],
    ['rainy', 'mild', 'high', False],
    ['rainy', 'cool', 'normal', False],
]

# Rows to classify once the model has been fitted
test_data = [
    ['rainy', 'cool', 'normal', True],
    ['sunny', 'mild', 'high', False],
]

# Fit the classifier and show the predicted label for each test row
model = CategoricalNaiveBayes()
model.fit(data, labels)
print(model.predict(test_data))


['no', 'no']


### Gaussian Naive Bayes

In [2]:
import numpy as np

class GaussianNaiveBayes:
    """Naive Bayes classifier that models each feature as a per-class Gaussian.

    Attributes populated by ``fit``:
        classes: Sorted array of distinct class labels (from ``np.unique``).
        means: Dict mapping class label -> per-feature mean vector.
        variances: Dict mapping class label -> per-feature variance vector.
        priors: Dict mapping class label -> fraction of training samples.
    """

    def __init__(self):
        self.means = {}
        self.variances = {}
        self.priors = {}
        self.classes = None

    def fit(self, X, y):
        """ Fit the Naive Bayes classifier to the training data.
        Args:
            X (numpy.ndarray): The features of the training data, shape
                (n_samples, n_features). Array-likes such as nested lists
                are accepted.
            y (numpy.ndarray): The target labels, one per sample.
        Raises:
            ValueError: If X is not 2-D, is empty, or its length differs from y.
        """
        # Coerce to arrays so the boolean mask below also works for plain lists.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples")

        # Start from a clean slate so refitting does not keep stale classes.
        self.means = {}
        self.variances = {}
        self.priors = {}
        self.classes = np.unique(y)
        for cls in self.classes:
            X_cls = X[y == cls]
            self.means[cls] = np.mean(X_cls, axis=0)
            self.variances[cls] = np.var(X_cls, axis=0)
            self.priors[cls] = X_cls.shape[0] / X.shape[0]

    def _joint_log_likelihood(self, X):
        """ Return log P(class) + log P(x | class), one column per class. """
        # A 1-D input is treated as a single sample.
        X = np.atleast_2d(np.asarray(X, dtype=float))
        eps = 1e-6  # to avoid division by zero in variance
        columns = []
        for cls in self.classes:
            smoothed_var = self.variances[cls] + eps
            log_prior = np.log(self.priors[cls])
            exponent = -0.5 * np.sum(((X - self.means[cls]) ** 2) / smoothed_var, axis=1)
            log_norm = -0.5 * np.sum(np.log(2. * np.pi * smoothed_var))
            columns.append(log_prior + exponent + log_norm)
        return np.column_stack(columns)

    def predict_proba(self, X):
        """ Compute probabilities of possible outcomes for samples in X.
        Args:
            X (numpy.ndarray): The input features.
        Returns:
            numpy.ndarray: Shape (n_samples, n_classes); each row holds the
                posterior class probabilities (columns ordered as
                ``self.classes``) and sums to 1.
        """
        joint = self._joint_log_likelihood(X)
        # Subtract the row maximum before exponentiating for numerical
        # stability; the shift cancels out in the normalisation.
        shifted = np.exp(joint - joint.max(axis=1, keepdims=True))
        return shifted / shifted.sum(axis=1, keepdims=True)

    def predict(self, X):
        """ Perform classification on an array of test vectors X.
        Args:
            X (numpy.ndarray): The input features.
        Returns:
            numpy.ndarray: The most probable class label for each sample.
        """
        joint = self._joint_log_likelihood(X)
        return self.classes[np.argmax(joint, axis=1)]

# Demo: fit on random features with random binary labels, then classify
# a handful of fresh random points.
np.random.seed(0)  # fixed seed so the draws below are repeatable
X = np.random.rand(100, 3)  # 100 samples, 3 features
y = np.random.choice(['red', 'blue'], size=100)  # binary target

# Train the classifier on the synthetic data
model = GaussianNaiveBayes()
model.fit(X, y)

# Classify five new random samples and show the result
test_data = np.random.rand(5, 3)
predictions = model.predict(test_data)
print("Predictions:", predictions)


Predictions: ['blue' 'red' 'blue' 'red' 'red']
