## Bayesian Classification

$$P(L \mid \text{features}) = \frac{P(\text{features} \mid L) \, P(L)}{P(\text{features})}$$

$$\frac{P(L_1 \mid \text{features})}{P(L_2 \mid \text{features})} = \frac{P(\text{features} \mid L_1)}{P(\text{features} \mid L_2)} 
\frac{P(L_1)}{P(L_2)}$$

In [15]:
import math

class NaiveBayes:
    """Naive Bayes classifier for categorical features with additive smoothing.

    Every feature is treated as a discrete category. Feature values only
    need to be hashable; samples are indexable sequences of such values.

    Attributes:
        alpha: Additive (Laplace) smoothing strength used by ``predict``.
        classes: Class labels, in the order they first appear in ``y``.
        class_probabilities: Maps class -> prior probability of the class.
        class_feature_probabilities: Maps class -> feature index -> value ->
            relative frequency of that value among the class's training
            samples (unsmoothed).
    """

    def __init__(self, alpha=1.0):
        """Create an untrained classifier.

        Args:
            alpha: Non-negative smoothing strength. ``1.0`` is Laplace
                smoothing; ``0`` disables smoothing, in which case a value
                that a class never produced gives that class zero likelihood.

        Raises:
            ValueError: If ``alpha`` is negative.
        """
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha
        self.classes = []
        self.class_probabilities = {}
        self.class_feature_probabilities = {}
        # Raw statistics kept so that predict() can smooth at query time.
        self._class_counts = {}          # class -> number of training samples
        self._feature_value_counts = {}  # class -> feature index -> value -> count
        self._feature_values = {}        # feature index -> set of values seen in training

    def train(self, X, y):
        """Estimate class priors and per-class feature value frequencies.

        Any state from a previous call is discarded.

        Args:
            X: Sequence of samples; the feature count is taken from ``X[0]``.
            y: Sequence of class labels, one per sample.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("cannot train on an empty dataset")
        if len(y) != n_samples:
            raise ValueError("X and y must contain the same number of samples")
        n_features = len(X[0])

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # class order (and therefore tie-breaking in predict) is reproducible.
        self.classes = list(dict.fromkeys(y))

        class_counts = {class_: 0 for class_ in self.classes}
        value_counts = {
            class_: {feature: {} for feature in range(n_features)}
            for class_ in self.classes
        }
        feature_values = {feature: set() for feature in range(n_features)}

        # Single pass over the data collecting every count that is needed.
        for sample, label in zip(X, y):
            class_counts[label] += 1
            for feature in range(n_features):
                value = sample[feature]
                counts = value_counts[label][feature]
                counts[value] = counts.get(value, 0) + 1
                feature_values[feature].add(value)

        self.class_probabilities = {
            class_: class_counts[class_] / n_samples for class_ in self.classes
        }

        self.class_feature_probabilities = {}
        for class_ in self.classes:
            n_class_samples = class_counts[class_]
            self.class_feature_probabilities[class_] = {
                feature: {
                    value: count / n_class_samples
                    for value, count in value_counts[class_][feature].items()
                }
                for feature in range(n_features)
            }

        self._class_counts = class_counts
        self._feature_value_counts = value_counts
        self._feature_values = feature_values

    def predict(self, X):
        """Return the most probable class label for each sample in ``X``.

        Scores are accumulated in log space. A feature value that was seen in
        training, but never for a particular class, receives the smoothed
        probability ``alpha / (n_class + alpha * n_values)`` instead of being
        ignored; ignoring it would favour exactly the classes that never
        produced the value. A value never seen in training at all carries no
        evidence for any class and is skipped.

        Args:
            X: Sequence of samples laid out like the training samples.

        Returns:
            A list with one predicted label per sample. Ties go to the class
            that appeared first in the training labels.
        """
        predictions = []
        for sample in X:
            scores = {}
            for class_ in self.classes:
                score = math.log(self.class_probabilities[class_])
                n_class_samples = self._class_counts[class_]
                for feature, value in enumerate(sample):
                    vocabulary = self._feature_values[feature]
                    if value not in vocabulary:
                        continue
                    count = self._feature_value_counts[class_][feature].get(value, 0)
                    likelihood = (count + self.alpha) / (
                        n_class_samples + self.alpha * len(vocabulary)
                    )
                    # likelihood is 0 only when alpha == 0 and the class never
                    # produced this value; log(0) would raise, so use -inf.
                    score += math.log(likelihood) if likelihood > 0 else -math.inf
                scores[class_] = score
            predictions.append(max(scores, key=scores.get))
        return predictions



In [17]:
# Smoke test: fit the classifier on a small hand-made dataset with two
# categorical features per sample and report its accuracy on three test samples.
X_train = [
    [1, 'Small'], [1, 'Medium'], [1, 'Medium'], [1, 'Small'], [1, 'Small'],
    [2, 'Small'], [2, 'Medium'], [2, 'Medium'], [2, 'Large'], [2, 'Large'],
    [3, 'Large'], [3, 'Medium'], [3, 'Medium'], [3, 'Large'], [3, 'Large'],
]
y_train = [
    'no', 'no', 'yes', 'yes', 'no',
    'no', 'no', 'yes', 'yes', 'yes',
    'yes', 'yes', 'yes', 'yes', 'no',
]

X_test = [[2, 'Small'], [3, 'Medium'], [1, 'Small']]
y_test = ['no', 'yes', 'no']

nb = NaiveBayes()
nb.train(X_train, y_train)
y_pred = nb.predict(X_test)

# Accuracy is the fraction of test samples whose predicted label equals the true label.
num_correct = sum(truth == guess for truth, guess in zip(y_test, y_pred))
accuracy = num_correct / len(y_test)
print(f"Accuracy: {accuracy}")


Accuracy: 1.0
