In [291]:
import torch


# Naive Bayes Classifier

1. Naive Bayes is a probabilistic classifier: it uses Bayes' theorem to compute the probability of each class given the observed features, and predicts the most probable class.
2. The core formula is:  
   $$
   P(y|X) = \frac{P(X|y) \cdot P(y)}{P(X)}
   $$
3. In this implementation, we have 10 features, 10 samples, and 2 classes.
4. The formula for calculating $P(y = 0|X)$ is:  
   $$
   P(y = 0|X) = \frac{P(x_1|y = 0) \cdot P(x_2|y = 0) \cdot \dots \cdot P(x_{10}|y = 0) \cdot P(y = 0)}{P(X)}
   $$
5. Similarly, the formula for calculating $P(y = 1|X)$ is:  
   $$
   P(y = 1|X) = \frac{P(x_1|y = 1) \cdot P(x_2|y = 1) \cdot \dots \cdot P(x_{10}|y = 1) \cdot P(y = 1)}{P(X)}
   $$
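6. Applying the **log transformation** (which turns the product into a sum) and dropping $P(X)$, which is the same for every class, the prediction is the **maximum a posteriori (MAP)** class. The per-class means, variances, and priors used inside it are the parameters fitted by **Maximum Likelihood Estimation (MLE)**:  
   $$
   \hat{y} = \arg\max_y \left[ \log P(y) + \sum_{i=1}^{10} \log P(x_i|y) \right]
   $$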

## Assumptions

1. **Feature Independence**:  
   The features $x_1, x_2, \dots, x_n$ are assumed to be conditionally independent given the class $y$.  
   This is why:  
   $$
   P(X|y) = P(x_1|y) \cdot P(x_2|y) \cdot \dots \cdot P(x_n|y)
   $$
   
2. **Feature Distribution**:  
   - **Gaussian Naive Bayes**: For continuous features, we assume that, for a given value of $y$, each feature $x_1, x_2, \dots, x_n$ follows a normal (Gaussian) distribution with its own class-specific mean and variance.
   
## Types of Naive Bayes Models Based on Feature Distribution

1. **Gaussian Naive Bayes**:  
   Assumes the features follow a Gaussian (normal) distribution.
   
2. **Bernoulli Naive Bayes**:  
   Assumes binary features, i.e., presence/absence of a feature.
   
3. **Multinomial Naive Bayes**:  
   Assumes the features follow a multinomial distribution. Typically used for text classification, where the features are word counts.


# Implementing Naive Bayes


In [314]:
import torch

# Example data: 10 samples, 10 features
# NOTE: no random seed is set here, so the data (and every value printed below)
# changes each time this cell is run.
x = torch.randn([10, 10])
y = torch.bernoulli(torch.full((10,), 0.7)).int()  # Binary class labels: 0 or 1
xtest = torch.randn(1, 10)  # A new test sample (1 x 10 features)

# Smallest variance we allow. A class that ends up with a single sample has
# variance 0 for every feature, which would make the Gaussian density divide by zero.
MIN_VAR = 1e-9

# Unique class labels and the number of samples carrying each label
yclass, yclass_count = torch.unique(y, return_counts=True)
cls_value, cls_count = yclass, yclass_count  # older names kept as aliases

n_class = yclass.shape[0]  # Number of classes actually present in y (normally 2: 0 and 1)
n_features = x.shape[1]    # Number of features (10 in this case)

# Containers for the per-class statistics
cmean = torch.zeros([n_class, n_features])  # Mean of each feature for each class
cvar = torch.zeros([n_class, n_features])   # Variance of each feature for each class

# Per-class mean and (population) variance of every feature
for i, cls in enumerate(yclass):
    x_cls = x[y == cls]  # All samples that belong to this class
    cmean[i] = x_cls.mean(dim=0)
    cvar[i] = torch.clamp(x_cls.var(dim=0, unbiased=False), min=MIN_VAR)

# Class priors P(class) = (samples in class) / (total samples)
ycls = yclass_count.float() / y.shape[0]

print("Class means:\n", cmean)
print("Class variances:\n", cvar)
print("Class priors (P(class)):\n", ycls)

def norm_pdf(x, mu, var):
    """Compute the normal (Gaussian) probability density function element-wise.

    Arguments:
    - x: Tensor of points at which the density is evaluated
    - mu: Tensor of means (broadcast against x)
    - var: Tensor of variances (broadcast against x); must be > 0

    Returns:
    - Tensor of densities exp(-(x - mu)^2 / (2 * var)) / sqrt(2 * pi * var)
    """
    import math  # local import: keeps the function usable whichever cells have run

    # math.pi instead of the truncated 3.14, which skewed every density slightly
    return torch.exp(-0.5 * ((x - mu) ** 2) / var) / (2 * math.pi * var) ** 0.5

def log(z):
    """Return the element-wise natural logarithm of tensor ``z`` (wraps ``torch.log``)."""
    natural_log = torch.log(z)
    return natural_log

def predict(xtest, cmean, cvar, yclass_count, yclass):
    """
    Predict the class of a new sample using Gaussian Naive Bayes.

    The score of each class is computed entirely in log space,
        log P(class) + sum_i log N(x_i | mean_i, var_i),
    so samples far from a class mean (or classes with tiny variances) produce a
    large negative score instead of underflowing to log(0) = -inf.

    Arguments:
    - xtest: Test sample (1 x n_features)
    - cmean: Mean values for each feature per class (n_classes x n_features)
    - cvar: Variance values for each feature per class (n_classes x n_features), > 0
    - yclass_count: Count of samples per class (normalised here into P(class))
    - yclass: Class labels, in the same order as the rows of cmean / cvar

    Returns:
    - Predicted class label: the entry of yclass with the highest log posterior
    """
    import math  # local import: keeps the function usable whichever cells have run

    n_classes = cmean.shape[0]

    # Turn raw counts into proper priors; argmax is unaffected, but the value
    # is now really log P(class) as the formula states.
    counts = yclass_count.to(cmean.dtype)
    log_prior = torch.log(counts / counts.sum())

    log_posterior = torch.zeros(n_classes)
    for i in range(n_classes):
        # log N(x | mu, var) = -0.5 * (log(2*pi*var) + (x - mu)^2 / var)
        log_pdf = -0.5 * (torch.log(2 * math.pi * cvar[i]) + (xtest - cmean[i]) ** 2 / cvar[i])
        # Naive independence assumption: per-feature log densities simply add up
        log_posterior[i] = torch.sum(log_pdf) + log_prior[i]

    # Map the winning row index back to its label instead of returning the index
    best = int(torch.argmax(log_posterior))
    label = yclass[best]
    return label.item() if hasattr(label, "item") else label

# Classify the held-out sample with the statistics estimated above and report the result
predicted_class = predict(
    xtest=xtest,
    cmean=cmean,
    cvar=cvar,
    yclass_count=yclass_count,
    yclass=yclass,
)
print("Test sample:", xtest)
print("Predicted class:", predicted_class)


Class means:
 tensor([[-0.6612,  0.7808,  0.4939,  1.3309,  0.4079, -0.1055,  0.1198,  0.2943,
         -1.0946,  0.2843],
        [ 0.4768, -0.2492,  0.7111, -0.1231, -0.3066,  0.1550, -0.3000, -0.7018,
          0.1467,  0.6364]])
Class variances:
 tensor([[9.4736e-04, 2.8718e-01, 1.0667e-01, 1.0123e-03, 1.0996e+00, 2.6818e-02,
         7.4482e-01, 2.0912e-01, 2.4093e+00, 5.9047e-01],
        [9.4817e-01, 1.6443e+00, 9.4338e-02, 1.2655e+00, 3.7627e-01, 8.0746e-01,
         1.6643e+00, 4.9312e-01, 7.6913e-01, 2.0216e-01]])
Class priors (P(class)):
 tensor([0.2000, 0.8000])
Test sample: tensor([[-1.3592,  0.1304, -0.8333, -0.7610, -0.1195,  1.3507, -0.0998,  0.5311,
          0.9302, -0.1602]])
Predicted class: 1


# Pytorchifying Naive Bayes

In [315]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier for continuous features.

    ``fit`` estimates, for every class, the mean and variance of each feature
    plus the class prior. ``predict`` scores a sample against every class in
    log space and returns the label of the best-scoring class.

    Attributes set by ``fit``:
    - yclass / cls_value: unique class labels (sorted, as returned by torch.unique)
    - yclass_count / cls_count: number of training samples per class
    - n_class, n_features: number of classes / features
    - cmean, cvar: per-class feature means and variances (n_class x n_features)
    - ycls: class priors P(class)
    """

    def __init__(self, min_var=1e-9):
        """
        Arguments:
        - min_var: Lower bound applied to every fitted variance, so a class with a
          single sample (or a constant feature) does not cause a division by zero.
        """
        self.min_var = min_var

    def fit(self, xtrain, ytrain):
        """Estimate per-class means, variances and priors.

        Arguments:
        - xtrain: Training features (n_samples x n_features)
        - ytrain: Class label of every training sample (n_samples)

        Returns:
        - self, so calls can be chained: ``NaiveBayes().fit(x, y).predict(xtest)``
        """
        self.yclass, self.yclass_count = torch.unique(ytrain, return_counts=True)
        # Older attribute names kept as aliases of the same tensors
        self.cls_value, self.cls_count = self.yclass, self.yclass_count
        self.n_class = self.yclass.shape[0]  # Number of unique classes
        self.n_features = xtrain.shape[1]
        self.cmean = torch.zeros([self.n_class, self.n_features])  # Mean of each feature for each class
        self.cvar = torch.zeros([self.n_class, self.n_features])   # Variance of each feature for each class
        self.ycls = self.yclass_count.float() / ytrain.shape[0]    # Class priors P(class)

        for i, cls in enumerate(self.yclass):
            x_cls = xtrain[ytrain == cls]  # All samples that belong to this class
            self.cmean[i] = x_cls.mean(dim=0)
            # Population variance, floored at min_var to keep the density finite
            self.cvar[i] = torch.clamp(x_cls.var(dim=0, unbiased=False), min=self.min_var)

        # Returning self allows method chaining
        return self

    def predict(self, xtest):
        """Predict the class label of a test sample.

        Arguments:
        - xtest: Test sample (1 x n_features). If several rows are passed, their
          log likelihoods are summed and a single label is returned.

        Returns:
        - The class label (Python scalar from ``self.yclass``) with the highest
          log posterior: log P(class) + sum_i log P(x_i | class)
        """
        self.n_classes = self.cmean.shape[0]
        log_posterior = torch.zeros(self.n_classes)

        for i in range(self.n_classes):
            # Per-feature log densities add up under the independence assumption
            log_likelihood = torch.sum(self._log_norm_pdf(xtest, self.cmean[i], self.cvar[i]))
            log_prior = torch.log(self.ycls[i])  # log P(class)
            log_posterior[i] = log_likelihood + log_prior

        # Map the winning row index back to its label instead of returning the index
        best = int(torch.argmax(log_posterior))
        return self.yclass[best].item()

    @staticmethod
    def _log_norm_pdf(xtest, mu, var):
        """Log of the Gaussian density, evaluated directly in log space.

        Avoids exp() underflowing to 0 (and log(0) = -inf) for samples far
        from the mean or for very small variances.
        """
        import math  # local import: keeps the class usable whichever cells have run

        return -0.5 * (torch.log(2 * math.pi * var) + (xtest - mu) ** 2 / var)

    def norm_pdf(self, xtest, mu, var):
        """Gaussian density exp(-(x - mu)^2 / (2 * var)) / sqrt(2 * pi * var), element-wise."""
        import math  # local import: keeps the class usable whichever cells have run

        return torch.exp(-0.5 * ((xtest - mu) ** 2) / var) / (2 * math.pi * var) ** 0.5

    @staticmethod
    def log(z):
        """Element-wise natural logarithm of tensor ``z``."""
        return torch.log(z)
     


# Fit the classifier on the training data, then classify the held-out sample
obj = NaiveBayes()
obj1 = obj.fit(xtrain=x, ytrain=y)  # keeps whatever fit() returns
cls_predict = obj.predict(xtest=xtest)
cls_predict  # bare last expression so the notebook displays the prediction

1