# Code: U21EC019

## Naive Bayes Algorithm

In [94]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
    f1_score,
    classification_report)

In [95]:
# Load the Iris dataset and unpack the feature matrix (x) and label vector (y)
data = load_iris()
x, y = data.data, data.target

# Sanity check: feature-matrix shape followed by the raw label vector
print(x.shape, y, sep="\n")

(150, 4)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [96]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Separate the training rows by class label: one 2-D array of samples per class
class_data = {
    label: np.array([row for row, target in zip(x_train, y_train) if target == label])
    for label in (0, 1, 2)
}

In [97]:
# Per-class Gaussian parameters: one mean and one (population) standard
# deviation per feature, estimated from that class's training rows
means = {label: np.mean(rows, axis=0) for label, rows in class_data.items()}
std_devs = {label: np.std(rows, axis=0) for label, rows in class_data.items()}

# Report the fitted parameters for every class
for c in class_data:
    print(f"Class {c}:")
    print(f"Mean: {means[c]}")
    print(f"Standard Deviation: {std_devs[c]}\n")

Class 0:
Mean: [5.02051282 3.4025641  1.46153846 0.24102564]
Standard Deviation: [0.35961481 0.37654787 0.14253274 0.10553393]

Class 1:
Mean: [5.88648649 2.76216216 4.21621622 1.32432432]
Standard Deviation: [0.51368418 0.32244954 0.47959077 0.20189026]

Class 2:
Mean: [6.63863636 2.98863636 5.56590909 2.03181818]
Standard Deviation: [0.62385018 0.3283678  0.54269663 0.25385458]



In [102]:
def gaussian_pdf(x, mean, std_dev, min_std=1e-9):
    """
    Evaluate the univariate Gaussian density independently for every feature.

    The three inputs are broadcast against each other, so x may be a single
    sample of shape (n_features,) or a batch of shape (n_samples, n_features).

    Parameters:
    x (array-like): Feature values at which to evaluate the density.
    mean (array-like): Per-feature means, broadcastable to x.
    std_dev (array-like): Per-feature standard deviations, broadcastable to x.
    min_std (float): Lower bound applied to std_dev. A feature that is constant
        within a class has a standard deviation of 0, which would otherwise
        cause a division by zero and NaN/inf densities. Values at or above this
        bound are left untouched.

    Returns:
    numpy array: Density value for each feature, with the broadcast shape of the inputs.
    """
    x = np.asarray(x, dtype=float)
    mean = np.asarray(mean, dtype=float)
    # Floor the spread so that zero-variance features yield finite densities
    std_dev = np.maximum(np.asarray(std_dev, dtype=float), min_std)

    # Compute the Gaussian PDF for each feature
    exponent = np.exp(-0.5 * ((x - mean) ** 2 / (std_dev ** 2)))
    gaussian_prob = (1 / (np.sqrt(2 * np.pi) * std_dev)) * exponent

    return gaussian_prob

# Per-feature densities of every sample in the full dataset under each class model.
# Each class must be evaluated with its OWN mean and standard deviation
# (class 2 previously used the class-0 mean by mistake).
gaussian_probabilities_class0 = gaussian_pdf(x, means[0], std_devs[0])
gaussian_probabilities_class1 = gaussian_pdf(x, means[1], std_devs[1])
gaussian_probabilities_class2 = gaussian_pdf(x, means[2], std_devs[2])

print("Gaussian Probabilities class 0:", gaussian_probabilities_class0)
print("Gaussian Probabilities class 1:", gaussian_probabilities_class1)
print("Gaussian Probabilities class 2:", gaussian_probabilities_class2)


Gaussian Probabilities class 0: [[1.08258878e+000 1.02459042e+000 2.54986684e+000 3.50511547e+000]
 [1.04878437e+000 5.98273850e-001 2.54986684e+000 3.50511547e+000]
 [7.45727375e-001 9.16746960e-001 1.47257418e+000 3.50511547e+000]
 [5.59955769e-001 7.67166520e-001 2.69888103e+000 3.50511547e+000]
 [1.10755669e+000 9.23402391e-001 2.54986684e+000 3.50511547e+000]
 [6.35715759e-001 4.42725368e-001 6.90548174e-001 1.21554385e+000]
 [5.59955769e-001 1.05944839e+000 2.54986684e+000 3.23375808e+000]
 [1.10755669e+000 1.05944839e+000 2.69888103e+000 3.50511547e+000]
 [2.50353918e-001 4.34791071e-001 2.54986684e+000 3.50511547e+000]
 [1.04878437e+000 7.67166520e-001 2.69888103e+000 1.54794327e+000]
 [6.35715759e-001 7.75535917e-001 2.69888103e+000 3.50511547e+000]
 [9.19230122e-001 1.05944839e+000 1.74612729e+000 3.50511547e+000]
 [9.19230122e-001 5.98273850e-001 2.54986684e+000 1.54794327e+000]
 [1.49066971e-001 5.98273850e-001 1.12169375e-001 1.54794327e+000]
 [1.05887517e-001 3.00923413e-

In [99]:
def predict_naive_bayes(x, means, std_devs, priors):
    """
    Predict the class label for a single input vector using Gaussian Naive Bayes.

    Scores are accumulated in log space: the sum of per-feature log-densities
    plus the log-prior. This ranks classes exactly like the product
    likelihood * prior, but does not underflow to 0 for every class when many
    small densities are multiplied together.

    Parameters:
    x (numpy array): Single input vector of shape (n_features,).
    means (dict): Per-feature mean vector for each class label.
    std_devs (dict): Per-feature standard deviation vector for each class label.
    priors (dict): Prior probability for each class label.

    Returns:
    The key of `means` (class label) with the highest posterior score.
    """
    log_posteriors = {}

    # log(0) yields -inf here, which simply rules the class out (zero density
    # or zero prior), so the divide-by-zero warning from np.log is silenced.
    with np.errstate(divide="ignore"):
        for c in means.keys():
            # Per-feature likelihoods from the Gaussian PDF
            likelihood = gaussian_pdf(x, means[c], std_devs[c])
            # Naive independence assumption: sum the per-feature log-likelihoods
            total_log_likelihood = np.sum(np.log(likelihood))
            # Unnormalized log-posterior: log P(data|class) + log P(class)
            log_posteriors[c] = total_log_likelihood + np.log(priors[c])

    # Return the class with the highest posterior score
    return max(log_posteriors, key=log_posteriors.get)

# Class priors: the fraction of training samples that belong to each class
total_samples = len(x_train)
n_class0, n_class1, n_class2 = (len(class_data[label]) for label in (0, 1, 2))

priors = {
    label: count / total_samples
    for label, count in zip((0, 1, 2), (n_class0, n_class1, n_class2))
}

def naive_bayes_predict(x_test, means, std_devs, priors):
    """
    Classify every row of a test set with the Naive Bayes classifier.

    Parameters:
    x_test (numpy array): Test set of shape (n_samples, n_features).
    means (dict): Mean vectors for each class.
    std_devs (dict): Standard deviation vectors for each class.
    priors (dict): Prior probabilities for each class.

    Returns:
    numpy array: Predicted class label for each test sample, in input order.
    """
    # Each sample is scored independently by the single-vector predictor
    return np.array(
        [predict_naive_bayes(sample, means, std_devs, priors) for sample in x_test]
    )

# Classify every held-out sample with the fitted per-class parameters
predicted_classes = naive_bayes_predict(
    x_test, means=means, std_devs=std_devs, priors=priors
)

# Show ground truth and predictions one above the other for a quick comparison
print("Actual Values:", y_test)
print("Predicted Values:", predicted_classes)

Actual Values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]
Predicted Values: [2 1 0 2 0 2 0 1 1 1 1 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]


In [100]:
# Score the predictions against the held-out labels
accuracy = accuracy_score(y_true=y_test, y_pred=predicted_classes)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=predicted_classes)
class_report = classification_report(y_true=y_test, y_pred=predicted_classes)

# Overall accuracy, then the per-class breakdowns
print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", class_report, sep="\n")

Accuracy: 96.67%

Confusion Matrix:
[[11  0  0]
 [ 0 13  0]
 [ 0  1  5]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.93      1.00      0.96        13
           2       1.00      0.83      0.91         6

    accuracy                           0.97        30
   macro avg       0.98      0.94      0.96        30
weighted avg       0.97      0.97      0.97        30

