# Data Analytics III - Naïve Bayes on Iris Dataset

1. Implement a simple Naïve Bayes classification algorithm in Python on the Iris dataset.
2. Compute the confusion matrix and use it to find TP, FP, TN, FN, accuracy, error rate, precision, and recall.

In [None]:
# Import necessary libraries
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Fetch the Iris data through seaborn's dataset loader
iris = sns.load_dataset(name="iris")

# Preview the top five records (last expression of the cell, so it is displayed)
iris.head(n=5)

In [None]:
# Feature matrix: every column except the class label
X = iris.drop(columns="species")
# Target vector: the class label column
y = iris.species

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build the Gaussian Naïve Bayes classifier and fit it on the training split
# (fit() returns the estimator itself, so construction and training can be chained)
model = GaussianNB().fit(X_train, y_train)

# Predict class labels for the held-out test rows
y_pred = model.predict(X_test)

In [None]:
# Confusion matrix with rows/columns ordered by the classes the model learned
conf_matrix = confusion_matrix(y_test, y_pred, labels=model.classes_)

# Wrap the raw counts in a DataFrame so both axes carry the class names
conf_matrix_df = pd.DataFrame(
    data=conf_matrix,
    index=model.classes_,
    columns=model.classes_,
)
print("Confusion Matrix:", conf_matrix_df, sep="\n")

In [None]:
# Classification report
# `labels` is passed explicitly so the report rows always line up one-to-one with
# `target_names`. Without it scikit-learn infers the labels from y_test/y_pred, and
# if a class happens to be absent from this split the number of inferred labels no
# longer matches the number of names.
print("\nClassification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=model.classes_,
        target_names=model.classes_,
    )
)

In [None]:
# Overall accuracy: fraction of test rows whose predicted label matches the true one
accuracy = accuracy_score(y_test, y_pred)
# Error rate is the complement of accuracy
error_rate = 1 - accuracy

print(f"Accuracy: {accuracy:.2f}\nError Rate: {error_rate:.2f}")

In [None]:
# Calculate TP, FP, FN, TN for each class (one-vs-rest)
#
# In scikit-learn's confusion matrix, rows are the true labels and columns are the
# predicted labels. Treating class i as "positive" and every other class as "negative":
#   TP = diagonal cell [i, i]
#   FP = rest of column i  (predicted as i, actually another class)
#   FN = rest of row i     (actually i, predicted as another class)
#   TN = every remaining cell
labels = model.classes_
conf_matrix = confusion_matrix(y_test, y_pred, labels=labels)
print("Confusion Matrix:\n", pd.DataFrame(conf_matrix, index=labels, columns=labels))

# TP + FP + FN + TN is the total sample count for every class, so compute it once.
total = conf_matrix.sum()

for i, label in enumerate(labels):
    TP = conf_matrix[i, i]
    FP = conf_matrix[:, i].sum() - TP
    FN = conf_matrix[i, :].sum() - TP
    TN = total - (TP + FP + FN)

    print(f"\nClass: {label}")
    print(f"TP: {TP}, FP: {FP}, FN: {FN}, TN: {TN}")

    # Loop-specific names: the overall `accuracy` / `error_rate` computed earlier in
    # the notebook must not be overwritten by the last class's one-vs-rest values.
    class_accuracy = (TP + TN) / total
    class_error_rate = 1 - class_accuracy
    # Guard against a class that was never predicted (precision) or never present
    # in the test set (recall); report 0 instead of dividing by zero.
    precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    recall = TP / (TP + FN) if (TP + FN) != 0 else 0

    print(f"Accuracy: {class_accuracy:.2f}")
    print(f"Error Rate: {class_error_rate:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")