In [2]:
# 1. Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [3]:
# 2. Load the iris dataset
df = pd.read_csv('iris.csv')
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
# 3. Feature Selection and Preprocessing
# We'll use all features to predict the 'species' column

X = df.drop('Species', axis=1)
y = df['Species']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
# 4. Implement Naive Bayes classifier
naive_bayes = GaussianNB()
naive_bayes.fit(X_train, y_train)

# Predicting the test set
y_pred = naive_bayes.predict(X_test)

In [7]:
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
TP = cm[1,1]
TN = cm[0,0]
FP = cm[0,2]
FN = cm[2,0]

print("Confusion Matrix:\n", cm)
print(f"\nTP: {TP}, FP: {FP}, TN: {TN}, FN: {FN}")

# Accuracy, Error Rate, Precision, Recall
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy
precision = precision_score(y_test, y_pred, average='weighted')  # for multi-class classification
recall = recall_score(y_test, y_pred, average='weighted')

print(f"\nAccuracy: {accuracy:.2f}")
print(f"Error Rate: {error_rate:.2f}")
print(f"Precision (Weighted): {precision:.2f}")
print(f"Recall (Weighted): {recall:.2f}")

Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]

TP: 13, FP: 0, TN: 19, FN: 0

Accuracy: 1.00
Error Rate: 0.00
Precision (Weighted): 1.00
Recall (Weighted): 1.00
