In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [3]:
# Load the Iris dataset from a CSV file in the notebook's working directory.
iris_csv_path = 'iris.csv'
df = pd.read_csv(iris_csv_path)

In [4]:
# Quick look at the raw data: a preview, the dimensions, and summary
# statistics of the numeric columns.
print("First 5 rows:", df.head(), sep="\n")
print("\nDataset shape:", df.shape)
print("\nDescriptive statistics:", df.describe(), sep="\n")

First 5 rows:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

Dataset shape: (150, 6)

Descriptive statistics:
               Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
count  150.000000     150.000000    150.000000     150.000000    150.000000
mean    75.500000       5.843333      3.054000       3.758667      1.198667
std     43.445368       0.828066      0.433594       1.764420      0.763161
min      1.000000       4.300000      2.000000       1.000000      0.100000
25%     38.250000       5.100000      2.800000       1.600000      0.300000
50%     75.500000 

In [5]:
# Replace the species names with integer codes; `le` keeps the mapping
# (le.classes_) so the names can be recovered when reporting results.
# NOTE: this overwrites df['Species'] in place. Re-running this cell without
# reloading the CSV refits the encoder on the integer codes, so le.classes_
# would no longer hold the species names.
le = LabelEncoder()
le.fit(df['Species'])
df['Species'] = le.transform(df['Species'])

In [6]:
# Target: the encoded species label, selected by name rather than position.
y = df['Species']

# Features: the measurement columns only. 'Id' is a row identifier, not a
# property of the flower, and would leak the label if rows are grouped by
# species, so it must not be fed to the model.
# errors='ignore' keeps this working for iris.csv variants without an 'Id'.
X = df.drop(columns=['Species']).drop(columns=['Id'], errors='ignore')

## 2. Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
# Fit a Gaussian Naive Bayes classifier on the training split.
nb_classifier = GaussianNB()
nb_classifier.fit(X=X_train, y=y_train)

In [8]:
# Predict labels for the held-out rows.
y_pred = nb_classifier.predict(X_test)

# Confusion matrix: rows are the true classes, columns the predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:", conf_matrix, sep="\n")



Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [9]:
# Per-class (one-vs-rest) counts derived from the confusion matrix.
# Diagonal entries are the correctly classified samples of each class.
TP = np.diag(conf_matrix)
predicted_totals = conf_matrix.sum(axis=0)  # column sums
actual_totals = conf_matrix.sum(axis=1)     # row sums
FP = predicted_totals - TP
FN = actual_totals - TP
# Whatever is not TP, FP or FN for a class is a true negative for it.
TN = conf_matrix.sum() - TP - FP - FN

print("\nClass-wise Performance:")
for idx in range(len(le.classes_)):
    print(
        f"\nClass {le.classes_[idx]}:",
        f"True Positives (TP): {TP[idx]}",
        f"False Positives (FP): {FP[idx]}",
        f"True Negatives (TN): {TN[idx]}",
        f"False Negatives (FN): {FN[idx]}",
        sep="\n",
    )



Class-wise Performance:

Class Iris-setosa:
True Positives (TP): 10
False Positives (FP): 0
True Negatives (TN): 20
False Negatives (FN): 0

Class Iris-versicolor:
True Positives (TP): 9
False Positives (FP): 0
True Negatives (TN): 21
False Negatives (FN): 0

Class Iris-virginica:
True Positives (TP): 11
False Positives (FP): 0
True Negatives (TN): 19
False Negatives (FN): 0


In [10]:
# Overall metrics on the held-out set. Precision and recall are
# macro-averaged, i.e. the unweighted mean of the per-class scores.
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print("\nOverall Performance Metrics:")
for metric_label, metric_value in (
    ("Accuracy", accuracy),
    ("Error Rate", error_rate),
    ("Precision", precision),
    ("Recall", recall),
):
    print(f"{metric_label}: {metric_value:.4f}")


Overall Performance Metrics:
Accuracy: 1.0000
Error Rate: 0.0000
Precision: 1.0000
Recall: 1.0000
