In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_breast_cancer
# Load the Breast Cancer Wisconsin dataset that ships with scikit-learn and
# wrap it in pandas containers: X holds one column per feature, y holds the
# integer class label of each sample.
cancer = load_breast_cancer()
feature_columns = cancer.feature_names
target_column = "cancer_type"
X = pd.DataFrame(data=cancer.data, columns=feature_columns)
y = pd.Series(data=cancer.target)

# Summarise the dataset dimensions, followed by a separator rule.
print(
    "Dataset: Breast Cancer Wisconsin",
    f"Number of samples: {X.shape[0]}",
    f"Number of features: {X.shape[1]}",
    "\n" + "="*50 + "\n",
    sep="\n",
)
# Train/test split: hold out 20% of the samples for evaluation.
# random_state pins the shuffle so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the size of each partition, followed by a separator rule.
print(
    "Data split into training and testing sets.",
    f"Training set size: {X_train.shape[0]} samples",
    f"Testing set size: {X_test.shape[0]} samples",
    "\n" + "="*50 + "\n",
    sep="\n",
)
# --- Training Naive Bayes Classifier ---
# Fit a Gaussian Naive Bayes model on the training split, predict the held-out
# test split, and report overall accuracy plus per-class metrics.
print("--- Training Naive Bayes Classifier ---")
nb_model = GaussianNB().fit(X_train, y_train)  # fit() returns the estimator
y_pred_nb = nb_model.predict(X_test)

# target_names maps the integer labels to the dataset's class names so the
# report rows are labelled by name rather than by number.
report_nb = classification_report(
    y_test,
    y_pred_nb,
    target_names=cancer.target_names.astype(str),
)
accuracy_nb = accuracy_score(y_test, y_pred_nb)

print(f"Naive Bayes Accuracy: {accuracy_nb:.4f}")
print("Classification Report:\n", report_nb)
print("\n" + "="*50 + "\n")
# --- Training ID3-like Decision Tree ---
# A decision tree that splits on entropy stands in for ID3 here; random_state
# is fixed so repeated runs build the same tree.
print("--- Training ID3-like Decision Tree ---")
id3_model = DecisionTreeClassifier(
    criterion="entropy",
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator
y_pred_id3 = id3_model.predict(X_test)

# Per-class metrics, with rows labelled by the dataset's class names.
report_id3 = classification_report(
    y_test,
    y_pred_id3,
    target_names=cancer.target_names.astype(str),
)
accuracy_id3 = accuracy_score(y_test, y_pred_id3)

print(f"ID3-like Decision Tree Accuracy: {accuracy_id3:.4f}")
print("Classification Report:\n", report_id3)
print("\n" + "="*50 + "\n")
# Side-by-side summary of the two test-set accuracies computed above.
print(
    "Comparison of Classifier Performance:",
    f"Naive Bayes: {accuracy_nb:.4f} Accuracy",
    f"ID3-like Decision Tree: {accuracy_id3:.4f} Accuracy",
    sep="\n",
)


Dataset: Breast Cancer Wisconsin
Number of samples: 569
Number of features: 30


Data split into training and testing sets.
Training set size: 455 samples
Testing set size: 114 samples


--- Training Naive Bayes Classifier ---
Naive Bayes Accuracy: 0.9737
Classification Report:
               precision    recall  f1-score   support

   malignant       1.00      0.93      0.96        43
      benign       0.96      1.00      0.98        71

    accuracy                           0.97       114
   macro avg       0.98      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



