In [6]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, load_wine
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


**Iris Dataset**

**Hold-Out Method**

In [2]:
# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset into training and test sets using hold-out method (75% train, 25% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Initialize classifiers
nb_classifier = GaussianNB()
knn_classifier = KNeighborsClassifier(n_neighbors=3)
dt_classifier = DecisionTreeClassifier(random_state=42)

# Fit classifiers to training data
nb_classifier.fit(X_train, y_train)
knn_classifier.fit(X_train, y_train)
dt_classifier.fit(X_train, y_train)

# Make predictions on test data
nb_predictions = nb_classifier.predict(X_test)
knn_predictions = knn_classifier.predict(X_test)
dt_predictions = dt_classifier.predict(X_test)

# Calculate accuracy for each classifier
nb_accuracy = accuracy_score(y_test, nb_predictions)
knn_accuracy = accuracy_score(y_test, knn_predictions)
dt_accuracy = accuracy_score(y_test, dt_predictions)

print("Accuracy using hold-out method:")
print(f"Naive Bayes: {nb_accuracy:.4f}")
print(f"K-Nearest Neighbors: {knn_accuracy:.4f}")
print(f"Decision Tree: {dt_accuracy:.4f}")
print()

Accuracy using hold-out method:
Naive Bayes: 1.0000
K-Nearest Neighbors: 1.0000
Decision Tree: 1.0000



**Random Subsampling Method**

In [3]:
# Now let's use random subsampling (66.6% train, 33.3% test)
X_train_sub, X_test_sub, y_train_sub, y_test_sub = train_test_split(X, y, test_size=0.333, random_state=42)

# Fit classifiers to subsampled training data
nb_classifier.fit(X_train_sub, y_train_sub)
knn_classifier.fit(X_train_sub, y_train_sub)
dt_classifier.fit(X_train_sub, y_train_sub)

# Make predictions on subsampled test data
nb_predictions_sub = nb_classifier.predict(X_test_sub)
knn_predictions_sub = knn_classifier.predict(X_test_sub)
dt_predictions_sub = dt_classifier.predict(X_test_sub)

# Calculate accuracy for each classifier with random subsampling
nb_accuracy_sub = accuracy_score(y_test_sub, nb_predictions_sub)
knn_accuracy_sub = accuracy_score(y_test_sub, knn_predictions_sub)
dt_accuracy_sub = accuracy_score(y_test_sub, dt_predictions_sub)

print("Accuracy using random subsampling:")
print(f"Naive Bayes: {nb_accuracy_sub:.4f}")
print(f"K-Nearest Neighbors: {knn_accuracy_sub:.4f}")
print(f"Decision Tree: {dt_accuracy_sub:.4f}")
print()

Accuracy using random subsampling:
Naive Bayes: 0.9600
K-Nearest Neighbors: 0.9800
Decision Tree: 0.9800



**Cross-Validation Method**

In [4]:
from sklearn.model_selection import cross_val_score

# Initialize classifiers
nb_classifier_cv = GaussianNB()
knn_classifier_cv = KNeighborsClassifier(n_neighbors=3)
dt_classifier_cv = DecisionTreeClassifier(random_state=42)

# Calculate cross-validated accuracy for each classifier
nb_cv_scores = cross_val_score(nb_classifier_cv, X, y, cv=5)
knn_cv_scores = cross_val_score(knn_classifier_cv, X, y, cv=5)
dt_cv_scores = cross_val_score(dt_classifier_cv, X, y, cv=5)

print("Accuracy using 5-fold cross-validation:")
print(f"Naive Bayes: {np.mean(nb_cv_scores):.4f}")
print(f"K-Nearest Neighbors: {np.mean(knn_cv_scores):.4f}")
print(f"Decision Tree: {np.mean(dt_cv_scores):.4f}")


Accuracy using 5-fold cross-validation:
Naive Bayes: 0.9533
K-Nearest Neighbors: 0.9667
Decision Tree: 0.9533


**Wine Dataset**

**Hold-Out Method**

In [7]:
# Load the Wine dataset
wine = load_wine()
X_wine, y_wine = wine.data, wine.target

# Split the dataset into training and test sets using hold-out method (75% train, 25% test)
X_train_, X_test_, y_train_, y_test_ = train_test_split(X, y, test_size=0.25, random_state=42)

# Initialize classifiers
nb_classifier_ = GaussianNB()
knn_classifier_ = KNeighborsClassifier(n_neighbors=3)
dt_classifier_ = DecisionTreeClassifier(random_state=42)

# Fit classifiers to training data
nb_classifier_.fit(X_train_, y_train_)
knn_classifier_.fit(X_train_, y_train_)
dt_classifier_.fit(X_train_, y_train_)

# Make predictions on test data
nb_predictions_ = nb_classifier_.predict(X_test_)
knn_predictions_ = knn_classifier_.predict(X_test_)
dt_predictions_ = dt_classifier_.predict(X_test_)

# Calculate accuracy for each classifier
nb_accuracy_ = accuracy_score(y_test_, nb_predictions_)
knn_accuracy_ = accuracy_score(y_test_, knn_predictions_)
dt_accuracy_ = accuracy_score(y_test_, dt_predictions_)

print("Accuracy using hold-out method:")
print(f"Naive Bayes: {nb_accuracy_:.4f}")
print(f"K-Nearest Neighbors: {knn_accuracy_:.4f}")
print(f"Decision Tree: {dt_accuracy_:.4f}")
print()

Accuracy using hold-out method:
Naive Bayes: 1.0000
K-Nearest Neighbors: 1.0000
Decision Tree: 1.0000



**Random Subsampling Method**

In [8]:
# Now let's use random subsampling (66.6% train, 33.3% test)
X_train_sub_, X_test_sub_, y_train_sub_, y_test_sub_ = train_test_split(X, y, test_size=0.333, random_state=42)

# Fit classifiers to subsampled training data
nb_classifier.fit(X_train_sub_, y_train_sub_)
knn_classifier.fit(X_train_sub_, y_train_sub_)
dt_classifier.fit(X_train_sub_, y_train_sub_)

# Make predictions on subsampled test data
nb_predictions_sub_ = nb_classifier.predict(X_test_sub_)
knn_predictions_sub_ = knn_classifier.predict(X_test_sub_)
dt_predictions_sub_ = dt_classifier.predict(X_test_sub_)

# Calculate accuracy for each classifier with random subsampling
nb_accuracy_sub_ = accuracy_score(y_test_sub_, nb_predictions_sub_)
knn_accuracy_sub_ = accuracy_score(y_test_sub_, knn_predictions_sub_)
dt_accuracy_sub_ = accuracy_score(y_test_sub_, dt_predictions_sub_)

print("Accuracy using random subsampling:")
print(f"Naive Bayes: {nb_accuracy_sub_:.4f}")
print(f"K-Nearest Neighbors: {knn_accuracy_sub_:.4f}")
print(f"Decision Tree: {dt_accuracy_sub_:.4f}")
print()

Accuracy using random subsampling:
Naive Bayes: 0.9600
K-Nearest Neighbors: 0.9800
Decision Tree: 0.9800



**Cross-Validation Method**

In [9]:
# Initialize classifiers
nb_classifier_cv_ = GaussianNB()
knn_classifier_cv_ = KNeighborsClassifier(n_neighbors=3)
dt_classifier_cv_ = DecisionTreeClassifier(random_state=42)

# Calculate cross-validated accuracy for each classifier
nb_cv_scores_ = cross_val_score(nb_classifier_cv_, X, y, cv=5)
knn_cv_scores_ = cross_val_score(knn_classifier_cv_, X, y, cv=5)
dt_cv_scores_ = cross_val_score(dt_classifier_cv_, X, y, cv=5)

print("Accuracy using 5-fold cross-validation:")
print(f"Naive Bayes: {np.mean(nb_cv_scores_):.4f}")
print(f"K-Nearest Neighbors: {np.mean(knn_cv_scores_):.4f}")
print(f"Decision Tree: {np.mean(dt_cv_scores_):.4f}")

Accuracy using 5-fold cross-validation:
Naive Bayes: 0.9533
K-Nearest Neighbors: 0.9667
Decision Tree: 0.9533
