-----------------------
#### detect tie break
------------------------

- using iris data

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from scipy import stats

In [7]:
# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

In [8]:
# Number of samples in the dataset
num_samples = len(X)

In [9]:
# Initialize a KNN classifier with k=3
knn_classifier = KNeighborsClassifier(n_neighbors=5)

In [10]:
# Initialize a list to store tie-break situations
tie_break_situations = []

In [11]:
# Perform Leave-One-Out Cross-Validation (LOOCV)
for i in range(num_samples):
    # Prepare training data by excluding the i-th sample
    X_train = np.delete(X, i, axis=0)
    y_train = np.delete(y, i)

    # Test sample
    X_test = X[i].reshape(1, -1)
    y_test = y[i]

    # Fit the KNN classifier on the training data
    knn_classifier.fit(X_train, y_train)

    # Get the indices and distances of the k-nearest neighbors
    _, neigh_ind = knn_classifier.kneighbors(X_test)

    # Extract the class labels of the neighbors
    _y = np.array([y_train[i] for i in neigh_ind[0]])

    # Calculate mode and check for tie-break
    mode, _ = stats.mode(_y, keepdims=True)

    # Check if there is a tie-break
    if np.count_nonzero(mode == mode[0]) > 1:
        tie_break_situations.append(i)

In [12]:
# Print tie-break situations
if len(tie_break_situations) > 0:
    print("Tie-break situations found for test samples:", tie_break_situations)
else:
    print("No tie-break situations found.")

No tie-break situations found.


#### try k = range(30)

In [13]:
# Create a list to store tie-break situations for each k
tie_break_situations_by_k = {}

In [14]:
# Loop over different values of k
for k in range(1, 31):
    # Initialize a list to store tie-break situations
    tie_break_situations = []

    # Create a KNN classifier with the current value of k
    knn_classifier = KNeighborsClassifier(n_neighbors=k)

    # Perform Leave-One-Out Cross-Validation (LOOCV)
    for i in range(num_samples):
        # Prepare training data by excluding the i-th sample
        X_train = np.delete(X, i, axis=0)
        y_train = np.delete(y, i)

        # Test sample
        X_test = X[i].reshape(1, -1)
        y_test = y[i]

        # Fit the KNN classifier on the training data
        knn_classifier.fit(X_train, y_train)

        # Get the indices and distances of the k-nearest neighbors
        _, neigh_ind = knn_classifier.kneighbors(X_test)

        # Extract the class labels of the neighbors
        _y = np.array([y_train[i] for i in neigh_ind[0]])

        # Calculate mode and check for tie-break
        mode, _ = stats.mode(_y, keepdims=True)

        # Check if there is a tie-break
        if np.count_nonzero(mode == mode[0]) > 1:
            tie_break_situations.append(i)

    # Store tie-break situations for the current k
    tie_break_situations_by_k[k] = tie_break_situations

In [15]:
# Print tie-break situations for each k
for k, tie_break_situations in tie_break_situations_by_k.items():
    if len(tie_break_situations) > 0:
        print(f"Tie-break situations found for k={k}: {tie_break_situations}")
    else:
        print(f"No tie-break situations found for k={k}")

No tie-break situations found for k=1
No tie-break situations found for k=2
No tie-break situations found for k=3
No tie-break situations found for k=4
No tie-break situations found for k=5
No tie-break situations found for k=6
No tie-break situations found for k=7
No tie-break situations found for k=8
No tie-break situations found for k=9
No tie-break situations found for k=10
No tie-break situations found for k=11
No tie-break situations found for k=12
No tie-break situations found for k=13
No tie-break situations found for k=14
No tie-break situations found for k=15
No tie-break situations found for k=16
No tie-break situations found for k=17
No tie-break situations found for k=18
No tie-break situations found for k=19
No tie-break situations found for k=20
No tie-break situations found for k=21
No tie-break situations found for k=22
No tie-break situations found for k=23
No tie-break situations found for k=24
No tie-break situations found for k=25
No tie-break situations found for 