In [1]:
import numpy as np  # noqa
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [2]:
iris = datasets.load_iris()
df = pd.DataFrame(
    data=np.c_[iris["data"], iris["target"]], columns=iris["feature_names"] + ["target"]
)

In [3]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


# Data Separation

In [4]:
X = iris.data
y = iris.target

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Define different ratios of training and test samples
ratios = [0.7, 0.8, 0.9]

# Define different distance estimation methods
distance_methods = ["euclidean", "manhattan"]

# Define different values of k (multiplicity)
k_values = [3, 5, 7]

# Define different voting options
voting_options = ["uniform", "distance"]

# Initialize a dictionary to store the accuracy of trained classifiers
classifier_accuracies = {}

# Loop over different ratios
for ratio in ratios:
    # Split the dataset into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 - ratio)

    # Loop over different distance estimation methods
    for distance_method in distance_methods:
        # Loop over different values of k
        for k in k_values:
            # Loop over different voting options
            for voting_option in voting_options:
                # Initialize and train the kNN classifier
                knn = KNeighborsClassifier(
                    n_neighbors=k, weights=voting_option, metric=distance_method
                )
                knn.fit(X_train, y_train)

                # Predict the labels for the test set
                y_pred = knn.predict(X_test)

                # Calculate the accuracy of the classifier
                accuracy = accuracy_score(y_test, y_pred)
                clf_report = classification_report(y_test, y_pred)
                # Store the accuracy in the dictionary
                classifier_accuracies[
                    (
                        ratio,
                        distance_method,
                        k,
                        voting_option,
                    )
                ] = accuracy

# Sort the classifiers based on accuracy in descending order
top_classifiers = sorted(
    classifier_accuracies.items(), key=lambda x: x[1], reverse=True
)[:3]

# Print the top three classifiers and their accuracies
for i, (params, accuracy) in enumerate(top_classifiers):
    ratio, distance_method, k, voting_option = params
    print(
        f"Top {i + 1} Classifier: Ratio={ratio}, Distance={distance_method}, k={k}, Voting={voting_option}, Accuracy={accuracy:.4f}"
    )

Top 1 Classifier: Ratio=0.8, Distance=euclidean, k=5, Voting=uniform, Accuracy=1.0000
Top 2 Classifier: Ratio=0.8, Distance=euclidean, k=5, Voting=distance, Accuracy=1.0000
Top 3 Classifier: Ratio=0.8, Distance=manhattan, k=5, Voting=distance, Accuracy=1.0000
