# Machine learning lab #1 (kNN)

### Andosov German, M33391

##### Part 3. Choosing hyperparameters

In [73]:
# Execute the previous part of the lab inside this kernel.
# NOTE(review): presumably this is where KNN, Kernel, Metric, WindowType, accuracy,
# KNeighborsClassifier, default_weights and the X_*/y_* splits used below come from — confirm.
%run "02-algorithm.ipynb"

import optuna

from IPython.display import clear_output
from random import shuffle

# Clear this cell's output area (presumably to hide whatever the %run above printed).
clear_output(wait=False)

Tuning the hyperparameters of my own kNN implementation with Optuna, maximizing accuracy on the validation set (the fixed-window and variable-window variants are tuned in separate studies):

In [74]:
# Optuna samples plain string labels; these tables translate a sampled label
# into the Kernel / Metric constant that is handed to KNN.
kr_map = {
    "uniform": Kernel.UNIFORM,
    "triangular": Kernel.TRIANGULAR,
    "epanechnikov": Kernel.EPANECHNIKOV,
    "gaussian": Kernel.GAUSSIAN,
}
mt_map = {
    "manhattan": Metric.MANHATTAN,
    "euclidean": Metric.EUCLIDEAN,
    "cosine": Metric.COSINE,
}


def my_objective(window_type, trial):
    """Optuna objective for the custom KNN: score one sampled configuration.

    Parameters
    ----------
    window_type
        ``WindowType.FIXED`` makes the trial sample a float ``"h"`` in
        [0.01, 1]; any other value makes it sample an integer ``"k"`` in
        [1, 1000].
    trial
        Optuna trial used to draw the window parameter, kernel and metric.

    Returns
    -------
    The value of ``accuracy`` computed from the validation labels and the
    model's predictions on ``X_val``.
    """
    use_fixed_window = window_type == WindowType.FIXED
    window_param = (
        trial.suggest_float("h", 0.01, 1)
        if use_fixed_window
        else trial.suggest_int("k", 1, 1000)
    )

    # The categorical choices are exactly the keys of the lookup tables,
    # in their insertion order.
    kernel_name = trial.suggest_categorical("Kernel", list(kr_map))
    metric_name = trial.suggest_categorical("Metric", list(mt_map))

    model = KNN(window_param, window_type, kr_map[kernel_name], mt_map[metric_name])
    fitted = model.fit(X_train, y_train, default_weights)
    predictions = fitted.predict(X_val)
    return accuracy(y_val.to_list(), predictions)


def my_objective_fixed(trial):
    """Single-argument objective for Optuna: custom KNN with a fixed window."""
    return my_objective(WindowType.FIXED, trial)


def my_objective_variable(trial):
    """Single-argument objective for Optuna: custom KNN with a variable window."""
    return my_objective(WindowType.VARIABLE, trial)


# Seed the sampler so that Restart & Run All proposes the same sequence of
# configurations; an unseeded study gives a different search on every run.
# TPESampler is Optuna's default sampler, so only the seeding changes.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(my_objective_fixed, n_trials=100)
# Keep the winner under its own name: a later cell rebinds `study`
# for the variable-window search.
best_fixed = study.best_trial


  return bound(*args, **kwds)
[I 2024-03-10 09:37:49,901] A new study created in memory with name: no-name-01e871d1-cc91-4607-bc46-2bceebe1142b


Val and test sizes are 2336 2336


[I 2024-03-10 09:37:50,239] Trial 0 finished with value: 9.97431506849315 and parameters: {'h': 0.7058266743895694, 'Kernel': 'uniform', 'Metric': 'manhattan'}. Best is trial 0 with value: 9.97431506849315.
[I 2024-03-10 09:37:50,732] Trial 1 finished with value: 86.00171232876713 and parameters: {'h': 0.8807543889012791, 'Kernel': 'uniform', 'Metric': 'cosine'}. Best is trial 1 with value: 86.00171232876713.
[I 2024-03-10 09:37:51,083] Trial 2 finished with value: 83.98972602739725 and parameters: {'h': 0.11685530438378469, 'Kernel': 'gaussian', 'Metric': 'euclidean'}. Best is trial 1 with value: 86.00171232876713.
[I 2024-03-10 09:37:51,567] Trial 3 finished with value: 86.00171232876713 and parameters: {'h': 0.9149800789670789, 'Kernel': 'uniform', 'Metric': 'cosine'}. Best is trial 1 with value: 86.00171232876713.
[I 2024-03-10 09:37:51,841] Trial 4 finished with value: 9.97431506849315 and parameters: {'h': 0.7588256155665635, 'Kernel': 'uniform', 'Metric': 'manhattan'}. Best is t

In [75]:
# Variable-window search for the custom KNN. The sampler is seeded so that
# re-running the notebook reproduces the same sequence of trials
# (TPESampler is Optuna's default sampler, so only the seeding changes).
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(my_objective_variable, n_trials=100)

[I 2024-03-10 09:38:35,474] A new study created in memory with name: no-name-7b8f1d46-99ac-4569-92eb-62b289e91103
[I 2024-03-10 09:38:35,861] Trial 0 finished with value: 86.17294520547945 and parameters: {'k': 121, 'Kernel': 'gaussian', 'Metric': 'manhattan'}. Best is trial 0 with value: 86.17294520547945.
[I 2024-03-10 09:38:36,995] Trial 1 finished with value: 83.98972602739725 and parameters: {'k': 570, 'Kernel': 'triangular', 'Metric': 'cosine'}. Best is trial 0 with value: 86.17294520547945.
[I 2024-03-10 09:38:38,235] Trial 2 finished with value: 80.82191780821918 and parameters: {'k': 882, 'Kernel': 'uniform', 'Metric': 'cosine'}. Best is trial 0 with value: 86.17294520547945.
[I 2024-03-10 09:38:38,763] Trial 3 finished with value: 83.4332191780822 and parameters: {'k': 321, 'Kernel': 'gaussian', 'Metric': 'euclidean'}. Best is trial 0 with value: 86.17294520547945.
[I 2024-03-10 09:38:39,849] Trial 4 finished with value: 81.89212328767124 and parameters: {'k': 727, 'Kernel': 

In [76]:
# Report the winning hyperparameters of both custom-KNN searches.
print(f"Best with fixed window {best_fixed.params}")
print(f"Best with variable window {study.best_trial.params}")

Best with fixed window {'h': 0.028965907966343987, 'Kernel': 'triangular', 'Metric': 'cosine'}
Best with variable window {'k': 82, 'Kernel': 'triangular', 'Metric': 'manhattan'}


Tuning the hyperparameters of the sklearn implementation (`KNeighborsClassifier`) with Optuna, again maximizing accuracy on the validation set:

In [78]:
def lib_objective(trial):
    """Optuna objective for ``KNeighborsClassifier``: score one sampled configuration.

    Samples the number of neighbours (1..200), the weight function, the
    neighbour-search algorithm and the Minkowski degree (1..5), fits the
    classifier on the training split and evaluates it on the validation split.

    Parameters
    ----------
    trial
        Optuna trial used to draw the hyperparameters.

    Returns
    -------
    The value of ``accuracy`` computed from the validation labels and the
    classifier's predictions on ``X_val``.
    """
    # Entries are evaluated top to bottom, so the parameters are sampled
    # in the order they are listed here.
    sampled = {
        "n_neighbors": trial.suggest_int("k", 1, 200),
        "weights": trial.suggest_categorical("Weight function", ["uniform", "distance"]),
        "algorithm": trial.suggest_categorical("NN Algorithm", ["ball_tree", "kd_tree", "brute"]),
        # p < 1 breaks kd_tree, hence the lower bound of 1
        "p": trial.suggest_float("Minkowski degree", 1, 5),
    }
    classifier = KNeighborsClassifier(**sampled)
    fitted = classifier.fit(X_train, y_train)
    predictions = fitted.predict(X_val)
    return accuracy(y_val.to_list(), predictions)


# The sklearn objective takes no window type, so these two names are the same
# objective. They are kept only so that anything referring to them still works.
lib_objective_fixed = lib_objective
lib_objective_variable = lib_objective

# Seed the sampler so that re-running the notebook reproduces the same
# sequence of trials (TPESampler is Optuna's default sampler, so only the
# seeding changes).
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(lib_objective, n_trials=50)

[I 2024-03-10 09:40:22,684] A new study created in memory with name: no-name-0055d2b9-5276-44cc-96c9-e4db3e04a8e4
[I 2024-03-10 09:40:31,227] Trial 0 finished with value: 83.6472602739726 and parameters: {'k': 188, 'Weight function': 'distance', 'NN Algorithm': 'ball_tree', 'Minkowski degree': 2.636435152634192}. Best is trial 0 with value: 83.6472602739726.
[I 2024-03-10 09:40:37,932] Trial 1 finished with value: 86.0445205479452 and parameters: {'k': 74, 'Weight function': 'uniform', 'NN Algorithm': 'kd_tree', 'Minkowski degree': 1.4070140923784678}. Best is trial 1 with value: 86.0445205479452.
[I 2024-03-10 09:40:45,569] Trial 2 finished with value: 83.98972602739725 and parameters: {'k': 107, 'Weight function': 'uniform', 'NN Algorithm': 'ball_tree', 'Minkowski degree': 3.6807065869966102}. Best is trial 1 with value: 86.0445205479452.
[I 2024-03-10 09:40:46,894] Trial 3 finished with value: 86.42979452054794 and parameters: {'k': 38, 'Weight function': 'distance', 'NN Algorithm':

In [None]:
# Report the winning hyperparameters of the most recent study bound to `study`.
best_lib_trial = study.best_trial
print(best_lib_trial.params)

{'k': 24, 'Weight function': 'uniform', 'NN Algorithm': 'ball_tree', 'Minkowski degree': 2.4695490839745413}
