In [1]:
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets

from IPython.display import display

from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_classification
from sklearn.model_selection import train_test_split

%matplotlib inline

## KNN для задачи классификации

In [2]:
# Toy binary-classification data: 200 noisy points from scikit-learn's
# "two moons" generator, with a fixed seed for reproducibility.
X, y = make_moons(n_samples=200, noise=0.3, random_state=0)
# Standardise each feature (zero mean, unit variance) before using
# distance-based models on it.
X = StandardScaler().fit(X).transform(X)

In [3]:
# Step of the evaluation grid on which the classifier is later visualised.
h = 0.02

# Bounding box of the two feature columns, padded by 0.5 on every side.
x_min, y_min = X.min(axis=0) - 0.5
x_max, y_max = X.max(axis=0) + 0.5

# Dense 2-D grid covering the padded bounding box.
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)

In [4]:
from matplotlib.colors import ListedColormap

# Two-colour map (blue, red) used to colour the scatter points by class label.
cm_bright = ListedColormap(["#0000FF", "#FF0000"])

In [5]:
# Slider controlling the number of neighbours K. It starts at 7 and, with
# min=1 and step=2, only visits odd values 1, 3, ..., 23 — presumably so a
# two-class majority vote cannot tie (TODO confirm intent).
n_neighbors = widgets.IntSlider(
    value=7,
    min=1,
    max=23,
    step=2,
    description='K',
    orientation='horizontal',
)


def exp_distance(x):
    """Turn neighbour distances into exponentially decaying, normalised weights.

    Meant to be passed as the ``weights`` callable of a scikit-learn KNN
    estimator. Every weight is proportional to ``0.05 ** distance`` and each
    row is normalised to sum to 1.

    Parameters
    ----------
    x : array-like of shape (n_queries, n_neighbors)
        Distances from each query point to its neighbours.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``x`` whose rows sum to 1.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        # Nothing to weight; keep the (empty) shape instead of reducing over it.
        return x.copy()

    # Shift every row by its smallest distance before exponentiating. The
    # common factor cancels in the normalisation, so the result is unchanged,
    # but the largest weight in each row is now exactly 1: the row sum can no
    # longer underflow to 0 and produce NaN weights for far-away queries.
    shifted = x - x.min(axis=1, keepdims=True)
    weights = 0.05 ** shifted
    return weights / weights.sum(axis=1, keepdims=True)


def exp_positions(x):
    """Weight neighbours by their column position, ignoring the distances.

    Meant to be passed as the ``weights`` callable of a scikit-learn KNN
    estimator. Only the shape of ``x`` is used: column ``j`` receives a
    weight proportional to ``0.75 ** j`` and each row is normalised to sum
    to 1. (This assumes columns are ordered nearest-first, as the estimator
    is expected to provide them.)

    Parameters
    ----------
    x : numpy.ndarray of shape (n_queries, n_neighbors)
        Neighbour distances; the values themselves are not read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_queries, n_neighbors) with identical rows.
    """
    n_queries, n_neighbors = x.shape[0], x.shape[1]
    # Geometric decay over neighbour positions: 1, 0.75, 0.75**2, ...
    decay = np.power(0.75, np.arange(n_neighbors))
    # One copy of the decay profile per query row.
    per_row = np.tile(decay, (n_queries, 1))
    return per_row / per_row.sum(axis=1, keepdims=True)


# Snapshot the classification data and grid under dedicated names. The widget
# callback below runs again on every slider/dropdown change, while the
# regression section further down rebinds the globals ``X``, ``y`` and ``xx``;
# reading those globals lazily would make this plot use the wrong data (and
# fail) as soon as the later cells have been executed.
clf_X, clf_y = X, y
clf_xx, clf_yy = xx, yy


# (label, value) pairs keep the dropdown readable; bare function objects would
# be listed by their repr ("<function exp_positions at 0x...>").
@widgets.interact(n_neighbors=n_neighbors, weights=[
    ('uniform', 'uniform'),
    ('exp_positions', exp_positions),
    ('distance', 'distance'),
    ('exp_distance', exp_distance),
])
def plot_clf(n_neighbors, weights):
    """Fit a KNN classifier and draw its probability surface and boundary.

    Parameters
    ----------
    n_neighbors : int
        Number of neighbours K, taken from the slider.
    weights : str or callable
        Weighting scheme forwarded to ``KNeighborsClassifier``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(7, 7))

    clf = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
    clf.fit(clf_X, clf_y)

    # Probability of the second class at every node of the evaluation grid.
    grid_points = np.c_[clf_xx.ravel(), clf_yy.ravel()]
    Z = clf.predict_proba(grid_points)[:, 1].reshape(clf_xx.shape)
    # Hard decision regions; their outline is drawn as the decision boundary.
    Z_boundary = Z > 0.5

    ax.contourf(clf_xx, clf_yy, Z, cmap='coolwarm', alpha=0.6)
    ax.contour(clf_xx, clf_yy, Z_boundary, colors='k', alpha=0.3)

    ax.scatter(
        clf_X[:, 0], clf_X[:, 1], c=clf_y, cmap=cm_bright, edgecolors="k"
    )

    ax.set_xlim(clf_xx.min(), clf_xx.max())
    ax.set_ylim(clf_yy.min(), clf_yy.max())
    ax.set_xticks(())
    ax.set_yticks(())

interactive(children=(IntSlider(value=7, description='K', max=23, min=1, step=2), Dropdown(description='weight…

## KNN для задачи регрессии

In [6]:
# Seeded generator used to draw the noise of the regression targets, so the
# synthetic data are reproducible.
random_state = np.random.RandomState(26)

In [7]:
# 1-D regression inputs: two evenly spaced sweeps over [0, 1] (10 and 20
# points) joined into a single vector of 30 samples.
# NOTE: this rebinds the ``X`` / ``y`` names used by the classification section.
X = np.concatenate((np.linspace(0, 1, 10), np.linspace(0, 1, 20)))
# Targets: exp(0.75 * x) plus Gaussian noise with standard deviation 0.2.
y = np.exp(0.75 * X) + random_state.normal(loc=0, scale=0.2, size=X.shape)

In [8]:
# Step of the dense 1-D grid on which the regressor is evaluated.
# NOTE: this rebinds ``h`` and ``xx`` from the classification section.
h = 0.005

# Grid starts one step below the smallest input and runs slightly past the
# largest one (``np.arange`` excludes the stop value).
xx = np.arange(X.min() - h, X.max() + 2 * h, h)

In [9]:
# Switch seaborn to the 'darkgrid' style; this is a global setting, so it
# applies to every figure drawn from this point on.
sns.set_style('darkgrid')

# New slider instance for the regression demo, configured like the
# classification one: odd K from 1 to 23, starting at 7.
n_neighbors = widgets.IntSlider(
    value=7,
    min=1,
    max=23,
    step=2,
    description='K',
    orientation='horizontal',
)


# Snapshot the regression data and grid under dedicated names: the callback
# below is re-run on every widget change, so it must not depend on whatever
# the generic globals ``X``, ``y`` and ``xx`` are bound to at that moment.
reg_X, reg_y, reg_xx = X, y, xx


# (label, value) pairs keep the dropdown readable; bare function objects would
# be listed by their repr ("<function exp_positions at 0x...>").
@widgets.interact(n_neighbors=n_neighbors, weights=[
    ('uniform', 'uniform'),
    ('exp_positions', exp_positions),
    ('distance', 'distance'),
    ('exp_distance', exp_distance),
])
def plot_reg(n_neighbors, weights):
    """Fit a KNN regressor on the 1-D sample and plot its prediction curve.

    Parameters
    ----------
    n_neighbors : int
        Number of neighbours K, taken from the slider.
    weights : str or callable
        Weighting scheme forwarded to ``KNeighborsRegressor``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(7, 4))

    reg = KNeighborsRegressor(n_neighbors=n_neighbors, weights=weights)
    # The estimator expects a 2-D (n_samples, n_features) input, hence the
    # extra axis on the 1-D vectors.
    reg.fit(reg_X[:, np.newaxis], reg_y)

    # Predictions on the dense grid.
    y_pred = reg.predict(reg_xx[:, np.newaxis])

    ax.scatter(reg_X, reg_y, c='black', label='выборка')
    ax.plot(reg_xx, y_pred, c='green', label=f'{n_neighbors}NN')
    ax.legend()

interactive(children=(IntSlider(value=7, description='K', max=23, min=1, step=2), Dropdown(description='weight…

## Проклятие размерности

In [None]:
from scipy.spatial.distance import pdist

In [None]:
# Fresh seeded generator for the random point clouds sampled below
# (rebinds ``random_state`` from the regression section).
random_state = np.random.RandomState(96)

In [None]:
# 10,000 points drawn uniformly from the unit cube in 3 dimensions.
vec = random_state.uniform(low=0.0, high=1.0, size=(10_000, 3))
# All pairwise Euclidean distances in condensed form: n * (n - 1) / 2 values,
# i.e. about 5e7 floats (~400 MB) for this sample size.
distances = pdist(vec, metric='euclidean')

mean = np.mean(distances)
std = np.std(distances)
print('mean: %.4f; std: %.4f' % (mean, std))

# Histogram of the pairwise distances.
fig, ax = plt.subplots(figsize=(7, 4))
_ = ax.hist(distances, bins=50)

In [None]:
# 1,000 points drawn uniformly from the unit cube in 1,000 dimensions.
vec = random_state.uniform(low=0.0, high=1.0, size=(1000, 1000))
# All pairwise Euclidean distances in condensed form (499,500 values).
distances = pdist(vec, metric='euclidean')

mean = np.mean(distances)
std = np.std(distances)
print('mean: %.4f; std: %.4f' % (mean, std))

# Histogram of the pairwise distances, for comparison with the 3-D case.
fig, ax = plt.subplots(figsize=(7, 4))
_ = ax.hist(distances, bins=50)