In [48]:
## Задание №1 (3 балла)

# Реализовать алгоритм классификации KNN для мультиклассовой классификации с методом `predict_proba` для предсказания вероятностей классов.

# Использовать датасет Wine (`load_wine` из `sklearn.datasets`).

In [49]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn import metrics
from sklearn.model_selection import train_test_split
# Load the Wine dataset and pull out the feature matrix and the class labels.
data = load_wine()
X = data["data"]
y = data["target"]

In [50]:
class MyKNN:
    """Brute-force k-nearest-neighbours classifier for multi-class problems.

    Neighbours are ranked by Euclidean distance and each of them casts one
    unweighted vote.

    Parameters
    ----------
    k : int, default=3
        Number of neighbours that vote. When the training set holds fewer
        than ``k`` samples, all of them are used.

    Attributes (set by ``fit``)
    ---------------------------
    X_train : ndarray
        Training samples, first axis indexes the samples.
    y_train : ndarray of shape (n_samples,)
        Training labels.
    classes_ : ndarray
        Sorted unique labels; column ``j`` of ``predict_proba`` refers to
        ``classes_[j]``.
    n_classes : int
        Number of distinct labels.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k=3):
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X_train, y_train):
        """Memorise the training set.

        Parameters
        ----------
        X_train : array-like, first axis indexes the samples
        y_train : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If the training set is empty, ``y_train`` is not one-dimensional,
            or the number of samples and labels differ.
        """
        # Accept lists / tuples as well as arrays: later code relies on
        # ``.shape`` and on fancy indexing.
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if X_train.ndim == 0 or X_train.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample")
        if y_train.ndim != 1:
            raise ValueError("y_train must be one-dimensional")
        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                f"X_train has {X_train.shape[0]} samples but y_train has "
                f"{y_train.shape[0]} labels"
            )
        self.X_train = X_train
        self.y_train = y_train
        self.classes_ = np.unique(y_train)
        self.n_classes = len(self.classes_)

    def calculate_euc_distance(self, x, y):
        """Return the Euclidean distance between two samples."""
        # Subtract in floating point: unsigned / narrow integer dtypes would
        # otherwise wrap around and yield a wrong distance.
        diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
        return np.sqrt((diff ** 2).sum())

    @staticmethod
    def _as_feature_matrix(X):
        """Return ``X`` as a float array of shape (n_samples, n_features)."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 0:
            raise ValueError("expected an array of samples, got a scalar")
        # Flatten every axis after the first one. The explicit product (rather
        # than -1) keeps the reshape valid for an empty batch of samples.
        return X.reshape(X.shape[0], int(np.prod(X.shape[1:], dtype=int)))

    def calculate_matrix(self, X_test):
        """Return the (n_test, n_train) matrix of Euclidean distances.

        Raises
        ------
        ValueError
            If test and training samples have a different number of features.
        """
        train = self._as_feature_matrix(self.X_train)
        test = self._as_feature_matrix(X_test)
        if test.shape[1] != train.shape[1]:
            raise ValueError(
                f"X_test has {test.shape[1]} features per sample, "
                f"expected {train.shape[1]}"
            )
        distances = np.empty((test.shape[0], train.shape[0]))
        # One vectorised pass over the training set per test sample: much
        # faster than a double Python loop, without materialising the full
        # (n_test, n_train, n_features) difference tensor.
        for i, sample in enumerate(test):
            distances[i] = np.sqrt(((sample - train) ** 2).sum(axis=1))
        return distances

    def _neighbor_labels(self, X_test):
        """Return the labels of the nearest neighbours, closest first.

        The result has shape (n_test, min(k, n_train)).
        """
        distances = self.calculate_matrix(X_test)
        # A stable sort makes equal distances resolve by training order.
        nearest = np.argsort(distances, axis=1, kind="stable")[:, :self.k]
        return np.asarray(self.y_train)[nearest]

    def predict(self, X_test):
        """Predict a class label for every test sample by majority vote.

        When several classes share the highest vote count, the one met first
        while walking the neighbours from nearest to farthest wins.
        """
        predictions = []
        for labels in self._neighbor_labels(X_test):
            votes = {}
            for label in labels:
                votes[label] = votes.get(label, 0) + 1
            # max() returns the first maximal key in insertion order, which is
            # the distance order of the neighbours.
            predictions.append(max(votes, key=votes.get))
        return np.array(predictions)

    def predict_proba(self, X_test):
        """Return class probabilities of shape (n_test, n_classes).

        The probability of a class is the fraction of the voting neighbours
        that carry its label; each row sums to 1.
        """
        neighbor_labels = self._neighbor_labels(X_test)
        # Number of neighbours that actually voted: it is smaller than self.k
        # when the training set has fewer than k samples, and dividing by
        # self.k would then make the rows sum to less than 1.
        n_voters = neighbor_labels.shape[1]
        # classes_ is sorted and contains every training label, so
        # searchsorted maps each label to its column index.
        class_idx = np.searchsorted(self.classes_, neighbor_labels)
        counts = (class_idx[:, :, None] == np.arange(self.n_classes)).sum(axis=1)
        return counts / n_voters

    def predict_log_proba(self, X_test):
        """Return the natural log of ``predict_proba``.

        Probabilities are clipped to [1e-15, 1 - 1e-15] first, so a class
        with no votes yields a large negative finite value instead of -inf.
        """
        epsilon = 1e-15
        proba = np.clip(self.predict_proba(X_test), epsilon, 1 - epsilon)
        return np.log(proba)


In [51]:
# Hold out 25% of the samples for evaluation. The fixed random_state makes the
# shuffled split, and every number printed below, reproducible on a fresh
# kernel (Restart & Run All).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, shuffle=True, random_state=42
)

In [52]:
# Build the classifier with 5 voting neighbours and fit it on the training split.
knn = MyKNN(k=5)
knn.fit(X_train, y_train)

In [53]:
# Majority-vote class label for every sample of the test split.
y_pred = knn.predict(X_test)

In [54]:
# Per-class neighbour-vote fractions: one row per test sample, one column per class.
y_proba = knn.predict_proba(X_test)

In [55]:
# Log of the predicted probabilities (clipped away from 0 and 1 before the log).
y_log_proba = knn.predict_log_proba(X_test)

In [56]:
# Sanity check: print the probability vector of the first three test samples
# together with its sum.
for i in range(3):
    print(f"Объект {i}: {y_proba[i]}")
    print(f"Сумма вероятностей: {y_proba[i].sum()}")

Объект 0: [0.  0.4 0.6]
Сумма вероятностей: 1.0
Объект 1: [0.  0.8 0.2]
Сумма вероятностей: 1.0
Объект 2: [0.  0.4 0.6]
Сумма вероятностей: 1.0
