In [None]:
import numpy as np

def one_hot(Y: np.ndarray, num_class: int) -> np.ndarray:  # TESTED
    """Encode class indices as one-hot rows.

    Parameters
    ----------
    Y : array-like, shape [n_samples]
        Class index of every sample. Lists are accepted, and so are
        float arrays whose values are whole numbers (e.g. ``1.0``).
    num_class : int
        Number of classes, i.e. the number of columns in the result.

    Returns
    -------
    np.ndarray
        Float array of shape [n_samples, num_class]; row ``i`` holds a 1 in
        column ``Y[i]`` and 0 everywhere else.

    Raises
    ------
    IndexError
        Raised by NumPy indexing when a label is >= ``num_class`` or when the
        labels cannot be used as integer indices (e.g. ``1.5``).

    Notes
    -----
    Negative labels follow NumPy indexing rules and count from the last class.
    """
    labels = np.asarray(Y)

    # An empty array defaults to float64 and labels are sometimes stored as
    # floats; neither can be used as an index until converted. Only convert
    # when no information is lost, otherwise let NumPy raise as before.
    if (
        labels.dtype.kind == "f"
        and np.all(np.isfinite(labels))
        and np.all(labels == np.floor(labels))
    ):
        labels = labels.astype(np.intp)

    n_samples = labels.shape[0]
    one_hot_Y = np.zeros((n_samples, num_class))
    one_hot_Y[np.arange(n_samples), labels] = 1
    return one_hot_Y



class BatchGenerator:  # TESTED
    """Iterate over a dataset in mini-batches, one pass (epoch) per iteration."""

    def __init__(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        batch_size: int,
        shuffle: bool = True,
        *,
        drop_last: bool = True,
    ):
        """
        X: np.ndarray
            Input data, shape [n_samples, n_features]
        Y: np.ndarray
            Labels, shape [n_samples] or [n_samples, n_classes]
        batch_size: int
            Mini-batch size; must be positive
        shuffle: bool
            Whether to reshuffle the sample order before every epoch
        drop_last: bool
            If True (default, the historical behavior) a trailing batch with
            fewer than ``batch_size`` samples is skipped; if False it is
            yielded as a shorter batch.

        Raises ValueError if ``batch_size`` is not positive.
        """
        # A zero step breaks range() and a negative one silently yields no
        # batches, so reject both up front.
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        self.X = X
        self.Y = Y
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.num_samples = X.shape[0]  # number of samples
        self.indices = np.arange(self.num_samples)  # sample order, shuffled in place

    def __iter__(self):
        """
        Yield ``(X_batch, Y_batch)`` pairs covering one epoch.
        """
        if self.shuffle:
            # New random order at the start of every epoch.
            np.random.shuffle(self.indices)

        # With drop_last the last start index must leave room for a full batch.
        if self.drop_last:
            stop = self.num_samples - self.batch_size + 1
        else:
            stop = self.num_samples

        for start_idx in range(0, stop, self.batch_size):
            batch_indices = self.indices[start_idx:start_idx + self.batch_size]
            yield self.X[batch_indices], self.Y[batch_indices]

    def __len__(self):
        """
        Return the number of batches produced in one epoch.
        """
        full_batches, remainder = divmod(self.num_samples, self.batch_size)
        if remainder and not self.drop_last:
            return full_batches + 1
        return full_batches



class LinearClassifier:
    """Linear layer ``X @ Weights`` (no bias term) with a per-batch step size.

    Typical use: call the instance on a batch, pass the loss outputs to
    ``backprop``, then call ``upply_grad`` to update the weights.
    """

    def __init__(self, n_features: int, n_classes: int):
        """Create weights of shape [n_features, n_classes], uniform in [-0.5, 0.5)."""
        self.Weights = np.random.rand(n_features, n_classes) - 0.5
        self.weights_grad = None
        self.bias_grad = None  # kept for interface compatibility; no bias is used
        self.input = None
        self.output = None

    def __call__(self, X: np.ndarray):
        """Return the logits ``X @ Weights`` and remember ``X`` for ``backprop``."""
        self.input = X.copy()
        self.output = X @ self.Weights
        return self.output

    def backprop(self, grads_per_class: np.ndarray, Y_pred: np.ndarray, Y_real: np.ndarray):
        """Compute the scaled weight update for the last forward pass.

        Parameters
        ----------
        grads_per_class : np.ndarray
            Gradient of the loss with respect to the logits, one column per class.
        Y_pred : np.ndarray
            Predicted probabilities, one column per class.
        Y_real : np.ndarray
            Targets, one column per class; entries equal to 1 mark the true class.

        The result is stored in ``self.weights_grad`` and is applied by
        ``upply_grad``. The step size ``alpha`` is printed on every call.
        """
        self.weights_grad = self.input.T @ grads_per_class
        # Change of the logits caused by a unit step along the gradient.
        logits_updated = self.input @ self.weights_grad

        n_classes = self.Weights.shape[1]
        residual = Y_pred[:, :n_classes] - (Y_real[:, :n_classes] == 1)
        direction = logits_updated[:, :n_classes]

        # alpha is the coefficient that projects the residual onto the logit
        # change: <residual, direction> / <direction, direction>.
        numerator = np.sum(residual * direction)
        denominator = np.sum(direction ** 2)

        # A zero gradient gives 0/0; without the guard alpha would be NaN and
        # the next update would turn every weight into NaN.
        alpha = numerator / denominator if denominator != 0 else 0.0
        print(alpha)
        self.weights_grad = self.weights_grad * alpha

    def upply_grad(self):
        """Subtract the update computed by ``backprop`` from the weights (in place)."""
        self.Weights -= self.weights_grad

    def apply_grad(self):
        """Correctly spelled alias of ``upply_grad``."""
        self.upply_grad()


class LinearLoss():
    """Per-class sigmoid cross-entropy on one-hot targets.

    Calling the instance returns the gradient with respect to the logits
    together with the predictions and the one-hot targets; ``get_loss``
    reports the loss value for the most recent call.
    """

    # Predictions are clipped to [_EPS, 1 - _EPS] before taking logarithms so
    # that saturated outputs give a finite loss instead of inf/NaN.
    _EPS = 1e-12

    def __init__(self, n_classes: int, loss_type: str = "sigmoid", lr: float = 0.01):
        """
        n_classes: int
            Number of classes (width of the one-hot targets)
        loss_type: str
            Loss variant; only "sigmoid" is implemented
        lr: float
            Stored on the instance; not used by this class
        """
        self.n_classes = n_classes
        self.loss_type = loss_type
        self.lr = lr
        self.Y_real = None
        self.Y_pred = None

    def sigmoid(self, X: np.ndarray):
        """Return the element-wise logistic function ``1 / (1 + exp(-X))``."""
        # For very negative X, exp(-X) overflows to inf and the result is the
        # correct limit 0.0; only the overflow warning is suppressed.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-X))

    def __call__(self, Y_logit: np.ndarray, Y_real: np.ndarray):
        """Compute predictions and the gradient with respect to the logits.

        Parameters
        ----------
        Y_logit : np.ndarray
            Raw scores, shape [n_samples, n_classes].
        Y_real : np.ndarray
            Integer class labels, shape [n_samples].

        Returns
        -------
        tuple
            ``(gradient, Y_pred, Y_real_one_hot)`` where ``gradient`` is
            ``Y_pred - Y_real_one_hot``.

        Raises
        ------
        ValueError
            If ``loss_type`` is not "sigmoid".
        """
        if self.loss_type != "sigmoid":
            raise ValueError(
                f"unsupported loss_type {self.loss_type!r}; only 'sigmoid' is implemented"
            )

        self.Y_real = one_hot(Y_real, self.n_classes)
        self.Y_pred = self.sigmoid(Y_logit)

        gradient = self.Y_pred[:, :self.n_classes] - (self.Y_real == 1)
        return gradient, self.Y_pred, self.Y_real

    def get_loss(self):
        """Return the summed binary cross-entropy of the most recent call.

        Raises RuntimeError if no predictions or targets have been stored yet.
        """
        if self.Y_pred is None or self.Y_real is None:
            raise RuntimeError("get_loss() requires a previous call of the loss")

        is_target = self.Y_real[:, :self.n_classes] == 1
        pred = np.clip(self.Y_pred[:, :self.n_classes], self._EPS, 1 - self._EPS)
        # -log(p) where the class is the target, -log(1 - p) everywhere else.
        per_element = np.where(is_target, -np.log(pred), -np.log(1 - pred))
        return np.sum(per_element)
    


# Demo: fit the linear classifier to five hand-written samples with 4 features
# and 3 classes.
np.random.seed(3)  # seed before the classifier draws its initial weights
loss = LinearLoss(n_classes=3)
linear = LinearClassifier(n_features=4, n_classes=3)

labels = np.array([0,1,2,2, 1])
# The inputs never change, so the matrix is built once instead of per iteration.
features = np.array([
    [3, 1, 2, 0],
    [2, 3, 1, 0],
    [0, 6, 7, 0],
    [1, 1, 3, 0],
    [1, 3, 1, 0],
])

for i in range(1000):
    # Forward pass, loss gradient, scaled update, weight step.
    out = linear(features)
    grads = loss(out, labels)
    linear.backprop(grads[0], grads[1], grads[2])
    linear.upply_grad()

# Predicted class per sample, taken from the logits of the last forward pass.
print(np.argmax(out, axis=1))

0.010981202467635105
0.01294830167320047
0.017048472237918222
0.025806915800033133
0.0437570726431439
0.0691013911704411
0.08460149427246136
0.08766431074948791
0.08447320066674642
0.08055222749084255
0.07669018385829383
0.07336671733013422
0.07053464881912688
0.06812074819192433
0.06605013765596715
0.06425969993111877
0.06269885431448713
0.061327590311156976
0.06011423985903411
0.05903358613125851
0.05806538435008643
0.0571932366240599
0.05640374246451043
0.05568585652034909
0.05503040026132733
0.05442968790303112
0.05387723744404729
0.05336754552591487
0.05289591052179351
0.05245829237046354
0.05205120064056753
0.05167160446221179
0.05131685953302489
0.05098464855920862
0.05067293234599018
0.05037990938893306
0.05010398229654663
0.049843729737537874
0.04959788288303727
0.04936530552708463
0.04914497723351344
0.048935978985834364
0.04873748091747206
0.04854873177921437
0.048369049863831115
0.048197815158184734
0.04803446253357469
0.047878475817651096
0.04772938261765738
0.047586749786