In [2]:
import torch
import numpy as np

In [3]:
class SimpleNNBarebone:
    """Three-layer fully connected classifier with hand-written backpropagation.

    Architecture: Linear -> sigmoid -> Linear -> tanh -> Linear -> softmax.

    ``forward`` caches its input and every intermediate activation on the
    instance (``x``, ``z1``, ``a1``, ``z2``, ``a2``, ``z3``, ``a3``).
    ``backward`` reads that cache, derives the gradients analytically and
    applies one plain gradient-descent update to the parameters in place.

    Args:
        input_size: Number of input features (rows of ``W1``).
        hidden1_size: Width of the first (sigmoid) hidden layer.
        hidden2_size: Width of the second (tanh) hidden layer.
        output_size: Number of output classes (columns of ``W3``).
        init_scale: Factor applied to the standard-normal weight samples.

    Attributes:
        W1, W2, W3: Weight matrices of shape ``(fan_in, fan_out)``.
        b1, b2, b3: Bias vectors, initialised to zero.
    """

    def __init__(self, input_size=256, hidden1_size=64, hidden2_size=16,
                 output_size=4, init_scale=0.01):
        # Scale first and only then enable gradient tracking. Writing
        # `torch.randn(..., requires_grad=True) * scale` would store the result
        # of a multiplication, i.e. a NON-leaf tensor: `.grad` is never
        # populated for it and optimizers refuse to accept it.
        self.W1 = (torch.randn(input_size, hidden1_size) * init_scale).requires_grad_()
        self.b1 = torch.zeros(hidden1_size, requires_grad=True)

        self.W2 = (torch.randn(hidden1_size, hidden2_size) * init_scale).requires_grad_()
        self.b2 = torch.zeros(hidden2_size, requires_grad=True)

        self.W3 = (torch.randn(hidden2_size, output_size) * init_scale).requires_grad_()
        self.b3 = torch.zeros(output_size, requires_grad=True)

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + torch.exp(-x))

    def tanh(self, x):
        """Return the element-wise hyperbolic tangent of ``x``."""
        return torch.tanh(x)

    def softmax(self, x):
        """Return the softmax of ``x`` along its last dimension.

        The per-row maximum is subtracted before exponentiating so that large
        logits cannot overflow ``exp``; the shift does not change the result.
        """
        exp_x = torch.exp(x - torch.max(x, dim=-1, keepdim=True)[0])
        return exp_x / torch.sum(exp_x, dim=-1, keepdim=True)

    def forward(self, x):
        """Run the network on a batch and cache everything ``backward`` needs.

        Args:
            x: 2-D tensor of shape ``(batch, input_size)`` (``mm`` requires 2-D).

        Returns:
            Tensor of shape ``(batch, output_size)`` with class probabilities;
            every row sums to 1.
        """
        self.x = x
        self.z1 = x.mm(self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)

        self.z2 = self.a1.mm(self.W2) + self.b2
        self.a2 = self.tanh(self.z2)

        self.z3 = self.a2.mm(self.W3) + self.b3
        self.a3 = self.softmax(self.z3)

        return self.a3

    def compute_loss(self, output, target, eps=1e-12):
        """Return the cross-entropy between ``output`` and ``target``.

        The loss is SUMMED over every element of the batch, whereas
        ``backward`` scales its gradients by ``1 / batch_size``; the applied
        step is therefore the gradient of this value divided by the batch size.

        Args:
            output: Predicted probabilities, as returned by ``forward``.
            target: Target distribution with the same shape as ``output``.
            eps: Lower bound applied to ``output`` before taking the log.

        Returns:
            Scalar tensor ``-sum(target * log(output))``.
        """
        # A softmax probability can underflow to exactly 0; without the clamp
        # `0 * log(0)` evaluates to `0 * -inf = nan` and poisons the whole sum.
        return -torch.sum(target * torch.log(output.clamp_min(eps)))

    def backward(self, target, learning_rate=0.01):
        """Backpropagate by hand and apply one gradient-descent step.

        Uses the activations cached by the most recent ``forward`` call, so
        ``forward`` must have been run on the matching batch first.

        Args:
            target: Tensor with the same shape as the cached output ``a3``.
                The shortcut ``a3 - target`` used below is the exact
                softmax/cross-entropy gradient only when every target row
                sums to 1 (e.g. one-hot rows).
            learning_rate: Step size of the parameter update.

        Raises:
            ValueError: If ``target`` contains no samples.
        """
        m = target.size(0)
        if m == 0:
            # Dividing by a zero batch size would produce NaN gradients and
            # silently overwrite every weight with NaN.
            raise ValueError("backward() requires a non-empty batch")

        # Gradients are derived analytically, so autograd does not need to
        # record any of the operations below.
        with torch.no_grad():
            # Softmax + cross-entropy combined: dL/dz3 = p - t.
            dL_dz3 = self.a3 - target

            dL_dW3 = self.a2.T.mm(dL_dz3) / m
            dL_db3 = torch.sum(dL_dz3, dim=0) / m

            dL_da2 = dL_dz3.mm(self.W3.T)

            dL_dz2 = dL_da2 * (1 - self.a2 ** 2)  # tanh derivative

            dL_dW2 = self.a1.T.mm(dL_dz2) / m
            dL_db2 = torch.sum(dL_dz2, dim=0) / m

            dL_da1 = dL_dz2.mm(self.W2.T)

            dL_dz1 = dL_da1 * (self.a1 * (1 - self.a1))  # sigmoid derivative

            dL_dW1 = self.x.T.mm(dL_dz1) / m
            dL_db1 = torch.sum(dL_dz1, dim=0) / m

            # All gradients were computed from the old weights above; only
            # now are the parameters modified in place.
            self.W1 -= learning_rate * dL_dW1
            self.b1 -= learning_rate * dL_db1
            self.W2 -= learning_rate * dL_dW2
            self.b2 -= learning_rate * dL_db2
            self.W3 -= learning_rate * dL_dW3
            self.b3 -= learning_rate * dL_db3

    def train_step(self, x, target, learning_rate=0.01):
        """Run forward, compute the loss, then apply one parameter update.

        Args:
            x: Input batch, see ``forward``.
            target: Target distributions, see ``backward``.
            learning_rate: Step size forwarded to ``backward``.

        Returns:
            The loss measured BEFORE the weights were updated.
        """
        output = self.forward(x)
        loss = self.compute_loss(output, target)
        self.backward(target, learning_rate)
        return loss

In [4]:
# Seed the global RNG so the random weights and the toy batch below are
# identical on every Restart & Run All.
torch.manual_seed(0)

model = SimpleNNBarebone()

# Toy batch: 10 random input vectors with 256 features each.
x = torch.randn(10, 256)

# Random one-hot targets over 4 classes: pick rows of the 4x4 identity matrix.
y = torch.eye(4)[torch.randint(0, 4, (10,))]


In [39]:
# Apply one gradient-descent update on the toy batch. The returned loss is the
# value measured before the weights changed. Note that this cell mutates
# `model`, so re-running it applies an additional step each time.
loss = model.train_step(x, target=y, learning_rate=0.1)
f"Loss after one step: {loss.item()}"

'Loss after one step: 12.942134857177734'