In [6]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset

# Hyperparameters
INPUT_BITS = 16  # number of bits in each operand
OUTPUT_BITS = INPUT_BITS + 1  # one extra bit so the carry-out of the sum fits
BATCH_SIZE = 512  # mini-batch size used by the DataLoaders
HIDDEN_SIZE = 64  # LSTM hidden state width
NUM_LAYERS = 2  # number of stacked LSTM layers
LEARNING_RATE = 0.001  # Adam learning rate
EPOCHS = 20  # number of passes over the training set

# Data generator
def generate_data(num_samples, input_bits=None):
    """Create random binary-addition examples, least-significant bit first.

    Two operands are drawn uniformly from ``[0, 2**input_bits)`` and summed.
    Operands and sum are then expanded into their binary digits with the
    least-significant bit at index 0.

    Args:
        num_samples: Number of (a, b, a + b) examples to generate.
        input_bits: Bit width of each operand. When ``None`` (the default)
            the module-level ``INPUT_BITS`` / ``OUTPUT_BITS`` are used;
            otherwise the target width is ``input_bits + 1``.

    Returns:
        A tuple ``(inputs, targets)`` of float32 tensors:
        ``inputs`` has shape ``(num_samples, input_bits, 2)`` where
        ``inputs[i, j, 0]`` / ``inputs[i, j, 1]`` are bit ``j`` of ``a`` / ``b``;
        ``targets`` has shape ``(num_samples, output_bits)`` and holds the
        bits of ``a + b``.
    """
    if input_bits is None:
        input_bits, output_bits = INPUT_BITS, OUTPUT_BITS
    else:
        output_bits = input_bits + 1

    # Same two draws, in the same order, as the original per-sample version,
    # so a fixed seed still yields the same data set.
    a = torch.randint(0, 2**input_bits, (num_samples,))
    b = torch.randint(0, 2**input_bits, (num_samples,))
    c = a + b

    # weights[j] == 2**j; masking with it isolates bit j of every value at
    # once, replacing the slow Python loop over samples and bits.
    weights = 2 ** torch.arange(max(input_bits, output_bits))

    def to_bits(values, width):
        # (N, 1) & (width,) broadcasts to an (N, width) table of 0/1 flags.
        return (values.unsqueeze(1) & weights[:width]).ne(0).to(torch.float32)

    inputs = torch.stack(
        [to_bits(a, input_bits), to_bits(b, input_bits)], dim=-1
    )
    targets = to_bits(c, output_bits)

    return inputs, targets

# RNN model definition
class BinaryAdderRNN(nn.Module):
    """Unidirectional LSTM followed by a per-time-step linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values emitted per time step.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=False
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the network on a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Raw logits (no sigmoid applied). One all-zero time step is
            appended to ``x`` before the LSTM, so the result has
            ``seq_len + 1`` steps: shape ``(batch, seq_len + 1)`` when
            ``output_size == 1``, else ``(batch, seq_len + 1, output_size)``.
        """
        # Append one extra all-zero time step. new_zeros takes the feature
        # width, dtype and device from x, so this works for any input_size
        # (previously hard-coded to 2) and for non-float32 inputs.
        pad_step = x.new_zeros((x.size(0), 1, x.size(-1)))
        x_padded = torch.cat([x, pad_step], dim=1)

        # LSTM over the padded sequence; final hidden/cell states are unused
        lstm_out, _ = self.lstm(x_padded)

        # Linear layer applied independently at every time step
        output = self.fc(lstm_out)
        # Drops the trailing axis only when output_size == 1
        return output.squeeze(-1)

# Generate the training and validation data
train_inputs, train_targets = generate_data(100000)
val_inputs, val_targets = generate_data(10000)

# Build the DataLoaders (only the training set is shuffled)
train_dataset = TensorDataset(train_inputs, train_targets)
val_dataset = TensorDataset(val_inputs, val_targets)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# Initialize the model, on the GPU when CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BinaryAdderRNN(
    input_size=2,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    output_size=1
).to(device)

# The model returns raw logits, so the loss applies the sigmoid itself
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Train the model, validating after every epoch
for epoch in range(EPOCHS):
    # ---- training pass ----
    model.train()
    total_loss = 0

    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch mean loss for the epoch report
        total_loss += loss.item()

    # ---- validation pass ----
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)

            # Per-bit accuracy: threshold the sigmoid of each logit at 0.5
            predictions = torch.sigmoid(outputs).gt(0.5).float()
            correct += predictions.eq(targets).sum().item()
            total += targets.numel()

    accuracy = 100 * correct / total
    print(f'Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader):.4f} | Accuracy: {accuracy:.2f}%')

# Try the trained model on a single example
def test_model(a, b):
    """Add ``a`` and ``b`` with the trained model and print the comparison.

    Both operands are encoded as ``INPUT_BITS`` binary digits
    (least-significant bit first), fed through the module-level ``model``,
    and the thresholded output bits are decoded back into an integer that
    is printed next to the true sum.

    Args:
        a: First operand, an integer in ``[0, 2**INPUT_BITS)``.
        b: Second operand, an integer in ``[0, 2**INPUT_BITS)``.

    Raises:
        ValueError: If an operand is negative or does not fit in
            ``INPUT_BITS`` bits. Previously such values were silently
            truncated (or crashed while parsing ``bin()`` output), which
            made the printed comparison meaningless.
    """
    limit = 2**INPUT_BITS
    for name, value in (("a", a), ("b", b)):
        if not 0 <= value < limit:
            raise ValueError(f"{name} must be in [0, {limit}), got {value}")

    # Bit j of each operand, least-significant bit first
    a_bin = [(a >> j) & 1 for j in range(INPUT_BITS)]
    b_bin = [(b >> j) & 1 for j in range(INPUT_BITS)]

    # Shape (1, INPUT_BITS, 2): one batch entry, one (a_bit, b_bit) pair per step
    input_tensor = torch.tensor([list(zip(a_bin, b_bin))], dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        output = model(input_tensor.to(device))
        prediction = (torch.sigmoid(output) > 0.5).int().cpu().numpy()[0]

    # The prediction is LSB first; reverse it to get the usual MSB-first string
    pred_bin = ''.join(str(bit) for bit in prediction[::-1])
    actual_sum = a + b
    pred_sum = int(pred_bin, 2)

    print(f'Dodawanie: {a} + {b} = {actual_sum}')
    print(f'Przewidziane: {pred_sum} (binarnie: {pred_bin})')
    print(f'Poprawność: {actual_sum == pred_sum}')

# Example tests
test_model(3, 5)        # 8
test_model(1000, 2500)  # 3500
test_model(65535, 1)    # 65536 (overflows 16 bits)

Epoch 1/20 | Loss: 0.6796 | Accuracy: 67.76%
Epoch 2/20 | Loss: 0.2315 | Accuracy: 98.60%
Epoch 3/20 | Loss: 0.0208 | Accuracy: 100.00%
Epoch 4/20 | Loss: 0.0065 | Accuracy: 100.00%
Epoch 5/20 | Loss: 0.0030 | Accuracy: 100.00%
Epoch 6/20 | Loss: 0.0017 | Accuracy: 100.00%
Epoch 7/20 | Loss: 0.0011 | Accuracy: 100.00%
Epoch 8/20 | Loss: 0.0008 | Accuracy: 100.00%
Epoch 9/20 | Loss: 0.0006 | Accuracy: 100.00%
Epoch 10/20 | Loss: 0.0004 | Accuracy: 100.00%
Epoch 11/20 | Loss: 0.0003 | Accuracy: 100.00%
Epoch 12/20 | Loss: 0.0003 | Accuracy: 100.00%
Epoch 13/20 | Loss: 0.0002 | Accuracy: 100.00%
Epoch 14/20 | Loss: 0.0002 | Accuracy: 100.00%
Epoch 15/20 | Loss: 0.0002 | Accuracy: 100.00%
Epoch 16/20 | Loss: 0.0001 | Accuracy: 100.00%
Epoch 17/20 | Loss: 0.0001 | Accuracy: 100.00%
Epoch 18/20 | Loss: 0.0001 | Accuracy: 100.00%
Epoch 19/20 | Loss: 0.0001 | Accuracy: 100.00%
Epoch 20/20 | Loss: 0.0001 | Accuracy: 100.00%
Dodawanie: 3 + 5 = 8
Przewidziane: 8 (binarnie: 00000000000001000)
Popr