# 1. Алгоритм шифра Цезаря

Инициализируем входные данные

In [32]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Supported alphabets; the trailing space is a regular symbol of each one.
lang_alphabets = {
    'russian': 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя ',
    'english': 'abcdefghijklmnopqrstuvwxyz ',
}

# Per-language lookup tables built in a single pass:
# symbol -> position and position -> symbol.
char_to_index = {}
index_to_char = {}
for lang, alphabet in lang_alphabets.items():
    char_to_index[lang] = dict(zip(alphabet, range(len(alphabet))))
    index_to_char[lang] = dict(enumerate(alphabet))

# Sizes derived from the alphabets: number of symbols per language.
input_layer_size = {name: len(symbols) for name, symbols in lang_alphabets.items()}
hidden_layer_size = 128  # width of the hidden layer
# Separate dict object with the same per-language sizes as the input side.
output_layer_size = dict(input_layer_size)

Реализуем алгоритм шифра Цезаря

In [33]:
def caesar_cipher(text, shift, alphabet):
    """Shift every symbol of ``text`` by ``shift`` positions within ``alphabet``.

    Args:
        text: String to transform.
        shift: Number of positions to move. May be negative or larger than
            the alphabet; the position wraps around cyclically.
        alphabet: Ordered sequence of symbols that defines the cipher.

    Returns:
        The transformed string. Symbols that do not occur in ``alphabet``
        are copied through unchanged.
    """
    size = len(alphabet)

    # Build the symbol -> position table once instead of scanning the
    # alphabet for every character. ``setdefault`` keeps the first
    # occurrence, which is what ``alphabet.index`` would return.
    positions = {}
    for pos, symbol in enumerate(alphabet):
        positions.setdefault(symbol, pos)

    shifted = []
    for char in text:
        pos = positions.get(char)
        if pos is None:
            # Not part of the alphabet: leave as is.
            shifted.append(char)
        else:
            shifted.append(alphabet[(pos + shift) % size])
    # Join once at the end to avoid quadratic string concatenation.
    return "".join(shifted)

Сгенерируем данные

In [34]:
def generate_data(num_samples=5000, shift=2, language='russian', *, min_length=5, max_length=15):
    """Generate random strings together with their Caesar-shifted versions.

    Args:
        num_samples: Number of pairs to produce.
        shift: Shift passed to ``caesar_cipher``.
        language: Key into ``lang_alphabets`` selecting the alphabet.
        min_length: Smallest length of a generated string (inclusive).
        max_length: Largest length of a generated string (inclusive).

    Returns:
        A list of ``(encoded_text, random_text)`` tuples.

    Raises:
        KeyError: If ``language`` is not a key of ``lang_alphabets``.
        ValueError: If the length bounds are negative or inverted.
    """
    if min_length < 0 or max_length < min_length:
        raise ValueError(
            f"invalid length bounds: min_length={min_length}, max_length={max_length}"
        )

    alphabet = lang_alphabets[language]
    samples = []
    for _ in range(num_samples):
        # Draw the length first, then the symbols, so the sequence of calls
        # into ``random`` matches the original for the default bounds.
        length = random.randint(min_length, max_length)
        random_text = ''.join(random.choices(alphabet, k=length))
        encoded_text = caesar_cipher(random_text, shift, alphabet)
        samples.append((encoded_text, random_text))
    return samples

Подготовка данных для обучения

In [35]:
class CipherDataset(Dataset):
    """Dataset that serves (encoded, original) string pairs as index tensors."""

    def __init__(self, data, language):
        """Store the pairs and pick the symbol table for ``language``.

        Args:
            data: Indexable collection of ``(encoded, original)`` strings.
            language: Key into the module-level ``char_to_index`` mapping.
        """
        self.data = data
        self.language = language
        self.char_to_index = char_to_index[language]

    def __len__(self):
        """Return the number of stored pairs."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the pair at ``index`` as two ``torch.long`` index tensors."""
        cipher_text, plain_text = self.data[index]
        lookup = self.char_to_index
        cipher_ids = [lookup[symbol] for symbol in cipher_text]
        plain_ids = [lookup[symbol] for symbol in plain_text]
        cipher_tensor = torch.tensor(cipher_ids, dtype=torch.long)
        plain_tensor = torch.tensor(plain_ids, dtype=torch.long)
        return cipher_tensor, plain_tensor

Реализуем функцию для добавления паддинга

In [36]:
def collate_fn(batch):
    """Right-pad a batch of (encoded, original) index tensors and stack them.

    Both members of every pair are padded with zeros up to the length of the
    longest *encoded* sequence in the batch.

    NOTE(review): 0 is also a valid symbol index when indices come from
    ``enumerate`` over the alphabet, so padded positions cannot be told
    apart from that symbol - confirm whether the loss should mask them.

    Args:
        batch: List of ``(encoded, original)`` 1-D ``torch.long`` tensors.

    Returns:
        Tuple ``(padded_encoded, padded_original)`` of 2-D tensors.
    """
    def _right_pad(seq, width):
        # Append as many zeros as needed to reach ``width`` elements.
        filler = torch.zeros(width - len(seq), dtype=torch.long)
        return torch.cat([seq, filler])

    cipher_seqs = [pair[0] for pair in batch]
    plain_seqs = [pair[1] for pair in batch]

    width = max(map(len, cipher_seqs))

    padded_encoded = torch.stack([_right_pad(seq, width) for seq in cipher_seqs])
    padded_original = torch.stack([_right_pad(seq, width) for seq in plain_seqs])
    return padded_encoded, padded_original

Реализуем архитектуру нейронной сети

In [37]:
class SimpleRNN(nn.Module):
    """Embedding -> RNN -> linear head that scores every sequence position."""

    def __init__(self, input_size):
        """Create the layers.

        Args:
            input_size: Number of distinct symbol indices. Used both as the
                embedding vocabulary size and as the number of output scores.
        """
        super().__init__()
        embedding_dim = 30
        self.embedding = nn.Embedding(input_size, embedding_dim)
        self.rnn = nn.RNN(
            input_size=embedding_dim,
            hidden_size=hidden_layer_size,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_layer_size, input_size)

    def forward(self, x):
        """Return per-position scores for the index tensor ``x``.

        The RNN is built with ``batch_first=True``, so the leading dimension
        of ``x`` is treated as the batch dimension.
        """
        embedded = self.embedding(x)
        # Only the per-step outputs are used; the final hidden state is dropped.
        hidden_states, _final_state = self.rnn(embedded)
        scores = self.fc(hidden_states)
        return scores

Реализуем функцию для обучения

In [38]:
def train(model, dataloader, criterion, optimizer, num_epochs):
    """Train ``model`` and print the mean batch loss after every epoch.

    The number of classes is read from the model output, so the function
    does not depend on module-level size tables or on a ``dataset.language``
    attribute; any iterable of ``(inputs, targets)`` batches works.

    Args:
        model: Module mapping a batch of index tensors to per-position scores
            whose last dimension is the number of classes.
        dataloader: Iterable yielding ``(encoded_x, original_y)`` batches.
        criterion: Loss taking ``(scores, targets)`` with scores flattened to
            two dimensions and targets flattened to one.
        optimizer: Optimizer over the model parameters.
        num_epochs: Number of passes over ``dataloader``.

    Raises:
        ValueError: If ``dataloader`` yields no batches in an epoch.
    """
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        num_batches = 0
        for encoded_x, original_y in dataloader:
            optimizer.zero_grad()

            output = model(encoded_x)

            # Merge all leading dimensions so that every position becomes
            # one classification example. ``reshape`` also copes with
            # non-contiguous tensors.
            output = output.reshape(-1, output.size(-1))
            original_y = original_y.reshape(-1)
            loss = criterion(output, original_y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            # Counted here rather than via len() so loaders without a
            # length are supported as well.
            num_batches += 1

        if num_batches == 0:
            raise ValueError("dataloader yielded no batches")
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / num_batches}')

Реализуем функцию для проверки качества

In [39]:
def evaluate(model, encoded_text, language):
    """Decode ``encoded_text`` with ``model`` and return the predicted string.

    The model is switched to evaluation mode and left in that mode.

    Args:
        model: Module mapping an index tensor with a leading batch dimension
            to per-position scores over the alphabet.
        encoded_text: String whose symbols all belong to the alphabet of
            ``language``.
        language: Key into ``char_to_index`` / ``index_to_char``.

    Returns:
        The string formed by the highest-scoring symbol at every position;
        ``''`` for empty input.

    Raises:
        KeyError: If ``language`` is unknown or ``encoded_text`` contains a
            symbol outside the alphabet.
    """
    model.eval()
    if not encoded_text:
        # An empty list would become a float tensor of zero elements, which
        # cannot be reshaped to (1, -1); there is nothing to decode anyway.
        return ''

    to_index = char_to_index[language]
    to_char = index_to_char[language]
    with torch.no_grad():
        indices = [to_index[char] for char in encoded_text]
        # Explicit integer dtype plus a leading batch dimension of size one.
        input_tensor = torch.tensor(indices, dtype=torch.long).unsqueeze(0)
        output = model(input_tensor)
        # Class count is taken from the output rather than a global table.
        output = output.reshape(-1, output.size(-1))
        predicted_indices = output.argmax(dim=-1).tolist()
    return ''.join(to_char[idx] for idx in predicted_indices)

Обучаемся!

In [41]:
if __name__ == "__main__":
    # Experiment parameters: alphabet, training-set size and cipher shift.
    language = 'english'
    num_samples = 1000
    shift = 2
    
    # Random (encoded, original) string pairs used for training.
    data = generate_data(num_samples=num_samples, shift=shift, language=language)

    # Wrap the pairs in a dataset and batch them; collate_fn pads every
    # batch to a common length.
    dataset = CipherDataset(data, language=language)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

    # Model sized to the number of symbols in the chosen alphabet.
    model = SimpleRNN(input_layer_size[language]) 

    # Loss function.
    criterion = nn.CrossEntropyLoss()

    # Optimizer.
    optimizer = optim.AdamW(model.parameters(), lr=0.001)  
    # Train the model.
    train(model, dataloader, criterion, optimizer, num_epochs=30)

    # Sanity check on a few freshly generated samples: print the encoded
    # text, the original text and the model's decoding side by side.
    test_samples = generate_data(10, shift=shift, language=language)
    for encoded, original in test_samples:
        decoded = evaluate(model, encoded, language)
        print(f'Encoded: {encoded}\nOriginal: {original}\nDecoded: {decoded}\n')

Epoch 1/30, Loss: 2.148571878671646
Epoch 2/30, Loss: 1.0834589507430792
Epoch 3/30, Loss: 0.3885561008937657
Epoch 4/30, Loss: 0.1620003948919475
Epoch 5/30, Loss: 0.10724074952304363
Epoch 6/30, Loss: 0.08267764491029084
Epoch 7/30, Loss: 0.06943792675156146
Epoch 8/30, Loss: 0.060939543414860964
Epoch 9/30, Loss: 0.05538130353670567
Epoch 10/30, Loss: 0.050661571440286934
Epoch 11/30, Loss: 0.04743930004769936
Epoch 12/30, Loss: 0.04461652191821486
Epoch 13/30, Loss: 0.04204148007556796
Epoch 14/30, Loss: 0.0407789614982903
Epoch 15/30, Loss: 0.03670441807480529
Epoch 16/30, Loss: 0.03434006532188505
Epoch 17/30, Loss: 0.03368434909498319
Epoch 18/30, Loss: 0.030366720544407144
Epoch 19/30, Loss: 0.027158699231222272
Epoch 20/30, Loss: 0.02601560510811396
Epoch 21/30, Loss: 0.022681642178213224
Epoch 22/30, Loss: 0.018925343814771622
Epoch 23/30, Loss: 0.021787123594549485
Epoch 24/30, Loss: 0.02117867604829371
Epoch 25/30, Loss: 0.018678835622267798
Epoch 26/30, Loss: 0.02365209732

## Выводы:
Loss в целом снижался на протяжении всего процесса обучения (с небольшими колебаниями на поздних эпохах) и после 30 эпох составил 0.0063, что является очень хорошим результатом.