# 1. Алгоритм шифра Цезаря

Инициализируем входные данные

In [2]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Supported alphabets; the trailing space is treated as a regular symbol.
ALPHABETS = {
    'russian': 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя ',
    'english': 'abcdefghijklmnopqrstuvwxyz ',
}

# Per-language lookup tables: character -> index and index -> character.
CHAR_TO_INDEX = {
    lang: dict(zip(alphabet, range(len(alphabet))))
    for lang, alphabet in ALPHABETS.items()
}
INDEX_TO_CHAR = {
    lang: dict(enumerate(alphabet))
    for lang, alphabet in ALPHABETS.items()
}

# Vocabulary size per language, used for both the model input and output.
INPUT_SIZES = {lang: len(symbols) for lang, symbols in ALPHABETS.items()}
HIDDEN_SIZE = 128  # Width of the recurrent hidden state
OUTPUT_SIZES = dict(INPUT_SIZES)  # separate dict object with the same sizes

Реализуем алгоритм шифра Цезаря

In [None]:
def caesar_cipher(text, shift, alphabet):
    result = ""
    for char in text:
        if char in alphabet:
            index = alphabet.index(char)

            new_index = (index + shift) % len(alphabet)
            result += alphabet[new_index]
        else:
            result += char 
    return result

Сгенерируем данные

In [4]:
def generate_data(num_samples=1000, shift=2, language='russian'):
    """Generate random training pairs for the Caesar-cipher decoding task.

    Args:
        num_samples: Number of pairs to produce.
        shift: Caesar shift passed to ``caesar_cipher``.
        language: Key into ``ALPHABETS`` selecting the symbol set.

    Returns:
        A list of ``(encoded_text, original_text)`` tuples, where
        ``original_text`` is 5 to 15 symbols drawn at random (with
        replacement) from the alphabet and ``encoded_text`` is its
        shifted form.
    """
    symbols = ALPHABETS[language]

    def make_pair():
        # The length is drawn first, then the characters, one sample at a time.
        plain = ''.join(random.choices(symbols, k=random.randint(5, 15)))
        return caesar_cipher(plain, shift, symbols), plain

    return [make_pair() for _ in range(num_samples)]

Подготовка данных для обучения

In [5]:
class CipherDataset(Dataset):
    """Dataset of (encoded, original) text pairs served as index tensors.

    Args:
        data: Indexable collection of ``(encoded_text, original_text)`` pairs.
        language: Key into ``ALPHABETS`` selecting the character vocabulary.

    Raises:
        ValueError: If ``language`` is not a key of ``ALPHABETS``.
    """

    def __init__(self, data, language):
        if language not in ALPHABETS:
            # List the choices from ALPHABETS itself so the message never
            # advertises a language that is not actually configured.
            supported = ", ".join(repr(name) for name in ALPHABETS)
            raise ValueError(
                f"Unsupported language {language!r}. Please choose one of: {supported}."
            )

        self.data = data
        self.language = language
        self.char_to_index = CHAR_TO_INDEX[self.language]

    def __len__(self):
        """Return the number of stored pairs."""
        return len(self.data)

    def _encode(self, text):
        """Map each character of ``text`` to its index as a 1-D long tensor."""
        return torch.tensor([self.char_to_index[char] for char in text], dtype=torch.long)

    def __getitem__(self, index):
        """Return ``(encoded_indices, original_indices)`` for pair ``index``.

        Raises:
            KeyError: If either text contains a character that is missing
                from the selected vocabulary.
        """
        encoded, original = self.data[index]
        return self._encode(encoded), self._encode(original)

Реализуем функцию для добавления паддинга

In [6]:
def collate_fn(batch):
    """Collate variable-length index sequences into two padded batch tensors.

    Args:
        batch: List of items whose first element is the encoded sequence
            and whose second element is the original sequence, both 1-D
            tensors.

    Returns:
        ``(padded_encoded, padded_original)``: two stacked tensors whose rows
        are right-padded with zeros up to the longest encoded sequence in
        the batch.

    NOTE(review): the padding value 0 is also the index of the first
    alphabet character in ``CHAR_TO_INDEX``, so padded positions cannot be
    told apart from that character downstream.
    """
    sources = [sample[0] for sample in batch]
    targets = [sample[1] for sample in batch]

    # Both sides are padded to the width of the longest *encoded* sequence.
    width = max(map(len, sources))

    def pad_and_stack(sequences):
        rows = []
        for sequence in sequences:
            filler = torch.zeros(width - len(sequence), dtype=torch.long)
            rows.append(torch.cat([sequence, filler]))
        return torch.stack(rows)

    return pad_and_stack(sources), pad_and_stack(targets)

Реализуем архитектуру нейронной сети

In [7]:
class SimpleRNN(nn.Module):
    """Embedding -> single-layer RNN -> linear classifier over the vocabulary.

    Args:
        input_size: Vocabulary size; used both as the number of embeddings
            and as the number of output classes per position.
        embedding_dim: Width of the character embedding (default 30).
        hidden_size: Width of the RNN hidden state. When ``None`` the
            module-level ``HIDDEN_SIZE`` is used.
    """

    def __init__(self, input_size, embedding_dim=30, hidden_size=None):
        super().__init__()
        # Resolve the default lazily so the global is only needed when the
        # caller does not supply an explicit size.
        if hidden_size is None:
            hidden_size = HIDDEN_SIZE
        self.embedding = nn.Embedding(input_size, embedding_dim)
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x):
        """Return per-position logits for a batch of index sequences.

        Args:
            x: Integer tensor of character indices, batch dimension first
                (the RNN is built with ``batch_first=True``).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a trailing
            dimension of ``input_size`` unnormalized class scores.
        """
        x = self.embedding(x)
        # Only the per-step outputs are needed; the final hidden state is dropped.
        rnn_out, _ = self.rnn(x)
        return self.fc(rnn_out)

Реализуем функцию для обучения

In [8]:
def train(model, dataloader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` passes and print the mean loss per epoch.

    Args:
        model: Module mapping a batch of encoded index sequences to
            per-position class scores (classes in the last dimension).
        dataloader: Iterable yielding ``(encoded_x, original_y)`` batches.
        criterion: Loss called as ``criterion(scores_2d, targets_1d)``.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        None. The model is updated in place and left in training mode.
    """
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for encoded_x, original_y in dataloader:
            optimizer.zero_grad()

            output = model(encoded_x)

            # Flatten to (positions, classes) / (positions,). The class count
            # is read from the logits themselves so the loop does not depend
            # on a global table or on a `language` attribute of the dataset.
            output = output.reshape(-1, output.size(-1))
            original_y = original_y.reshape(-1)
            loss = criterion(output, original_y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Count batches instead of calling len(dataloader): an empty loader
        # then reports NaN rather than raising ZeroDivisionError.
        mean_loss = total_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {mean_loss}')

Реализуем функцию для проверки качества

In [9]:
def evaluate(model, encoded_text, language):
    """Decode ``encoded_text`` with ``model`` and return the predicted string.

    Args:
        model: Module mapping a ``(1, length)`` index tensor to per-position
            class scores with classes in the last dimension.
        encoded_text: String to decode; every character must be present in
            the vocabulary for ``language``.
        language: Key into ``CHAR_TO_INDEX`` / ``INDEX_TO_CHAR``.

    Returns:
        The string formed by the highest-scoring character at each position;
        an empty string for empty input.

    Raises:
        KeyError: If ``language`` or a character of ``encoded_text`` is unknown.
    """
    model.eval()
    char_to_index = CHAR_TO_INDEX[language]
    index_to_char = INDEX_TO_CHAR[language]

    # An empty list would otherwise become a float tensor, which the
    # embedding layer rejects; there is nothing to decode anyway.
    if not encoded_text:
        return ""

    with torch.no_grad():
        input_tensor = torch.tensor(
            [char_to_index[char] for char in encoded_text], dtype=torch.long
        ).view(1, -1)
        output = model(input_tensor)
        # Most likely class per position, flattened over the single-item batch.
        predicted_indices = output.argmax(dim=-1).view(-1)

    return ''.join(index_to_char[idx] for idx in predicted_indices.tolist())

Обучаемся!

In [13]:
if __name__ == "__main__":
    # Experiment parameters
    language = 'english'
    num_samples = 1000
    shift = 2
    
    data = generate_data(num_samples=num_samples, shift=shift, language=language)

    # Wrap the generated pairs in a dataset and batch them with padding
    dataset = CipherDataset(data, language=language)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

    # Model initialization (vocabulary size of the chosen language)
    model = SimpleRNN(INPUT_SIZES[language]) 

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = optim.AdamW(model.parameters(), lr=0.001)  
    # Train the model
    train(model, dataloader, criterion, optimizer, num_epochs=30)

    # Sanity check on a few newly generated pairs
    test_samples = generate_data(10, shift=shift, language=language)
    for encoded, original in test_samples:
        decoded = evaluate(model, encoded, language)
        print(f'Encoded: {encoded}\nOriginal: {original}\nDecoded: {decoded}\n')

Epoch 1/30, Loss: 2.185536801815033
Epoch 2/30, Loss: 1.0692739002406597
Epoch 3/30, Loss: 0.3874638080596924
Epoch 4/30, Loss: 0.16363327228464186
Epoch 5/30, Loss: 0.1048462565522641
Epoch 6/30, Loss: 0.08086013910360634
Epoch 7/30, Loss: 0.06718504603486508
Epoch 8/30, Loss: 0.058372288825921714
Epoch 9/30, Loss: 0.054983965354040265
Epoch 10/30, Loss: 0.0472615328617394
Epoch 11/30, Loss: 0.044399083184544
Epoch 12/30, Loss: 0.04354273801436648
Epoch 13/30, Loss: 0.04164442984620109
Epoch 14/30, Loss: 0.036947177606634796
Epoch 15/30, Loss: 0.036052310664672405
Epoch 16/30, Loss: 0.032943352474831045
Epoch 17/30, Loss: 0.029630609205923975
Epoch 18/30, Loss: 0.02846706355921924
Epoch 19/30, Loss: 0.028252124349819496
Epoch 20/30, Loss: 0.028368386148940772
Epoch 21/30, Loss: 0.02170727313205134
Epoch 22/30, Loss: 0.018823354854248464
Epoch 23/30, Loss: 0.017165622062748298
Epoch 24/30, Loss: 0.015767399134347215
Epoch 25/30, Loss: 0.013492577025317587
Epoch 26/30, Loss: 0.014744395

## Выводы:
Loss стабильно снижался на протяжении всего процесса обучения и после 30 эпох составил около 0.013, что является очень хорошим результатом.