<a href="https://colab.research.google.com/github/Vasyl808/NULP_NLP/blob/main/rnn_quest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import json
from typing import Dict, Union, List

In [None]:
def load_parameters(base_path: str) -> Dict[str, Union[List, torch.tensor]]:
    """Read the vocabulary and the RNN weights from JSON files in a directory.

    Args:
        base_path: Directory containing ``vocab.json`` and the five weight
            files listed below.

    Returns:
        A dict with the key ``"vocab"`` (the decoded JSON value, left as-is)
        followed by ``"embeddings"``, ``"W_h"``, ``"W_y"``, ``"U_h"`` and
        ``"W_h_bias"``, each converted with ``torch.tensor``.
    """

    def _read_json(file_name: str):
        # Every file is opened as UTF-8 text relative to ``base_path``.
        with open(f"{base_path}/{file_name}", "r", encoding="utf-8") as handle:
            return json.load(handle)

    # Files whose content becomes a tensor; insertion order defines the
    # order of the keys in the returned dict.
    tensor_files: Dict[str, str] = {
        "embeddings": "embedding.weight.json",
        "W_h": "W_h.weight.json",
        "W_y": "W_y.weight.json",
        "U_h": "U_h.weight.json",
        "W_h_bias": "W_h.bias.json",
    }

    # The vocabulary is the only entry kept as plain JSON data.
    parameters: Dict[str, Union[List, torch.tensor]] = {
        "vocab": _read_json("vocab.json"),
    }
    for key, file_name in tensor_files.items():
        parameters[key] = torch.tensor(_read_json(file_name))

    return parameters

In [None]:
# Directory that holds the exported vocabulary / weight JSON files.
# NOTE(review): absolute Windows path from one specific machine — it must be
# changed before this cell can run anywhere else (e.g. on Colab).
path: str = r"D:\deepfake_detection_model\d7071db0908e"

# Load the vocabulary and the weight tensors into one dict.
parms = load_parameters(path)
# Bare expression: the notebook displays the dict as the cell output.
parms

{'vocab': ['\t',
  '\n',
  '\x0b',
  '\x0c',
  '\r',
  ' ',
  '!',
  '"',
  '#',
  '$',
  '%',
  '&',
  "'",
  '(',
  ')',
  '*',
  '+',
  ',',
  '-',
  '.',
  '/',
  '0',
  '1',
  '2',
  '3',
  '4',
  '5',
  '6',
  '7',
  '8',
  '9',
  ':',
  ';',
  '<',
  '=',
  '>',
  '?',
  '@',
  'A',
  'B',
  'C',
  'D',
  'E',
  'F',
  'G',
  'H',
  'I',
  'J',
  'K',
  'L',
  'M',
  'N',
  'O',
  'P',
  'Q',
  'R',
  'S',
  'T',
  'U',
  'V',
  'W',
  'X',
  'Y',
  'Z',
  '[',
  '\\',
  ']',
  '^',
  '_',
  '`',
  'a',
  'b',
  'c',
  'd',
  'e',
  'f',
  'g',
  'h',
  'i',
  'j',
  'k',
  'l',
  'm',
  'n',
  'o',
  'p',
  'q',
  'r',
  's',
  't',
  'u',
  'v',
  'w',
  'x',
  'y',
  'z',
  '{',
  '|',
  '}',
  '~',
  'Н',
  'а',
  'б',
  'в',
  'г',
  'д',
  'е',
  'ж',
  'з',
  'и',
  'й',
  'к',
  'л',
  'м',
  'н',
  'о',
  'п',
  'р',
  'с',
  'т',
  'у',
  'ф',
  'х',
  'ц',
  'ч',
  'ш',
  'щ',
  'ь',
  'ю',
  'я',
  'є',
  'і',
  'ї'],
 'embeddings': tensor([[ 1.9269,  1.4873,  0.9007

In [None]:
class ElmanRNN(nn.Module):
    """Character-level Elman RNN built from externally supplied weights.

    One call to the module consumes a single character and the previous
    hidden state and produces the logits for the next character together
    with the updated hidden state::

        h_t = tanh(W_h @ e(x_t) + W_h_bias + U_h @ h_{t-1})
        y_t = W_y @ h_t

    Args:
        embeddings: Embedding table, indexed by the position of a character
            in ``vocab``.
        W_h: Input-to-hidden weights; its first dimension is taken as the
            hidden size.
        W_y: Hidden-to-output weights.
        U_h: Hidden-to-hidden (recurrent) weights.
        W_h_bias: Bias added to the hidden pre-activation.
        vocab: Sequence of characters; must support ``.index(char)``.
    """

    def __init__(self, embeddings, W_h, W_y, U_h, W_h_bias, vocab):
        super().__init__()

        # Register every tensor as a trainable parameter. The order below
        # is the order in which ``named_parameters()`` reports them.
        weights = (
            ("embeddings", embeddings),
            ("W_h", W_h),
            ("W_y", W_y),
            ("U_h", U_h),
            ("W_h_bias", W_h_bias),
        )
        for attr_name, tensor in weights:
            setattr(self, attr_name, nn.Parameter(tensor))

        self.vocab = vocab
        self.hidden_size = W_h.size(0)

    def forward(self, input_char: str, hidden_state: torch.tensor):
        """Advance the network by one character.

        Args:
            input_char: Character to feed in; must be present in ``vocab``
                (``vocab.index`` raises otherwise).
            hidden_state: Hidden state from the previous step.

        Returns:
            Tuple ``(output_logits, hidden_state)`` with the logits for the
            next character and the new hidden state.
        """
        char_vector = self.embeddings[self.vocab.index(input_char)]

        # Keep the summation order (input term + bias, then recurrent term)
        # so the floating-point result is unchanged.
        input_term = self.W_h @ char_vector + self.W_h_bias
        recurrent_term = self.U_h @ hidden_state
        new_hidden = torch.tanh(input_term + recurrent_term)

        return self.W_y @ new_hidden, new_hidden

In [None]:
def decode_message(rnn_model: nn.Module, max_length: int = 10000):
    """Greedily generate the message stored in a character-level RNN.

    Generation starts from the character ``"["`` with an all-zero hidden
    state. At every step the character with the highest logit is fed back
    as the next input, until the model emits ``"]"``.

    Args:
        rnn_model: Module exposing ``hidden_size`` and ``vocab`` attributes
            and callable as ``rnn_model(char, hidden_state)`` returning
            ``(output_logits, hidden_state)``.
        max_length: Upper bound on the number of generated characters. It
            guards against a model that never produces the closing ``"]"``,
            which would otherwise loop forever.

    Returns:
        The generated characters, without the surrounding brackets. If the
        closing bracket is never produced, the first ``max_length``
        characters are returned.
    """
    # Use the argument's own device, not that of some global model.
    device = next(rnn_model.parameters()).device

    hidden_state = torch.zeros(rnn_model.hidden_size, device=device)
    decoded_chars = []
    current_char = "["

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for _ in range(max_length):
            output_logits, hidden_state = rnn_model(current_char, hidden_state)

            next_char_idx = torch.argmax(output_logits).item()
            current_char = rnn_model.vocab[next_char_idx]

            if current_char == "]":
                break

            decoded_chars.append(current_char)

    return "".join(decoded_chars)

In [None]:
# Build the network from the loaded parameters; the keys of `parms` match
# the keyword names of ElmanRNN.__init__.
model = ElmanRNN(**parms)

In [None]:
# Greedy-decode the message stored in the loaded weights (shown as cell output).
decode_message(model)

'Назва виконавця: https://www.youtube.com/watch?v=dtSmUF0vMKE'

In [None]:
# Sanity check: number of characters in the vocabulary.
len(model.vocab)

133

In [None]:
# Sanity check: hidden size, taken from the first dimension of W_h.
model.hidden_size

160

In [None]:
# Sanity check: shape of the embedding table.
model.embeddings.shape

torch.Size([133, 128])

In [None]:
def message_to_indices(message: str, vocab: List):
    """Translate each character of ``message`` into its position in ``vocab``.

    Args:
        message: Text to encode.
        vocab: List of characters; the position of the first match is used.

    Returns:
        A list of integer indices, one per character of ``message``.

    Raises:
        ValueError: If a character of ``message`` is not in ``vocab``.
    """
    return list(map(vocab.index, message))

In [None]:
def train_model(model: nn.Module, message: str, vocab: List, epochs: int = 1000, learning_rate: float = 0.01):
    """Fit the RNN so that it reproduces ``message`` character by character.

    Every parameter whose name does not contain ``"embedding"`` is optimised
    with Adam; the embedding table is left untouched. One epoch is one pass
    over the message, predicting each character from the previous one, with
    a single optimiser step at the end of the pass.

    The hidden state is detached before every step, so gradients flow
    through one time step only (no back-propagation through time).

    Args:
        model: Module exposing ``hidden_size`` and callable as
            ``model(char, hidden_state)`` returning
            ``(output_logits, hidden_state)``. It is moved to the selected
            device in place.
        message: Training text, including any start / end markers the
            decoder relies on.
        vocab: Character list used to turn ``message`` into target indices.
        epochs: Number of passes over the message.
        learning_rate: Adam learning rate.

    Raises:
        ValueError: If ``message`` contains a character missing from ``vocab``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'Device: {device}')

    model.to(device)
    params_to_train = [param for name, param in model.named_parameters() if 'embedding' not in name]

    optimizer = optim.Adam(params_to_train, lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    indices = message_to_indices(message, vocab)
    # Loop-invariant: (input character, target index tensor) for every
    # transition in the message. ``zip`` stops at the shorter sequence, so
    # the last character is only ever used as a target.
    target_tensors = [torch.tensor([idx], device=device) for idx in indices[1:]]
    steps = list(zip(message, target_tensors))

    for epoch in range(epochs):
        # Clear the gradients of all parameters, including the embeddings
        # that receive gradients but are not handed to the optimiser.
        model.zero_grad()

        # Start every pass from the zero state, which is also the state
        # decoding starts from. Carrying the final state of the previous
        # epoch over would train on a different initial condition.
        hidden_state = torch.zeros(model.hidden_size, device=device)

        total_loss = 0
        for input_char, target in steps:
            # Detaching cuts the graph at each step, so every step owns an
            # independent graph and no ``retain_graph`` is needed.
            output_logits, hidden_state = model(input_char, hidden_state.detach())

            loss = criterion(output_logits.view(1, -1), target)
            total_loss += loss.item()

            # Gradients accumulate across steps until ``optimizer.step()``.
            loss.backward()

        optimizer.step()

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss}")

In [None]:
# Text to train on; the surrounding "[" and "]" are the start and stop
# characters used by decode_message.
message = "[Attention Is All You Need]"

In [None]:
# Re-create the model from the loaded parameters before training.
model = ElmanRNN(**parms)

# Fit the non-embedding weights to `message` with the default epochs / learning rate.
train_model(model, message, parms.get('vocab'))

# Greedy-decode from the trained weights to check what the model now generates.
decoded_message = decode_message(model)

print("Decoded message:", decoded_message)

Device: cuda
Epoch 0, Loss: 176.5772466659546
Epoch 100, Loss: 0.006402288526714983
Epoch 200, Loss: 0.0034013375061476836
Epoch 300, Loss: 0.0022314672353331844
Epoch 400, Loss: 0.0016081844585187355
Epoch 500, Loss: 0.0012261473214039142
Epoch 600, Loss: 0.0009729614956768273
Epoch 700, Loss: 0.0007961823239384103
Epoch 800, Loss: 0.000665772336333248
Epoch 900, Loss: 0.0005663549918608624
Decoded message: Attention Is All You Need


In [None]:
# Dump every registered parameter (name, shape, values) for inspection.
for name, param in model.named_parameters():
    print(f"Parameter name: {name}")
    print(f"Parameter shape: {param.shape}")
    print(f"Parameter values: {param}")
    # Visual separator between parameters.
    print("-" * 40)

Parameter name: embeddings
Parameter shape: torch.Size([133, 128])
Parameter values: Parameter containing:
tensor([[ 1.9269,  1.4873,  0.9007,  ...,  0.3399,  0.7200,  0.4114],
        [ 1.9312,  1.0119, -1.4364,  ...,  0.5655,  0.5058,  0.2225],
        [-0.6855,  0.5636, -1.5072,  ...,  0.8541, -0.4901, -0.3595],
        ...,
        [-0.7694,  0.7945, -0.1317,  ...,  0.2118,  1.0113,  0.4264],
        [-0.1265, -2.2684,  1.5827,  ...,  2.4251, -1.1458,  1.2681],
        [ 0.2754,  2.2441,  0.5156,  ...,  1.5713, -1.2235, -0.6678]],
       device='cuda:0', requires_grad=True)
----------------------------------------
Parameter name: W_h
Parameter shape: torch.Size([160, 128])
Parameter values: Parameter containing:
tensor([[ 0.0777,  0.0500, -0.0421,  ..., -0.0682,  0.0316, -0.0183],
        [-0.0734,  0.0317, -0.0095,  ..., -0.1492,  0.1381, -0.0488],
        [ 0.0033,  0.0045, -0.0050,  ...,  0.1789,  0.1398,  0.1248],
        ...,
        [ 0.0381,  0.1209,  0.1669,  ..., -0.0720, 

In [None]:
# Re-check on the newly trained model: hidden size.
model.hidden_size

160

In [None]:
# Re-check on the newly trained model: shape of the embedding table.
model.embeddings.shape

torch.Size([133, 128])

In [None]:
# Re-check on the newly trained model: vocabulary size.
len(model.vocab)

133

In [None]:
def save_tensor_to_json(tensor: torch.tensor, file_path: str):
    """Write a tensor to ``file_path`` as a (nested) JSON list.

    Args:
        tensor: Tensor to serialise; converted with ``tensor.tolist()``.
        file_path: Destination file. It is created or overwritten.
    """
    # Convert first, so a failing conversion never leaves an empty file behind.
    nested_values = tensor.tolist()
    serialized = json.dumps(nested_values)
    with open(file_path, 'w') as out_file:
        out_file.write(serialized)

In [None]:
# Model attribute name -> output file name, for the weights to export.
# The embedding table is not in this mapping, so it is not exported.
params_to_save: Dict[str, str] = {
    "W_h": "W_h.my.weight.json",
    "W_y": "W_y.my.weight.json",
    "U_h": "U_h.my.weight.json",
    "W_h_bias": "W_h.my.bias.json"
}


# Paths are relative, so the files land in the current working directory.
for param_name, file_path in params_to_save.items():
    # Look the parameter up on the model by attribute name.
    tensor = getattr(model, param_name)

    save_tensor_to_json(tensor, file_path)