In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import math
# Prefer the GPU when one is present, but fall back to the CPU so the notebook
# still survives "Restart & Run All" on machines without CUDA.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Xây dựng class embedding vị trí cho chữ cái

In [None]:
class PositionalEmbedding(nn.Module):
    """Add fixed sinusoidal position encodings to a batch of token embeddings.

    The precomputed table follows the standard sinusoidal scheme:

        PE[pos, 2i]     = sin(pos / 10000 ** (2i / embed_dim))
        PE[pos, 2i + 1] = cos(pos / 10000 ** (2i / embed_dim))

    Args:
        embed_dim: Size of the last dimension of the token embeddings.
        seq_len: Number of positions to precompute, i.e. the longest sequence
            that ``forward`` accepts.
        dropout: Dropout probability applied to the summed embeddings.
    """

    def __init__(self, embed_dim, seq_len= 500, dropout= 0.2):
        super(PositionalEmbedding, self).__init__()
        # Column vector of positions, as floats so the products below stay in
        # floating point.
        pos = torch.arange(0, seq_len, dtype=torch.float32).reshape(seq_len, 1)
        # 1 / 10000 ** (2i / embed_dim), evaluated in log space. Raising 10000
        # to an integer power directly overflows int64 after a few terms.
        inv_freq = torch.exp(
            torch.arange(0, embed_dim, 2, dtype=torch.float32)
            * (-math.log(10000.0) / embed_dim)
        )
        angles = pos * inv_freq  # [seq_len, ceil(embed_dim / 2)]

        # For an odd embed_dim, allocate one extra column so the sin and cos
        # halves have equal width; the surplus column is trimmed afterwards.
        padded_dim = embed_dim + (embed_dim % 2)
        pos_embedding = torch.zeros((seq_len, padded_dim))
        pos_embedding[:, 0::2] = torch.sin(angles)
        pos_embedding[:, 1::2] = torch.cos(angles)

        pos_embedding = pos_embedding[:, :embed_dim]

        self.dropout = nn.Dropout(dropout)
        # A buffer (not a parameter): it follows .to(device) and is saved in
        # the state dict, but is never updated by the optimizer.
        self.register_buffer('pos_embedding', pos_embedding)

    def forward(self, token_embedding: torch.Tensor):
        """Return ``dropout(token_embedding + positional_encoding)``.

        Args:
            token_embedding: Tensor indexed as [batch_size, seq_len, embed_dim].

        Raises:
            ValueError: If the sequence is longer than the precomputed table.
        """
        seq_len = token_embedding.size(1)
        max_positions = self.pos_embedding.size(0)
        if seq_len > max_positions:
            raise ValueError(
                f"sequence length {seq_len} exceeds the {max_positions} "
                "positions precomputed by PositionalEmbedding"
            )
        # [1, seq_len, embed_dim]; broadcasting covers the batch dimension.
        pos_embedding = self.pos_embedding[:seq_len, :].unsqueeze(0)

        return self.dropout(pos_embedding + token_embedding)


# Xây dựng class embedding tokens

In [None]:
class TokenizedEmbedding(nn.Module):
    """Token-id lookup table whose output is scaled by ``sqrt(embed_dim)``.

    Args:
        embed_dim: Width of each embedding vector.
        vocab_size: Number of rows in the embedding table.
    """

    def __init__(self, embed_dim, vocab_size):
        super().__init__()
        self.embed_dim = embed_dim
        self.embedding = nn.Embedding(vocab_size, embed_dim)

    def forward(self, tokens):
        """Embed ``tokens`` (cast to int64 first) and apply the sqrt scaling."""
        token_ids = tokens.long()
        scale = math.sqrt(self.embed_dim)
        looked_up = self.embedding(token_ids)
        return looked_up * scale

# Lớp Encoder 

In [None]:
class RnnEncoder(nn.Module):
    """GRU encoder: source token ids -> per-step outputs plus a context state.

    Args:
        src_vocab: Source vocabulary; only ``len(src_vocab)`` is used, to size
            the embedding table.
        embed_dim: Width of the token embeddings fed to the GRU.
        hidden_dim: GRU hidden size (also the width of the returned context).
        n_layers: Number of stacked GRU layers.
        dropout: Dropout probability applied to the final hidden state.
        DEVICE: Device the whole encoder is moved to after construction
            (``None`` leaves the module where PyTorch created it).
    """

    def __init__(self, src_vocab, embed_dim, hidden_dim, n_layers, dropout, DEVICE):
        super(RnnEncoder, self).__init__()

        self.embedding = TokenizedEmbedding(
            embed_dim= embed_dim,
            vocab_size= len(src_vocab)
            )

        self.rnn_layer = nn.GRU(
            input_size= embed_dim,
            hidden_size= hidden_dim,
            num_layers= n_layers,
            batch_first= True
            )

        self.norm = nn.LayerNorm(normalized_shape= hidden_dim)
        self.dropout = nn.Dropout(dropout)

        self.fc = nn.Linear(
            in_features= hidden_dim,
            out_features= hidden_dim
            )
        self.relu = nn.ReLU()

        # Place every sub-module on DEVICE, not only `fc`, so that forward()
        # never mixes parameters that live on different devices.
        if DEVICE is not None:
            self.to(DEVICE)

    def forward(self, x):
        """Encode a batch of source sentences.

        Args:
            x: Token ids, [batch_size, seq_len].

        Returns:
            Tuple ``(outputs, context)``: the GRU outputs for every time step,
            [batch_size, seq_len, hidden_dim], and the final hidden state after
            LayerNorm -> ReLU -> dropout -> linear,
            [n_layers, batch_size, hidden_dim].
        """
        x = self.embedding(x) # [batch_size, seq_len, embed_dim]
        x, hn = self.rnn_layer(x) # hn.shape = [n_layers, batch_size, hidden_dim]
        hn = self.norm(hn)
        hn = self.relu(hn)
        hn = self.dropout(hn)
        hn = self.fc(hn) # hn.shape = [n_layers, batch_size, hidden_dim]
        return x, hn

# Lớp Decoder

In [None]:
class RnnDecoder(nn.Module):
    """GRU decoder that produces target-vocabulary logits step by step.

    Args:
        tgt_vocab: Target vocabulary; only ``len(tgt_vocab)`` is used, to size
            the embedding table and the output projection.
        hidden_dim: Embedding width and GRU hidden size.
        n_layers: Number of stacked GRU layers.
        dropout: Dropout probability applied before the output projection.
        SOS_token: Id of the start-of-sequence token fed at the first step.
        DEVICE: Stored as ``self.device``; the module itself is not moved.
    """

    def __init__(self, tgt_vocab, hidden_dim, n_layers, dropout, SOS_token, DEVICE):
        super(RnnDecoder, self).__init__()

        self.embedding = TokenizedEmbedding(
            embed_dim= hidden_dim,
            vocab_size= len(tgt_vocab)
            )

        self.rnn_layer = nn.GRU(
            input_size= hidden_dim,
            hidden_size= hidden_dim,
            num_layers= n_layers,
            batch_first= True
            )

        self.fc = nn.Linear(
            in_features= hidden_dim,
            out_features= len(tgt_vocab)
        )

        self.norm = nn.LayerNorm(normalized_shape= hidden_dim)
        self.dropout = nn.Dropout(dropout)
        self.device = DEVICE
        self.sos_token = SOS_token

    def forward(self, encoder_outputs, context, target_tensor=None, max_len=None):
        """Decode a batch, with teacher forcing or greedy free-running.

        Args:
            encoder_outputs: Encoder outputs for the whole source sequence,
                [batch_size, seq_len, hidden_dim]. Only the batch size and the
                device are read from it.
            context: Initial decoder hidden state,
                [n_layers, batch_size, hidden_dim].
            target_tensor: Ground-truth target ids [batch_size, tgt_len]. When
                given, teacher forcing is used and ``tgt_len`` steps are emitted.
            max_len: Number of steps to emit when ``target_tensor`` is None.
                Ignored when ``target_tensor`` is given.

        Returns:
            Tuple ``(logits, hidden)``: logits [batch_size, max_len, len(tgt_vocab)]
            and the final GRU hidden state [n_layers, batch_size, hidden_dim].

        Raises:
            ValueError: If both ``target_tensor`` and ``max_len`` are None.
        """
        if target_tensor is not None:
            max_len = target_tensor.size(1) # length of the target sentence
        elif max_len is None:
            raise ValueError(
                "max_len must be given when target_tensor is None "
                "(free-running decoding)"
            )

        batch_size = encoder_outputs.size(0)
        # Start-token ids, [batch_size, 1]. They are created on the same device
        # as the incoming tensors so they always match where the data lives.
        decoder_input = torch.empty(
            (batch_size, 1), dtype= torch.long, device= encoder_outputs.device
        ).fill_(self.sos_token)
        decoder_hidden = context
        # Position 0 of the output is the embedded start token, which keeps the
        # output exactly max_len steps long.
        decoder_outputs = [self.embedding(decoder_input)]

        for i in range(1, max_len):
            # decoder_output.shape = [batch_size, 1, hidden_dim]
            decoder_output, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
            decoder_outputs.append(decoder_output)

            if target_tensor is not None:
                # Teacher forcing: the next input is the ground-truth token.
                decoder_input = target_tensor[:, i].unsqueeze(1)
            else:
                # Greedy decoding: the next input must be token ids, so project
                # this step to the vocabulary and take the most likely token.
                with torch.no_grad():
                    step_logits = self.fc(self.norm(decoder_output))
                decoder_input = step_logits.argmax(dim=-1) # [batch_size, 1]

        decoder_outputs = torch.cat(decoder_outputs, dim= 1)
        decoder_outputs = self.norm(decoder_outputs)
        decoder_outputs = self.dropout(decoder_outputs)
        decoder_outputs = self.fc(decoder_outputs) # [batch_size, max_len, len(tgt_vocab)]

        return decoder_outputs, decoder_hidden

    def forward_step(self, input, hidden):
        """Run one GRU step on token ids ``input`` ([batch_size, 1]).

        Returns the GRU output [batch_size, 1, hidden_dim] and the new hidden
        state.
        """
        input = self.embedding(input)
        input = F.relu(input)
        output, hidden = self.rnn_layer(input, hidden)
        return output, hidden

# Model Rnn tổng quát

In [None]:
class RnnMachineTranslate(nn.Module):
    """Sequence-to-sequence wrapper that chains an encoder and a decoder.

    Args:
        encoder: Module called as ``encoder(x)``, returning
            ``(per_step_outputs, context)``.
        decoder: Module called as ``decoder(per_step_outputs, context, y)``,
            returning ``(decoder_outputs, decoder_hidden)``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder, self.decoder = encoder, decoder

    def forward(self, x, y):
        """Encode the source batch ``x`` and decode it against the target ``y``.

        Returns only the decoder outputs; the decoder's final hidden state is
        discarded.
        """
        encoder_states, context = self.encoder(x)
        outputs, _ = self.decoder(encoder_states, context, y)
        return outputs