In [None]:
import torch.nn as nn
import torch

In [None]:
class Encoder(nn.Module):
  """Embed source token ids and encode them with a single-layer LSTM.

  Args:
    embedding_size: Dimension of each token embedding vector.
    hidden_size: Number of features in the LSTM hidden state.
    vocab_size: Number of rows in the source embedding table.
  """

  def __init__(self, embedding_size, hidden_size, vocab_size):
    # nn.Module.__init__ accepts no positional arguments; the previous
    # `super().__init__(self)` raised a TypeError as soon as the class
    # was instantiated.
    super().__init__()

    # encoder initialization
    self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                  embedding_dim=embedding_size)
    self.encoder_lstm = nn.LSTM(input_size=embedding_size,
                                hidden_size=hidden_size,
                                batch_first=True)

  def forward(self, input):
    """Encode a batch of token id sequences.

    Args:
      input: Integer tensor of token ids, shape (batch, seq_len).

    Returns:
      A tuple (encoder_outputs, final_hidden_state, final_cell_state).
      encoder_outputs is (batch, seq_len, hidden_size) because the LSTM is
      built with batch_first=True; both states are (1, batch, hidden_size).
    """
    embedding_input = self.embedding(input)
    encoder_outputs, (final_hidden_state, final_cell_state) = self.encoder_lstm(embedding_input)

    return encoder_outputs, final_hidden_state, final_cell_state




In [None]:
class Decoder(nn.Module):
  """LSTM decoder that generates one target token per step.

  After every step the LSTM hidden state is replaced by an attention
  context vector computed over the encoder outputs.

  Args:
    vocab_size_tr: Size of the target vocabulary (embedding rows and
      number of output logits).
    embedding_dim: Dimension of each target token embedding.
    hidden_size: Number of features in the LSTM hidden state.
    max_len: Number of decoding steps performed by `forward`.
    sos_token: Token id fed to the decoder at the first step.
  """

  def __init__(self,
               vocab_size_tr,
               embedding_dim,
               hidden_size,
               max_len=20,
               sos_token=1,
               ):

    super().__init__()
    self.MAX_LEN = max_len
    self.SOS_TOKEN = sos_token

    # Layers
    self.embedding_layer = nn.Embedding(vocab_size_tr, embedding_dim)
    self.lstm_layer = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
    self.fnn = nn.Linear(hidden_size, vocab_size_tr)
    self.attention_vector = Attention(hidden_size)

  def forward(self,
              encoder_outputs,
              hidden_state,
              cell_state,
              target_output=None):
    """Run `MAX_LEN` decoding steps.

    Args:
      encoder_outputs: Encoder outputs, shape (batch, src_len, hidden_size).
      hidden_state: Initial hidden state, shape (1, batch, hidden_size).
      cell_state: Initial cell state, shape (1, batch, hidden_size).
      target_output: Optional integer tensor (batch, tgt_len). When given,
        teacher forcing is used: the ground-truth token at step i is fed as
        the input of step i + 1.

    Returns:
      Logits of shape (batch, MAX_LEN, vocab_size_tr).
    """
    # The batch size comes from the encoder output, which in turn comes
    # straight from the data loader.
    batch_size = encoder_outputs.shape[0]

    # First decoder input is the <start> token for every sequence. It is
    # created on the same device as the encoder outputs so the model also
    # works when moved to a GPU.
    decoder_input = torch.full((batch_size, 1),
                               self.SOS_TOKEN,
                               dtype=torch.long,
                               device=encoder_outputs.device)

    decoder_outputs = []

    for i in range(self.MAX_LEN):
      # forward_step returns three values; the updated cell state has to be
      # threaded through the loop together with the hidden state.
      output_logits, hidden_state, cell_state = self.forward_step(
          encoder_outputs, decoder_input, hidden_state, cell_state)
      # Each item is (batch, 1, vocab_size); concatenated below into
      # (batch, MAX_LEN, vocab_size).
      decoder_outputs.append(output_logits.unsqueeze(1))

      # Teacher forcing. `is not None` is required because the truth value
      # of a multi-element tensor is ambiguous and raises. If the target is
      # shorter than MAX_LEN, fall back to the model's own prediction once
      # the target is exhausted instead of indexing out of range.
      if target_output is not None and i < target_output.size(1):
        decoder_input = target_output[:, i].unsqueeze(1)
      else:
        _, decoder_input = output_logits.topk(1, dim=-1)

    decoder_final_output = torch.cat(decoder_outputs, dim=1)

    return decoder_final_output

  def forward_step(self, encoder_outputs, decoder_input, hidden_state, cell_state):
    """Decode a single token.

    Args:
      encoder_outputs: Encoder outputs, shape (batch, src_len, hidden_size).
      decoder_input: Integer tensor of current input tokens, shape (batch, 1).
      hidden_state: Current hidden state, shape (1, batch, hidden_size).
      cell_state: Current cell state, shape (1, batch, hidden_size).

    Returns:
      A tuple (output_logit, hidden_state, cell_state): logits of shape
      (batch, vocab_size_tr), the attention context used as the next hidden
      state, and the cell state produced by the LSTM for this step.
    """
    # (batch, 1, embedding_dim): one token per sequence is fed per step.
    embedded_decoder_input = self.embedding_layer(decoder_input)

    # lstm_output: (batch, 1, hidden_size)
    lstm_output, (decoder_hidden, decoder_cell) = self.lstm_layer(
        embedded_decoder_input, (hidden_state, cell_state))

    # Drop the length-1 time axis, then project to vocabulary logits.
    output_logit = self.fnn(lstm_output.squeeze(1))

    # The attention context over the encoder outputs becomes the hidden
    # state for the next step.
    hidden_state = self.attention_vector(encoder_outputs, decoder_hidden)

    # Return the cell state produced by this step; returning the incoming
    # cell_state would discard the LSTM's memory update.
    return output_logit, hidden_state, decoder_cell

In [None]:
class Attention(nn.Module):
  """Additive attention over encoder outputs.

  Each encoder time step is scored by a small feed-forward network applied
  to the concatenation of that step's encoder output and the decoder hidden
  state. The scores are softmax-normalised over the time axis and used to
  build a weighted sum (context vector) of the encoder outputs.

  Args:
    hidden_size: Feature size shared by the encoder outputs and the
      decoder hidden state.
  """

  def __init__(self, hidden_size):
    # nn.Module.__init__ accepts no positional arguments; passing `self`
    # explicitly raised a TypeError on construction.
    super().__init__()

    self.network = nn.Sequential(
          nn.Linear(2*hidden_size, hidden_size),
          nn.SELU(),
          nn.Linear(hidden_size, 1),
          # Input here is (batch, timesteps, 1): dim=1 normalises over time.
          nn.Softmax(dim=1)
        )

  def forward(self, encoder_outputs, hidden_state):
    """Compute the attention context vector.

    Args:
      encoder_outputs: Tensor of shape (batch, timesteps, hidden_size).
      hidden_state: Decoder hidden state in LSTM layout,
        shape (1, batch, hidden_size).

    Returns:
      Context vector in LSTM hidden-state layout, (1, batch, hidden_size).
    """
    encoder_timestep_len = encoder_outputs.size(1)

    # LSTM layout (1, batch, hidden_size) -> batch-first (batch, 1, hidden_size)
    hidden_state = hidden_state.permute(1, 0, 2)

    # Broadcast the single hidden vector across every encoder time step so
    # it can be concatenated with the encoder outputs. (The earlier code
    # called the tensor itself, which is not callable.)
    hidden_repeated = hidden_state.expand(-1, encoder_timestep_len, -1)

    # (batch, timesteps, 2 * hidden_size)
    encoder_hidden_concat = torch.cat((encoder_outputs, hidden_repeated), dim=-1)

    weights = self.network(encoder_hidden_concat)  # (batch, timesteps, 1)

    # Rearrange for the batched matrix product.
    weights = weights.permute(0, 2, 1)  # (batch, 1, timesteps)

    context_vectores = weights.bmm(encoder_outputs)  # (batch, 1, hidden_size)

    # Back to the layout the LSTM expects for a hidden state.
    context_vectores = context_vectores.permute(1, 0, 2)

    return context_vectores












In [None]:
class Seq2SeqAttentionModel(nn.Module):
  """Encoder-decoder translation model with attention.

  Args:
    embedding_size: Embedding dimension used by both encoder and decoder.
    hidden_size: LSTM hidden size used by both encoder and decoder.
    vocab_size_en: Source vocabulary size.
    vocab_size_tr: Target vocabulary size.
    max_len: Number of decoding steps (defaults to the previous fixed 20).
    sos_token: Start-of-sequence token id (defaults to the previous fixed 1).
  """

  def __init__(self,
               embedding_size,
               hidden_size,
               vocab_size_en,
               vocab_size_tr,
               max_len=20,
               sos_token=1):
    # nn.Module.__init__ accepts no positional arguments.
    super().__init__()

    self.encoder = Encoder(embedding_size,
                           hidden_size,
                           vocab_size_en)

    # Decoder's signature is (vocab_size_tr, embedding_dim, hidden_size, ...).
    # Keyword arguments keep the three sizes from being silently swapped.
    self.decoder = Decoder(vocab_size_tr=vocab_size_tr,
                           embedding_dim=embedding_size,
                           hidden_size=hidden_size,
                           max_len=max_len,
                           sos_token=sos_token)

  # forward must be a method of the class (not a function nested inside
  # __init__), otherwise calling the model raises NotImplementedError.
  def forward(self, input, target_output=None):
    """Translate a batch of source sequences.

    Args:
      input: Integer tensor of source token ids, shape (batch, src_len).
      target_output: Optional integer tensor of target token ids used for
        teacher forcing; omit it for greedy decoding.

    Returns:
      Whatever the decoder returns for these inputs (its per-step logits).
    """
    encoder_outputs, encoder_hidden_state, encoder_cell_state = self.encoder(input)
    decoder_output = self.decoder(encoder_outputs, encoder_hidden_state, encoder_cell_state, target_output)

    return decoder_output


In [None]:
class Encoder(nn.Module):
  """Embed source token ids and encode them with a single-layer LSTM.

  Args:
    embedding_size: Dimension of each token embedding vector.
    hidden_size: Number of features in the LSTM hidden state.
    vocab_size: Number of rows in the source embedding table.
  """

  def __init__(self, embedding_size, hidden_size, vocab_size):
    # nn.Module.__init__ accepts no positional arguments; the previous
    # `super().__init__(self)` raised a TypeError as soon as the class
    # was instantiated.
    super().__init__()

    # encoder initialization
    self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                  embedding_dim=embedding_size)
    self.encoder_lstm = nn.LSTM(input_size=embedding_size,
                                hidden_size=hidden_size,
                                batch_first=True)

  def forward(self, input):
    """Encode a batch of token id sequences.

    Args:
      input: Integer tensor of token ids, shape (batch, seq_len).

    Returns:
      A tuple (encoder_outputs, final_hidden_state, final_cell_state).
      encoder_outputs is (batch, seq_len, hidden_size) because the LSTM is
      built with batch_first=True; both states are (1, batch, hidden_size).
    """
    embedding_input = self.embedding(input)
    encoder_outputs, (final_hidden_state, final_cell_state) = self.encoder_lstm(embedding_input)

    return encoder_outputs, final_hidden_state, final_cell_state



class Decoder(nn.Module):
  """LSTM decoder that generates one target token per step.

  After every step the LSTM hidden state is replaced by an attention
  context vector computed over the encoder outputs.

  Args:
    vocab_size_tr: Size of the target vocabulary (embedding rows and
      number of output logits).
    embedding_dim: Dimension of each target token embedding.
    hidden_size: Number of features in the LSTM hidden state.
    max_len: Number of decoding steps performed by `forward`.
    sos_token: Token id fed to the decoder at the first step.
  """

  def __init__(self,
               vocab_size_tr,
               embedding_dim,
               hidden_size,
               max_len=20,
               sos_token=1,
               ):

    super().__init__()
    self.MAX_LEN = max_len
    self.SOS_TOKEN = sos_token

    # Layers
    self.embedding_layer = nn.Embedding(vocab_size_tr, embedding_dim)
    self.lstm_layer = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
    self.fnn = nn.Linear(hidden_size, vocab_size_tr)
    self.attention_vector = Attention(hidden_size)

  def forward(self,
              encoder_outputs,
              hidden_state,
              cell_state,
              target_output=None):
    """Run `MAX_LEN` decoding steps.

    Args:
      encoder_outputs: Encoder outputs, shape (batch, src_len, hidden_size).
      hidden_state: Initial hidden state, shape (1, batch, hidden_size).
      cell_state: Initial cell state, shape (1, batch, hidden_size).
      target_output: Optional integer tensor (batch, tgt_len). When given,
        teacher forcing is used: the ground-truth token at step i is fed as
        the input of step i + 1.

    Returns:
      Logits of shape (batch, MAX_LEN, vocab_size_tr).
    """
    # The batch size comes from the encoder output, which in turn comes
    # straight from the data loader.
    batch_size = encoder_outputs.shape[0]

    # First decoder input is the <start> token for every sequence. It is
    # created on the same device as the encoder outputs so the model also
    # works when moved to a GPU.
    decoder_input = torch.full((batch_size, 1),
                               self.SOS_TOKEN,
                               dtype=torch.long,
                               device=encoder_outputs.device)

    decoder_outputs = []

    for i in range(self.MAX_LEN):
      # forward_step returns three values; the updated cell state has to be
      # threaded through the loop together with the hidden state.
      output_logits, hidden_state, cell_state = self.forward_step(
          encoder_outputs, decoder_input, hidden_state, cell_state)
      # Each item is (batch, 1, vocab_size); concatenated below into
      # (batch, MAX_LEN, vocab_size).
      decoder_outputs.append(output_logits.unsqueeze(1))

      # Teacher forcing. `is not None` is required because the truth value
      # of a multi-element tensor is ambiguous and raises. If the target is
      # shorter than MAX_LEN, fall back to the model's own prediction once
      # the target is exhausted instead of indexing out of range.
      if target_output is not None and i < target_output.size(1):
        decoder_input = target_output[:, i].unsqueeze(1)
      else:
        _, decoder_input = output_logits.topk(1, dim=-1)

    decoder_final_output = torch.cat(decoder_outputs, dim=1)

    return decoder_final_output

  def forward_step(self, encoder_outputs, decoder_input, hidden_state, cell_state):
    """Decode a single token.

    Args:
      encoder_outputs: Encoder outputs, shape (batch, src_len, hidden_size).
      decoder_input: Integer tensor of current input tokens, shape (batch, 1).
      hidden_state: Current hidden state, shape (1, batch, hidden_size).
      cell_state: Current cell state, shape (1, batch, hidden_size).

    Returns:
      A tuple (output_logit, hidden_state, cell_state): logits of shape
      (batch, vocab_size_tr), the attention context used as the next hidden
      state, and the cell state produced by the LSTM for this step.
    """
    # (batch, 1, embedding_dim): one token per sequence is fed per step.
    embedded_decoder_input = self.embedding_layer(decoder_input)

    # lstm_output: (batch, 1, hidden_size)
    lstm_output, (decoder_hidden, decoder_cell) = self.lstm_layer(
        embedded_decoder_input, (hidden_state, cell_state))

    # Drop the length-1 time axis, then project to vocabulary logits.
    output_logit = self.fnn(lstm_output.squeeze(1))

    # The attention context over the encoder outputs becomes the hidden
    # state for the next step.
    hidden_state = self.attention_vector(encoder_outputs, decoder_hidden)

    # Return the cell state produced by this step; returning the incoming
    # cell_state would discard the LSTM's memory update.
    return output_logit, hidden_state, decoder_cell


class Attention(nn.Module):
  """Additive attention over encoder outputs.

  Each encoder time step is scored by a small feed-forward network applied
  to the concatenation of that step's encoder output and the decoder hidden
  state. The scores are softmax-normalised over the time axis and used to
  build a weighted sum (context vector) of the encoder outputs.

  Args:
    hidden_size: Feature size shared by the encoder outputs and the
      decoder hidden state.
  """

  def __init__(self, hidden_size):
    # nn.Module.__init__ accepts no positional arguments; passing `self`
    # explicitly raised a TypeError on construction.
    super().__init__()

    self.network = nn.Sequential(
          nn.Linear(2*hidden_size, hidden_size),
          nn.SELU(),
          nn.Linear(hidden_size, 1),
          # Input here is (batch, timesteps, 1): dim=1 normalises over time.
          nn.Softmax(dim=1)
        )

  def forward(self, encoder_outputs, hidden_state):
    """Compute the attention context vector.

    Args:
      encoder_outputs: Tensor of shape (batch, timesteps, hidden_size).
      hidden_state: Decoder hidden state in LSTM layout,
        shape (1, batch, hidden_size).

    Returns:
      Context vector in LSTM hidden-state layout, (1, batch, hidden_size).
    """
    encoder_timestep_len = encoder_outputs.size(1)

    # LSTM layout (1, batch, hidden_size) -> batch-first (batch, 1, hidden_size)
    hidden_state = hidden_state.permute(1, 0, 2)

    # Repeat the single hidden vector across every encoder time step so it
    # can be concatenated with the encoder outputs.
    hidden_repeated = hidden_state.repeat(1, encoder_timestep_len, 1)

    # (batch, timesteps, 2 * hidden_size)
    encoder_hidden_concat = torch.cat((encoder_outputs, hidden_repeated), dim=-1)

    weights = self.network(encoder_hidden_concat)  # (batch, timesteps, 1)

    # Rearrange for the batched matrix product.
    weights = weights.permute(0, 2, 1)  # (batch, 1, timesteps)

    context_vectores = weights.bmm(encoder_outputs)  # (batch, 1, hidden_size)

    # Back to the layout the LSTM expects for a hidden state.
    context_vectores = context_vectores.permute(1, 0, 2)

    return context_vectores


class Seq2SeqAttentionModel(nn.Module):
  """Encoder-decoder translation model with attention.

  Args:
    embedding_size: Embedding dimension used by both encoder and decoder.
    hidden_size: LSTM hidden size used by both encoder and decoder.
    vocab_size_en: Source vocabulary size.
    vocab_size_tr: Target vocabulary size.
    max_len: Number of decoding steps (defaults to the previous fixed 20).
    sos_token: Start-of-sequence token id (defaults to the previous fixed 1).
  """

  def __init__(self,
               embedding_size,
               hidden_size,
               vocab_size_en,
               vocab_size_tr,
               max_len=20,
               sos_token=1):
    # nn.Module.__init__ accepts no positional arguments.
    super().__init__()

    self.encoder = Encoder(embedding_size,
                           hidden_size,
                           vocab_size_en)

    # Decoder's signature is (vocab_size_tr, embedding_dim, hidden_size, ...).
    # Keyword arguments keep the three sizes from being silently swapped.
    self.decoder = Decoder(vocab_size_tr=vocab_size_tr,
                           embedding_dim=embedding_size,
                           hidden_size=hidden_size,
                           max_len=max_len,
                           sos_token=sos_token)

  def forward(self, input, target_output=None):
    """Translate a batch of source sequences.

    Args:
      input: Integer tensor of source token ids, shape (batch, src_len).
      target_output: Optional integer tensor of target token ids used for
        teacher forcing; omit it for greedy decoding.

    Returns:
      Whatever the decoder returns for these inputs (its per-step logits).
    """
    encoder_outputs, encoder_hidden_state, encoder_cell_state = self.encoder(input)
    decoder_output = self.decoder(encoder_outputs, encoder_hidden_state, encoder_cell_state, target_output)

    return decoder_output




In [None]:
pip install torchtext


Collecting torchtext
  Downloading torchtext-0.18.0-cp311-cp311-manylinux1_x86_64.whl.metadata (7.9 kB)
Downloading torchtext-0.18.0-cp311-cp311-manylinux1_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchtext
Successfully installed torchtext-0.18.0


In [None]:
from torchtext.datasets import Multi30k

OSError: /usr/local/lib/python3.11/dist-packages/torchtext/lib/libtorchtext.so: undefined symbol: _ZN5torch3jit17parseSchemaOrNameERKSs

In [None]:
# `import torch.data.` was a syntax error (trailing dot) and `torch.data`
# is not a PyTorch module; the data-loading utilities live in
# `torch.utils.data`.
# NOTE(review): assumes torch.utils.data is what was intended - confirm.
import torch.utils.data

# Show the installed PyTorch version to diagnose binary compatibility
# problems with extension packages.
print(torch.__version__)
print()


2.5.1+cu121

