In [1]:
#to define the models
import torch
import torch.nn as nn

In [2]:
#To get GPU details

import torch

def get_gpu_details():
    t = torch.cuda.get_device_properties(0).total_memory
    r = torch.cuda.memory_reserved(0)
    a = torch.cuda.memory_allocated(0)
    print(torch.cuda.get_device_name())
    print(f'Total GPU Memory {t} B, Reserved GPU Memory {r} B, Allocated GPU Memory {a} B')

if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
print(f'Current Device: {device}')

if device == 'cuda:0':
    torch.cuda.empty_cache()
    get_gpu_details()


Current Device: cuda:0
NVIDIA GeForce 940MX
Total GPU Memory 4240703488 B, Reserved GPU Memory 0 B, Allocated GPU Memory 0 B


In [3]:
import pickle

# Load vocab
with open('Model/vocab.pkl', 'rb') as f:
    vocab = pickle.load(f)

# Load model
model_path = "Model/trained_model.pth"

In [4]:
class SelfAttention(nn.Module):
    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (
            self.head_dim * heads == embed_size
        ), "Embedding size needs to be divisible by heads"

        self.values = nn.Linear(embed_size, embed_size)
        self.keys = nn.Linear(embed_size, embed_size)
        self.queries = nn.Linear(embed_size, embed_size)
        self.fc_out = nn.Linear(embed_size, embed_size)

    def forward(self, values, keys, query, mask):
        # Get number of training examples
        N = query.shape[0]

        value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]

        values = self.values(values)  # (N, value_len, embed_size)
        keys = self.keys(keys)  # (N, key_len, embed_size)
        queries = self.queries(query)  # (N, query_len, embed_size)

        # Split the embedding into self.heads different pieces
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        queries = queries.reshape(N, query_len, self.heads, self.head_dim)

        # Einsum does matrix mult. for query*keys for each training example
        # with every other training example, don't be confused by einsum
        # it's just how I like doing matrix multiplication & bmm

        energy = torch.einsum("nqhd,nkhd->nhqk", [queries, keys])
        # queries shape: (N, query_len, heads, heads_dim),
        # keys shape: (N, key_len, heads, heads_dim)
        # energy: (N, heads, query_len, key_len)

        # Mask padded indices so their weights become 0
        if mask is not None:
            energy = energy.masked_fill(mask == 0, float("-1e20"))

        # Normalize energy values similarly to seq2seq + attention
        # so that they sum to 1. Also divide by scaling factor for
        # better stability
        attention = torch.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)
        # attention shape: (N, heads, query_len, key_len)

        out = torch.einsum("nhql,nlhd->nqhd", [attention, values]).reshape(
            N, query_len, self.heads * self.head_dim
        )
        # attention shape: (N, heads, query_len, key_len)
        # values shape: (N, value_len, heads, heads_dim)
        # out after matrix multiply: (N, query_len, heads, head_dim), then
        # we reshape and flatten the last two dimensions.

        out = self.fc_out(out)
        # Linear layer doesn't modify the shape, final shape will be
        # (N, query_len, embed_size)

        return out

In [5]:
class Encoder(nn.Module):
    def __init__(
        self,
        src_vocab_size,
        embed_size,
        num_layers,
        heads,
        device,
        forward_expansion,
        dropout,
        max_length,
    ):

        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                TransformerBlock(
                    embed_size,
                    heads,
                    dropout=dropout,
                    forward_expansion=forward_expansion,
                )
                for _ in range(num_layers)
            ]
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(
            (self.word_embedding(x) + self.position_embedding(positions))
        )

        # In the Encoder the query, key, value are all the same, it's in the
        # decoder this will change. This might look a bit odd in this case.
        for layer in self.layers:
            out = layer(out, out, out, mask)

        return out



In [6]:
class Decoder(nn.Module):
    def __init__(
        self,
        trg_vocab_size,
        embed_size,
        num_layers,
        heads,
        forward_expansion,
        dropout,
        device,
        max_length,
    ):
        super(Decoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
                for _ in range(num_layers)
            ]
        )
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout((self.word_embedding(x) + self.position_embedding(positions)))

        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)

        out = self.fc_out(x)

        return out


In [7]:
class DecoderBlock(nn.Module):
    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super(DecoderBlock, self).__init__()
        self.norm = nn.LayerNorm(embed_size)
        self.attention = SelfAttention(embed_size, heads=heads)
        self.transformer_block = TransformerBlock(
            embed_size, heads, dropout, forward_expansion
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        attention = self.attention(x, x, x, trg_mask)
        query = self.dropout(self.norm(attention + x))
        out = self.transformer_block(value, key, query, src_mask)
        return out

In [8]:
class TransformerBlock(nn.Module):
    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super(TransformerBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)

        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size),
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        attention = self.attention(value, key, query, mask)

        # Add skip connection, run through normalization and finally dropout
        x = self.dropout(self.norm1(attention + query))
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))
        return out

In [9]:
class Transformer(nn.Module):
    def __init__(
        self,
        src_vocab_size,
        trg_vocab_size,
        src_pad_idx,
        trg_pad_idx,
        embed_size,
        num_layers,
        forward_expansion,
        heads,
        dropout,
        device,
        max_length=100,
    ):
        super(Transformer, self).__init__()

        self.encoder = Encoder(
            src_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length,
        )

        self.decoder = Decoder(
            trg_vocab_size,
            embed_size,
            num_layers,
            heads,
            forward_expansion,
            dropout,
            device,
            max_length,
        )

        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        src_mask = (src != self.src_pad_idx).unsqueeze(-2).unsqueeze(-2)
        return src_mask.to(self.device)

    def make_trg_mask(self, trg):
        trg_len = trg.shape[1]
        trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(1, 1, trg_len, trg_len)
        return trg_mask.to(self.device)

    def forward(self, src, trg=None):
        if trg is not None:
            # Training
            src_mask = self.make_src_mask(src)
            trg_mask = self.make_trg_mask(trg)
            enc_src = self.encoder(src, src_mask)
            output = self.decoder(trg, enc_src, src_mask, trg_mask)
            return output
        else:
            # Inference
            return self.inference(src)

    # def inference(self, src):
    #     src_mask = self.make_src_mask(src)
    #     enc_src = self.encoder(src, src_mask)
    #     trg_init_token = torch.tensor([[self.trg_pad_idx]]).to(src.device)  # <pad> token to start decoding
    #     trg_tokens = [trg_init_token]
    #     for i in range(self.max_length):
    #         trg_tensor = torch.cat(trg_tokens, dim=1)
    #         trg_mask = self.make_trg_mask(trg_tensor)
    #         output = self.decoder(trg_tensor, enc_src, src_mask, trg_mask)
    #         pred_token = output.argmax(dim=-1)[:,-1].unsqueeze(1)  # Predict next token
    #         trg_tokens.append(pred_token)
    #         if pred_token.item() == self.trg_pad_idx:
    #             break
    #     return torch.cat(trg_tokens, dim=1)
    def inference(self, src):
        src_mask = self.make_src_mask(src)
        enc_src = self.encoder(src, src_mask)
        trg_init_token = torch.tensor([[self.trg_pad_idx]]).to(src.device)  # <pad> token to start decoding
        trg_tokens = [trg_init_token]
        for i in range(self.max_length):
            trg_tensor = torch.cat(trg_tokens, dim=1)
            trg_mask = self.make_trg_mask(trg_tensor)
            output = self.decoder(trg_tensor, enc_src, src_mask, trg_mask)
            pred_token = output.argmax(dim=-1)[:, -1].unsqueeze(1)  # Predict next token
            trg_tokens.append(pred_token)
            if pred_token.item() == self.trg_pad_idx:
                break
        return torch.cat(trg_tokens, dim=1)


In [10]:
class Transformer(nn.Module):
    def __init__(
        self,
        src_vocab_size,
        trg_vocab_size,
        src_pad_idx,
        trg_pad_idx,
        embed_size,
        num_layers,
        forward_expansion,
        heads,
        dropout,
        device,
        max_length,
    ):
        super(Transformer, self).__init__()

        self.max_length = max_length  # Set max_length as an attribute
        
        self.encoder = Encoder(
            src_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length,
        )

        self.decoder = Decoder(
            trg_vocab_size,
            embed_size,
            num_layers,
            heads,
            forward_expansion,
            dropout,
            device,
            max_length,
        )

        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device
    
    def make_src_mask(self, src):
        src_mask = (src != self.src_pad_idx).unsqueeze(-2).unsqueeze(-2)
        return src_mask.to(self.device)

    def make_trg_mask(self, trg):
        trg_len = trg.shape[1]
        trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(1, 1, trg_len, trg_len)
        return trg_mask.to(self.device)

    def forward(self, src, trg=None):
        if trg is not None:
            # Training
            src_mask = self.make_src_mask(src)
            trg_mask = self.make_trg_mask(trg)
            enc_src = self.encoder(src, src_mask)
            output = self.decoder(trg, enc_src, src_mask, trg_mask)
            return output
        else:
            # Inference
            return self.inference(src)

    # def inference(self, src):
    #     src_mask = self.make_src_mask(src)
    #     enc_src = self.encoder(src, src_mask)
    #     trg_init_token = torch.tensor([[self.trg_pad_idx]]).to(src.device)  # <pad> token to start decoding
    #     trg_tokens = [trg_init_token]
    #     for i in range(self.max_length):
    #         trg_tensor = torch.cat(trg_tokens, dim=1)
    #         trg_mask = self.make_trg_mask(trg_tensor)
    #         output = self.decoder(trg_tensor, enc_src, src_mask, trg_mask)
    #         pred_token = output.argmax(dim=-1)[:,-1].unsqueeze(1)  # Predict next token
    #         trg_tokens.append(pred_token)
    #         if pred_token.item() == self.trg_pad_idx:
    #             break
    #     return torch.cat(trg_tokens, dim=1)
    def inference(self, src):
        src_mask = self.make_src_mask(src)
        enc_src = self.encoder(src, src_mask)
        trg_init_token = torch.tensor([[self.trg_pad_idx]]).to(src.device)  # <pad> token to start decoding
        trg_tokens = [trg_init_token]
        for i in range(self.max_length):
            trg_tensor = torch.cat(trg_tokens, dim=1)
            trg_mask = self.make_trg_mask(trg_tensor)
            output = self.decoder(trg_tensor, enc_src, src_mask, trg_mask)
            pred_token = output.argmax(dim=-1)[:, -1].unsqueeze(1)  # Predict next token
            trg_tokens.append(pred_token)
            if pred_token.item() == self.trg_pad_idx:
                break
        return torch.cat(trg_tokens, dim=1)


In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
embed_size = 256
num_layers = 4
forward_expansion = 4
heads =4
dropout = 0.019
learning_rate = 0.01
num_epochs = 20
max_length = 100

In [12]:
model = Transformer(
    src_vocab_size=len(vocab),
    trg_vocab_size=len(vocab),
    src_pad_idx=vocab["<pad>"],
    trg_pad_idx=vocab["<pad>"],
    embed_size=embed_size,
    num_layers=num_layers,
    forward_expansion=forward_expansion,
    heads=heads,
    dropout=dropout,
    device=device,
    max_length=max_length 
).to(device)


In [13]:
# Assuming vocab has a method or attribute to access index-to-token mapping
index_to_token = {index: vocab.itos[index] for index in range(len(vocab))}

# Function to convert tensor of numerical indices to string
def tensor_to_string(tensor, index_to_token, pad_token='<pad>'):
    tokens = [index_to_token[index.item()] for index in tensor if index.item() != vocab[pad_token]]
    return ' '.join(tokens)

def convert_string(value):
    question_tensor = torch.tensor(value)
    # question_tensor = torch.tensor(value).clone().detach()
    question_string = tensor_to_string(question_tensor, index_to_token)
    return question_string
    

In [14]:
act_val = [3398, 3249, 1610,   29,    2,  245,  402, 4031,   19,    2,  575, 1887,
            3,    2,   18,   10,    5, 1207,   13, 2240,  272,  148,    2,   62,
          761,    7,    2,  145,    4,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1]

convert_string(act_val)

'maidens schedule pine by the entrance along plated with the agama appeal of the nyatapola temple , way a drawing they purpose the there remarkable and the festival .'

In [15]:
pred_val =[1299, 1376,  245,   29,    2,  245,  705,  824,   19,    2,   18, 1887,
            3,   18,   18,   10,    5,   28,   13, 2240, 1906,   19,    2,  113,
          761,    7, 1171,  113,    3,  507,    3,    3,    3,    3,    4,    2,
            2,    3,    2,   27,   27,   27,    4,  147,    2,    8,   19,   19,
            2,    2,    2,    4,    2,    2,    2,   19,    2,    2,    7,    2,
           27,    4,    2,   27,    4,    4,    5,  147,   19,    2,    4,    2,
          152,    2,    4,    2,    2,    3,    2,    2,    2,    4,    2,    2,
            2,    2,    2,    2,    2,    2,    2,    7,    2,    4,   19,    2,
            2,    4,    2]

convert_string(pred_val)

'chowkot salvaged entrance by the entrance material divided with the nyatapola appeal of nyatapola nyatapola temple , who a drawing geography with the community remarkable and eyes community of sage of of of of . the the of the its its its . durvasa the in with with the the the . the the the with the the and the its . the its . . , durvasa with the . the condition the . the the of the the the . the the the the the the the the the and the . with the the . the'

In [16]:
pred1 = [  56,   10,    9,   13,   11,   38,   56,   29,    5,   13,    9,  253,
           16,   13, 1011,  104,   70,    2,    7,  315,    4,   33,   11,    4,
            4,    8,    4,    2,    4,    8,    4,    4,    4,    4,    4,    2,
            4,    8,    4,    4,    4,   27,    4,    4,    4,    4,    4,    4,
            2,    4,    4,    4,    4,    4,    2,    4,    4,    4,    4,    4,
            4,    4,   11,    4,    4,    4,    4,    4,    4,    4,    4,   15,
            4,    4,    4,    2,    4,    4,  897,    4,    2,    4,    2,    4,
            4,    4,    4,    4,    5,    5,  897,    4,    2,    4,    4,  527,
            4,    4,    4]

convert_string(pred1)

'reconstruction temple is a to windows reconstruction by , a is site as a 000 such role the and roof . it to . . in . the . in . . . . . the . in . . . its . . . . . . the . . . . . the . . . . . . . to . . . . . . . . bhaktapur . . . the . . them . the . the . . . . . , , them . the . . post . . .'

In [21]:
act1 =   [   2, 2551,  693,    3,   23,   16,   13,  908,    8,  260,    5,  366,
            2,  124, 3202,    7,  962, 1996,    8,   50,  127,    4,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1]

convert_string(act1)

'the pieces depicting of how as a final in srinath , describe the building melodious and involvement hoist in gate wall .'

In [38]:
testq = "What is the height of Nyatapola ?"

In [39]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download necessary resources for NLTK
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Preprocessing function
def preprocess_sentence(sentence):
    # Lowercase and strip
    sentence = sentence.lower().strip()
    # Creating a space between a word and the punctuation following it
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    sentence = re.sub(r'[" "]+', " ", sentence)
    # Removing contractions
    sentence = re.sub(r"i'm", "i am", sentence)
    sentence = re.sub(r"he's", "he is", sentence)
    sentence = re.sub(r"she's", "she is", sentence)
    sentence = re.sub(r"it's", "it is", sentence)
    sentence = re.sub(r"that's", "that is", sentence)
    sentence = re.sub(r"what's", "that is", sentence)
    sentence = re.sub(r"where's", "where is", sentence)
    sentence = re.sub(r"how's", "how is", sentence)
    sentence = re.sub(r"\'ll", " will", sentence)
    sentence = re.sub(r"\'ve", " have", sentence)
    sentence = re.sub(r"\'re", " are", sentence)
    sentence = re.sub(r"\'d", " would", sentence)
    sentence = re.sub(r"\'re", " are", sentence)
    sentence = re.sub(r"won't", "will not", sentence)
    sentence = re.sub(r"can't", "cannot", sentence)
    sentence = re.sub(r"n't", " not", sentence)
    sentence = re.sub(r"n'", "ng", sentence)
    sentence = re.sub(r"'bout", "about", sentence)
    # Replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
    sentence = re.sub(r"[^a-zA-Z?.!,0-9 %]+", " ", sentence)
    sentence = sentence.strip()
    return sentence

# Stop words removal and lemmatization function
def preprocess_text(text):
    # Tokenize the text
    tokens = nltk.word_tokenize(text)
    # Remove stop words
    #stop_words = set(stopwords.words("english"))
    #filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    # Lemmatize the tokens
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in tokens if word.lower()]
    return " ".join(lemmatized_tokens)

# Apply preprocessing to each question and answer
testq = preprocess_sentence(testq)
testq = preprocess_text(testq)

print(testq)

what is the height of nyatapola ?


[nltk_data] Downloading package punkt to /home/abhishek/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/abhishek/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/abhishek/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [44]:
import torch
from torch.utils.data import Dataset
from torchtext.data.utils import get_tokenizer
from torch.utils.data import Dataset, DataLoader

# Define tokenizer
tokenizer = get_tokenizer("basic_english")

class TestQADataset(Dataset):
    def __init__(self, question, tokenizer, vocab, max_seq_length):
        self.question = question
        self.vocab = vocab
        self.max_seq_length = max_seq_length
        self.tokenizer = tokenizer

    def __len__(self):
        return 1

    def __getitem__(self, idx):
        question_tokens = self.tokenizer(self.question)
        question_indices = [self.vocab[token] for token in question_tokens]

        # Pad or truncate sequence to max_seq_length
        question_indices = question_indices[:self.max_seq_length] + [self.vocab['<pad>']] * (self.max_seq_length - len(question_indices))
        question_tensor = torch.tensor(question_indices, dtype=torch.long)

        return question_tensor


# Define max sequence length
max_seq_length = 100

# Convert string data into tensors
test_dataset = TestQADataset(testq, tokenizer, vocab, max_seq_length)

# Define batch size
batch_size = 1  # Set batch size to 1 for testing

# Create data loader
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Example usage of the test loader
for idx, question in enumerate(test_loader):
    # Move the tensor to the appropriate device
    question = question.to(device)  # Assuming `device` is defined elsewhere

    # Perform inference with the model using the question
    with torch.no_grad():
        output_sequence = model(question)

print(output_sequence)



tensor([[   1, 4547, 6720, 2523, 2846,  923, 6665, 5329, 2485, 3898, 3577,  337,
         6897,  509, 3706, 6706,  401, 1976, 3746, 2295,   71, 2508, 6267, 4227,
         3621, 2866, 3408, 2090, 6113,  289, 2810, 2949, 3269, 1447, 1279, 6879,
         6987, 6945, 1355, 2129, 1169, 2888, 5488, 1176, 1255, 2902, 6875, 3294,
         4562, 5706, 1224, 1656, 6959, 5772, 5123, 2477, 2646, 4159, 6745, 3408,
          367, 5586, 2505, 2419, 1495, 3096, 2754, 5908, 1542, 2761, 6545, 4634,
         3444, 6038, 1811, 2239, 1878, 6363, 4019, 6938, 3408, 6189, 5536, 1471,
         2295, 5814,  304, 2194,  858, 4113, 6137, 5983,  886, 6669, 6666, 2343,
          814,  863, 3471, 6274, 5496]], device='cuda:0')


In [None]:
# Converting tensor to list 
output_sequence = output_sequence.tolist()
print(output_sequence)

# Flatten the nested list
output_sequence= [item for sublist in output_sequence for item in sublist]
print(output_sequence)

In [41]:
# Assuming vocab has a method or attribute to access index-to-token mapping
index_to_token = {index: vocab.itos[index] for index in range(len(vocab))}

# Function to convert tensor of numerical indices to string
def tensor_to_string(tensor, index_to_token, pad_token='<pad>'):
    tokens = [index_to_token[index.item()] for index in tensor if index.item() != vocab[pad_token]]
    return ' '.join(tokens)

def convert_string(value):
    question_tensor = torch.tensor(value)
    # question_tensor = torch.tensor(value).clone().detach()
    question_string = tensor_to_string(question_tensor, index_to_token)
    return question_string
    

In [50]:
# Getting highest probability
high_output_sequence = output_sequence.argmax(dim=-1)
print(high_output_sequence)

tensor([36], device='cuda:0')


In [54]:
predicted_indices = high_output_sequence

# Convert predicted token indices to tokens
predicted_tokens = [vocab.itos[index] for index in high_output_sequence]

# Handle end-of-sequence token
eos_index = vocab.stoi['<eos>']
if eos_index in predicted_indices:
    eos_position = (predicted_indices == eos_index).nonzero(as_tuple=True)[0]
    predicted_tokens = predicted_tokens[:eos_position + 1]

# Post-processing (if needed)
# For example, remove padding tokens
predicted_tokens = [token for token in predicted_tokens if token != '<pad>']

# Join tokens into a single string (if needed)
predicted_answer = ' '.join(predicted_tokens)

# Print or use predicted answer
print(predicted_answer)



from


In [42]:
convert_string(output_sequence)

'creatures renunciant dates manifests drunk hotter solution mangsir tribes gem multi badrinatha school storytelling featured metalwork threads evolve alternative privately responsible interwoven located marked drunk agni 60 well possible mul thicker pratyavijafia promise reference gilded took plan connects realizing mark starting emancipated crumbs upanisad 50 shower doa imbued styled eras agamdev honour well brings devastation ganesh keen liveliness visitors jumgam retrofit masan generation inspection spot nside commencement somewhat digambara shameful umamaheswara plan sudden jayasthiti organize acquiring adhaar patan spot multi lun . precursor rebuilt dushyanta paramount insulting nightlife vainya atris harmoniously cited able decisions endeavors resulting plan featured fascination restorations'