- Very Basic Implementation of Generative Model using Bigram LSTM Model
- Defined Everything custom so that it would help you to understand the basic concept behind generative models


### Training Corpus You can load your own dataset too

In [94]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict

corpus = [
    "I like natural language processing",
    "I like deep learning",
    "I like PyTorch"
    "I love to work on coding"
]



### Custom Simple tokenizer

In [None]:

tokens = [sentence.lower().split() for sentence in corpus]

# Create a vocabulary
vocab = set([word for sentence in tokens for word in sentence])

# Create word-to-index and index-to-word mappings
word_to_ix = {word: i for i, word in enumerate(vocab)}
ix_to_word = {i: word for i, word in enumerate(vocab)}

### Bigram counts for each word

In [95]:

bigram_counts = {}

for sentence in tokens:

    for i in range(1, len(sentence)):

        current_word = sentence[i]
        previous_word = sentence[i - 1]

        if previous_word not in bigram_counts:
            bigram_counts[previous_word] = {}

        if current_word not in bigram_counts[previous_word]:
            bigram_counts[previous_word][current_word] = 0

        bigram_counts[previous_word][current_word] += 1


print(bigram_counts)

{'i': {'like': 3}, 'like': {'natural': 1, 'deep': 1, 'pytorchi': 1}, 'natural': {'language': 1}, 'language': {'processing': 1}, 'deep': {'learning': 1}, 'pytorchi': {'love': 1}, 'love': {'to': 1}, 'to': {'work': 1}, 'work': {'on': 1}, 'on': {'coding': 1}}


### Bigram Model with LSTM

In [96]:

class BigramLSTM(nn.Module):
    def __init__(self, vocab_size,embedding_dim,hidden_dim,):
        super(BigramLSTM, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_sim=embedding_dim
        self.hidden_dim=hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, _ = self.lstm(embeds)
        output = self.fc(lstm_out[:, -1, :])
        return output

# Hyperparameters
learning_rate = 0.01
num_epochs = 100
embedding_dim=32
hidden_dim=16
# Initialize the model and optimizer
model = BigramLSTM(len(vocab),embedding_dim,hidden_dim)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [97]:
bigram_counts.items()

dict_items([('i', {'like': 3}), ('like', {'natural': 1, 'deep': 1, 'pytorchi': 1}), ('natural', {'language': 1}), ('language', {'processing': 1}), ('deep', {'learning': 1}), ('pytorchi', {'love': 1}), ('love', {'to': 1}), ('to', {'work': 1}), ('work', {'on': 1}), ('on', {'coding': 1})])

In [98]:
for context_word_ix, next_word_ixs in bigram_counts.items():
    print(context_word_ix)
    print(next_word_ixs)

i
{'like': 3}
like
{'natural': 1, 'deep': 1, 'pytorchi': 1}
natural
{'language': 1}
language
{'processing': 1}
deep
{'learning': 1}
pytorchi
{'love': 1}
love
{'to': 1}
to
{'work': 1}
work
{'on': 1}
on
{'coding': 1}


### Model training

In [99]:

for epoch in range(num_epochs):
    total_loss = 0
    for context_word_ix, next_word_ixs in bigram_counts.items():
        context_tensor = torch.tensor([[word_to_ix[context_word_ix]]])
        for next_word_ix, count in next_word_ixs.items():
            optimizer.zero_grad()
            output = model(context_tensor)
            loss = nn.CrossEntropyLoss()(output, torch.tensor([word_to_ix[next_word_ix]]))
            total_loss += loss.item()
            loss.backward()

            optimizer.step()
    # Print loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {total_loss}')


Epoch 10, Loss: 6.3482209369540215
Epoch 20, Loss: 4.003192204982042
Epoch 30, Loss: 3.676830952987075
Epoch 40, Loss: 3.5573324924334884
Epoch 50, Loss: 3.497047302313149
Epoch 60, Loss: 3.461038926616311
Epoch 70, Loss: 3.4393726114649326
Epoch 80, Loss: 3.4246906868647784
Epoch 90, Loss: 3.41412171931006
Epoch 100, Loss: 3.406180937658064


### Simple Testing of Model

In [100]:
context = "i like deep"
context_tokens = context.split()
context_tensor = torch.tensor([[word_to_ix[word] for word in context_tokens]])
output = model(context_tensor)
next_word_ix = torch.argmax(output).item()
next_word = ix_to_word[next_word_ix]
print(f"Given the context '{context}', the model predicts the next word is '{next_word}'")

Given the context 'i like deep', the model predicts the next word is 'learning'
