In [10]:
import torch
import torch.nn.functional as F

from nplm.utils import load_model
from nplm.data_setup import load_vocab
from nplm.model import Config, NeuralProbabilisticLanguageModel

In [11]:
# Word -> index mapping; its size fixes the vocabulary dimension of the model.
vocab = load_vocab(file_path="data/vocab.pkl")

# Hyperparameters (forwarded to Config when the model is built)
V = len(vocab)  # vocabulary size
n = 5           # context size
h = 50          # hidden size
m = 120         # embedding size
direct = False  # passed as Config(direct=...); also selects the "_direct" checkpoint name

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [12]:
# Checkpoint name encodes the hyperparameters; "_direct" is appended only
# when the `direct` flag is set.
file_name = f"model_n{n}_h{h}_m{m}" + ("_direct" if direct else "")

config = Config(
    vocab_size=V,
    embed_size=m,
    hidden_size=h,
    context_size=n,
    direct=direct,
)
model = NeuralProbabilisticLanguageModel(config)

# Kept as the last expression of the cell so whatever it returns is displayed.
load_model(model, file_name=f"{file_name}.pth", device=device)

number of parameters: 2.46M


NeuralProbabilisticLanguageModel(
  (C): Embedding(14222, 120)
  (H): Linear(in_features=600, out_features=50, bias=True)
  (tanh): Tanh()
  (U): Linear(in_features=50, out_features=14222, bias=True)
)

In [13]:
def sample(model, vocab, idx_to_word, initial_context, steps=50, temperature=1.0):
    """Generate text by repeatedly predicting the next word from a sliding context.

    Args:
        model: A ``torch.nn.Module`` with at least one parameter. It is called
            with a ``(1, len(context))`` long tensor of word indices and must
            return logits over the vocabulary.
            NOTE(review): presumably the context length has to equal the
            context size the model was built with — confirm against the model.
        vocab: Mapping from word to index. Must contain ``"<UNK>"``, which is
            used for words that are not in the mapping.
        idx_to_word: Mapping from index back to word.
        initial_context: Seed text, either a whitespace-separated string or a
            sequence of words.
        steps: Number of words to generate.
        temperature: Divides the logits before the softmax. ``0`` selects the
            highest-scoring word at every step (greedy decoding) instead of
            sampling.

    Returns:
        The initial words followed by the generated words, joined by spaces.

    Raises:
        ValueError: If ``temperature`` is negative.
        KeyError: If ``vocab`` has no ``"<UNK>"`` entry.

    Side effects:
        Switches ``model`` to evaluation mode and leaves it there.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be non-negative, got {temperature}")
    # Dividing the logits by zero yields inf/nan probabilities that
    # torch.multinomial rejects, so zero is handled as an argmax instead.
    greedy = temperature == 0

    model.eval()  # Set the model to evaluation mode
    if isinstance(initial_context, str):
        initial_context = initial_context.split()  # Split initial context string into words

    # Map words to their indices; unknown words share the <UNK> index.
    unk_idx = vocab["<UNK>"]
    context_indices = [vocab.get(word, unk_idx) for word in initial_context]
    # Build every context tensor directly on the device the model lives on.
    device = next(model.parameters()).device
    context_tensor = torch.tensor([context_indices], dtype=torch.long, device=device)
    output_words = list(initial_context)  # Start with the initial context

    with torch.no_grad():  # We do not need to track gradients here
        for _ in range(steps):
            logits = model(context_tensor)
            if greedy:
                next_token_idx = logits.argmax(dim=-1).item()
            else:
                # Use temperature to scale the logits and apply softmax to get probabilities
                probabilities = F.softmax(logits / temperature, dim=-1)
                next_token_idx = torch.multinomial(probabilities, num_samples=1).item()

            output_words.append(idx_to_word[next_token_idx])

            # Slide the window: drop the oldest index, append the new one.
            context_indices = context_indices[1:] + [next_token_idx]
            context_tensor = torch.tensor([context_indices], dtype=torch.long, device=device)

    return ' '.join(output_words)


In [15]:
# NOTE(review): the vocabulary may be lowercased, in which case "Lisa" falls
# back to <UNK> when building the context — verify against the vocab file.
prompt = "Lisa is a very nice"
idx_to_word = {index: word for word, index in vocab.items()}  # Create reverse mapping

# Seed PyTorch's RNG so re-running this cell reproduces the same samples.
SEED = 42
torch.manual_seed(SEED)

for _ in range(5):
    # `sample` splits string prompts on whitespace itself.
    generated_text = sample(model, vocab, idx_to_word, prompt, steps=20, temperature=0.8)
    print(generated_text)


Lisa is a very nice voice , he says , `` a man has to make indeed , on the problem in their activities .
Lisa is a very nice '' . he says , `` i met the <UNK> of the <UNK> . <UNK> <UNK> over a very long
Lisa is a very nice night , and maybe you do for occur . brown passed a <UNK> paul <UNK> , the <UNK> <UNK> ,
Lisa is a very nice broad staff . but clearly its chinese friends in the <UNK> of the company , the emory university most dying
Lisa is a very nice change , and i have given to the tradition of his son , and that of the <UNK> sea ,
