In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.decomposition import PCA

## Sampling Text

In [None]:
# change this to any decoder only LLM
MODEL_NAME = "gpt2"
SEED = 42

# Use the first GPU when CUDA is usable, otherwise fall back to the CPU so the
# notebook still runs on machines without an NVIDIA driver.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Seed torch's RNG so the sampling-based demos repeat across "Restart & Run All".
torch.manual_seed(SEED)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# The weights must live on `device`, because the sampling functions move their
# input ids there before calling the model.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)

## Greedy Search Decoding

In [None]:
def greedy_decoding(model, text, max_length=10):
    """Continue ``text`` by always appending the single most likely next token.

    Args:
        model: Causal language model (an ``nn.Module``); called as
            ``model(input_ids)`` and expected to return an object exposing
            a ``.logits`` tensor.
        text: Prompt string, tokenized with the notebook-level ``tokenizer``.
        max_length: Maximum number of new tokens to append to the prompt.

    Returns:
        The decoded prompt followed by the generated tokens (special tokens
        are kept in the text).

    Raises:
        ValueError: If ``text`` tokenizes to an empty sequence.
    """
    # Work on the device that holds the model weights, so the prompt, the logits
    # and every appended token are guaranteed to share one device.
    model_device = next(model.parameters()).device

    # tokenize the input text
    input_ids = tokenizer.encode(text, return_tensors='pt', add_special_tokens=False).to(model_device)
    if input_ids.numel() == 0:
        raise ValueError("text must contain at least one token")

    with torch.no_grad():
        # loop until the maximum length is reached
        for _ in range(max_length):
            # feed X_{1...t} and get token logits for t+1 th step
            logits = model(input_ids).logits[:, -1, :]

            # find the most likely token; keepdim keeps a (1, 1) shape for torch.cat
            next_token_id = torch.argmax(logits, dim=-1, keepdim=True)

            # append X_{t+1} to the input sequence
            input_ids = torch.cat((input_ids, next_token_id), dim=-1)

            # break if <eos> token is generated
            if next_token_id.item() == tokenizer.eos_token_id:
                break

    # decode the generated tokens and return the text
    return tokenizer.decode(input_ids[0], skip_special_tokens=False)

In [None]:
# Short continuation with the default length budget.
rain_completion = greedy_decoding(model, "It rains a lot in the")
print(rain_completion)
print("---")
# Longer continuation: allow up to 50 new tokens.
apples_completion = greedy_decoding(model, "Tell me about apples:", max_length=50)
print(apples_completion)

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

## Decoding with Sampling: Top K

In [None]:
def sampling_decoding_top_k(model, text, top_k=50, max_length=30):
    """Continue ``text`` by sampling each next token from the ``top_k`` most likely ones.

    Args:
        model: Causal language model (an ``nn.Module``); called as
            ``model(input_ids)`` and expected to return an object exposing
            a ``.logits`` tensor.
        text: Prompt string, tokenized with the notebook-level ``tokenizer``.
        top_k: Number of highest-scoring tokens kept as sampling candidates.
            Values larger than the vocabulary are clamped to its size.
        max_length: Maximum number of new tokens to append to the prompt.

    Returns:
        The decoded prompt followed by the sampled tokens (special tokens
        are kept in the text).

    Raises:
        ValueError: If ``top_k`` is smaller than 1 or ``text`` tokenizes to
            an empty sequence.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")

    # Work on the device that holds the model weights, so the prompt, the logits
    # and every appended token are guaranteed to share one device.
    model_device = next(model.parameters()).device

    # tokenize the input text
    input_ids = tokenizer.encode(text, return_tensors='pt', add_special_tokens=False).to(model_device)
    if input_ids.numel() == 0:
        raise ValueError("text must contain at least one token")

    with torch.no_grad():
        # loop until the maximum length is reached
        for _ in range(max_length):
            # feed X_{1...t} and get token logits for t+1 th step
            logits = model(input_ids).logits[:, -1, :]

            # find top_k tokens (topk avoids sorting the whole vocabulary)
            num_candidates = min(top_k, logits.size(-1))
            top_k_logits, top_k_indices = torch.topk(logits, num_candidates, dim=-1)

            # redistribute the probability mass using softmax
            top_k_probs = torch.softmax(top_k_logits, dim=-1)

            # randomly sample a position among the candidates, then map it
            # back to the corresponding vocabulary id
            chosen_idx = torch.multinomial(top_k_probs, num_samples=1)
            next_token_id = top_k_indices.gather(-1, chosen_idx)

            # append X_{t+1} to the input sequence
            input_ids = torch.cat((input_ids, next_token_id), dim=-1)

            # break if <eos> token is generated
            if next_token_id.item() == tokenizer.eos_token_id:
                break

    # decode the generated tokens and return the text
    return tokenizer.decode(input_ids[0], skip_special_tokens=False)

In [None]:
# Sample a continuation with the default top_k and length settings.
top_k_completion = sampling_decoding_top_k(model, "It rains a lot in the")
print(top_k_completion)

NameError: name 'model' is not defined

## Decoding with Sampling: Top P

In [None]:
def sampling_decoding_top_p(model, text, top_p=0.92, max_length=30):
    """Continue ``text`` with nucleus (top-p) sampling.

    At every step the tokens are ranked by probability and the shortest prefix
    whose cumulative probability exceeds ``top_p`` is kept; the next token is
    sampled from that prefix after renormalizing with softmax.

    Args:
        model: Causal language model (an ``nn.Module``); called as
            ``model(input_ids)`` and expected to return an object exposing
            a ``.logits`` tensor.
        text: Prompt string, tokenized with the notebook-level ``tokenizer``.
        top_p: Cumulative-probability threshold. If the cumulative probability
            never exceeds it (e.g. ``top_p >= 1``), the whole vocabulary is kept.
        max_length: Maximum number of new tokens to append to the prompt.

    Returns:
        The decoded prompt followed by the sampled tokens (special tokens
        are kept in the text).

    Raises:
        ValueError: If ``text`` tokenizes to an empty sequence.
    """
    # Work on the device that holds the model weights, so the prompt, the logits
    # and every appended token are guaranteed to share one device.
    model_device = next(model.parameters()).device

    # tokenize the input text
    input_ids = tokenizer.encode(text, return_tensors='pt', add_special_tokens=False).to(model_device)
    if input_ids.numel() == 0:
        raise ValueError("text must contain at least one token")

    with torch.no_grad():
        # loop until the maximum length is reached
        for _ in range(max_length):
            # feed X_{1...t} and get token logits for t+1 th step
            logits = model(input_ids).logits[:, -1, :]

            # find the minimum set of tokens whose cumulative probability is above the threshold
            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
            cumulative_prob = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)

            # The entries still at or below the threshold all precede the first
            # one that exceeds it, so their count is that entry's index. Clamp to
            # the vocabulary size for the case where nothing exceeds top_p.
            top_p_num = int((cumulative_prob[0] <= top_p).sum().item()) + 1
            top_p_num = min(top_p_num, sorted_logits.size(-1))

            top_p_logits, top_p_indices = sorted_logits[:, :top_p_num], sorted_indices[:, :top_p_num]

            # redistribute the probability mass using softmax
            top_p_probs = torch.softmax(top_p_logits, dim=-1)

            # randomly sample a position inside the nucleus, then map it back
            # to the corresponding vocabulary id
            chosen_idx = torch.multinomial(top_p_probs, num_samples=1)
            next_token_id = top_p_indices.gather(-1, chosen_idx)

            # append X_{t+1} to the input sequence
            input_ids = torch.cat((input_ids, next_token_id), dim=-1)

            # break if <eos> token is generated
            if next_token_id.item() == tokenizer.eos_token_id:
                break

    # decode the generated tokens and return the text
    return tokenizer.decode(input_ids[0], skip_special_tokens=False)

In [None]:
# Nucleus sampling with a 0.82 threshold, limited to 10 new tokens.
top_p_completion = sampling_decoding_top_p(model, "It rains a lot in the", top_p=0.82, max_length=10)
print(top_p_completion)

NameError: name 'model' is not defined

## Decoding with Sampling: Temperature

In [None]:
def sampling_decoding_temperature(model, text, temperature=1, max_length=30):
    """Continue ``text`` by sampling from the temperature-scaled distribution.

    Args:
        model: Causal language model (an ``nn.Module``); called as
            ``model(input_ids)`` and expected to return an object exposing
            a ``.logits`` tensor.
        text: Prompt string, tokenized with the notebook-level ``tokenizer``.
        temperature: Divisor applied to the logits before softmax. Values
            below 1 sharpen the distribution, values above 1 flatten it.
            ``0`` is treated as the limiting case: the most likely token is
            always picked (greedy decoding).
        max_length: Maximum number of new tokens to append to the prompt.

    Returns:
        The decoded prompt followed by the sampled tokens (special tokens
        are kept in the text).

    Raises:
        ValueError: If ``temperature`` is negative or ``text`` tokenizes to
            an empty sequence.
    """
    if temperature < 0:
        raise ValueError("temperature must be non-negative")

    # Work on the device that holds the model weights, so the prompt, the logits
    # and every appended token are guaranteed to share one device.
    model_device = next(model.parameters()).device

    # tokenize the input text
    input_ids = tokenizer.encode(text, return_tensors='pt', add_special_tokens=False).to(model_device)
    if input_ids.numel() == 0:
        raise ValueError("text must contain at least one token")

    with torch.no_grad():
        # loop until the maximum length is reached
        for _ in range(max_length):
            # feed X_{1...t} and get token logits for t+1 th step
            logits = model(input_ids).logits[:, -1, :]

            if temperature == 0:
                # dividing by zero would produce inf/nan logits; the limit of
                # temperature -> 0 is simply the argmax
                next_token_id = torch.argmax(logits, dim=-1, keepdim=True)
            else:
                # apply softmax with temperature
                probs = torch.softmax(logits / temperature, dim=-1)

                # sample from the distribution
                next_token_id = torch.multinomial(probs, num_samples=1)

            # append X_{t+1} to the input sequence
            input_ids = torch.cat((input_ids, next_token_id), dim=-1)

            # break if <eos> token is generated
            if next_token_id.item() == tokenizer.eos_token_id:
                break

    # decode the generated tokens and return the text
    return tokenizer.decode(input_ids[0], skip_special_tokens=False)

In [None]:
# Low temperature: sharply peaked distribution, short continuation.
cool_completion = sampling_decoding_temperature(model, "It rains a lot in the", temperature=0.2, max_length=10)
print(cool_completion)
print("---")
# Temperature 1.0: sample from the unscaled distribution, longer continuation.
neutral_completion = sampling_decoding_temperature(model, "Tell me about apples:", temperature=1.0, max_length=50)
print(neutral_completion)

It rains a lot in the summer, but it's not like it's raining
---
Tell me about apples: For years, bitter apples have been the hallmark of thru-hikers. In 2002, student Kevin Rosenberg of Modesto spent two weeks biking from Sun Valley to Massachusetts to view maple maple months back at 2:00 in the morning. He wasn't


## Beam Search Decoding

In [None]:
def beam_search_decoding(model, text, num_beams=3, max_length=10):
    """Continue ``text`` with beam search and return the highest-scoring sequence.

    Each beam is scored by the sum of the log-probabilities of its generated
    tokens (equivalent to ranking by the product of probabilities, but without
    floating-point underflow on long sequences). A beam whose last generated
    token is ``<eos>`` is finished: it is carried over unchanged and competes
    with the longer candidates instead of being extended further.

    Args:
        model: Causal language model (an ``nn.Module``); called as
            ``model(input_ids)`` and expected to return an object exposing
            a ``.logits`` tensor.
        text: Prompt string, tokenized with the notebook-level ``tokenizer``.
        num_beams: Number of partial sequences kept after every step; also the
            number of continuations considered per beam (clamped to the
            vocabulary size).
        max_length: Maximum number of new tokens to append to the prompt.

    Returns:
        The decoded text of the best beam (special tokens are kept).

    Raises:
        ValueError: If ``num_beams`` is smaller than 1 or ``text`` tokenizes
            to an empty sequence.
    """
    if num_beams < 1:
        raise ValueError("num_beams must be a positive integer")

    # Work on the device that holds the model weights, so the prompt, the logits
    # and every appended token are guaranteed to share one device.
    model_device = next(model.parameters()).device

    # tokenize the input text
    prompt_ids = tokenizer.encode(text, return_tensors='pt', add_special_tokens=False).to(model_device)
    if prompt_ids.numel() == 0:
        raise ValueError("text must contain at least one token")
    prompt_length = prompt_ids.size(-1)
    eos_token_id = tokenizer.eos_token_id

    def _is_finished(token_ids):
        # Only a *generated* <eos> ends a beam; an <eos> inside the prompt does not.
        return token_ids.size(-1) > prompt_length and token_ids[0, -1].item() == eos_token_id

    # initialize the beams
    # list of tuples (token_ids, sum of log-probabilities)
    beams = [(prompt_ids, 0.0)]

    with torch.no_grad():
        for _ in range(max_length):
            all_candidates = []

            for beam_ids, beam_score in beams:
                # finished beams are kept as they are
                if _is_finished(beam_ids):
                    all_candidates.append((beam_ids, beam_score))
                    continue

                logits = model(beam_ids).logits[:, -1, :]

                # get the log-probabilities
                log_probs = torch.log_softmax(logits, dim=-1)

                # select the top num_beams tokens and their log-probabilities
                num_expansions = min(num_beams, log_probs.size(-1))
                top_log_probs, top_token_ids = torch.topk(log_probs, num_expansions, dim=-1)

                for token_log_prob, token_id in zip(top_log_probs[0].tolist(), top_token_ids[0]):
                    # view(1, 1) gives the token the (batch, 1) shape torch.cat needs
                    new_ids = torch.cat((beam_ids, token_id.view(1, 1)), dim=-1)
                    all_candidates.append((new_ids, beam_score + token_log_prob))

            # keep the top num_beams sequences
            beams = sorted(all_candidates, key=lambda candidate: candidate[1], reverse=True)[:num_beams]

            # break if all sequences in beams end with <eos> token
            if all(_is_finished(beam_ids) for beam_ids, _ in beams):
                break

    # decode the best sequence (the one with the highest score)
    best_sequence = beams[0][0]
    return tokenizer.decode(best_sequence[0], skip_special_tokens=False)

In [None]:
# Beam search with 3 beams, limited to 10 new tokens.
beam_completion = beam_search_decoding(model, "It rains a lot in the", num_beams=3, max_length=10)
print(beam_completion)

It rains a lot in the summer, so it's a good time to get


In [None]:
def demonstrate_next_token_prediction():
    """Load GPT-2, continue a fixed prompt with ``model.generate`` and print the result.

    The prompt is encoded with the tokenizer's ``__call__`` so that an attention
    mask is produced alongside the input ids, and ``pad_token_id`` is set
    explicitly to the ``<eos>`` id. Passing both to ``generate`` avoids the
    "attention mask and the pad token id were not set" warnings.
    """
    tokenizer = AutoTokenizer.from_pretrained('gpt2')
    model = AutoModelForCausalLM.from_pretrained('gpt2')

    text = "Hi, my"
    encoded = tokenizer(text, return_tensors='pt')
    outputs = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=20,  # total length in tokens, prompt included
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    print("Generated Text:", tokenizer.decode(outputs[0], skip_special_tokens=True))

demonstrate_next_token_prediction()

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Text: Hi, my name is John. I'm a writer and a musician. I'm a musician.
