In [None]:
from transformers import GPT2LMHeadModel
from transformers import GPT2Tokenizer


# Load the pretrained GPT-2 tokenizer and language-model head.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt_decoder = GPT2LMHeadModel.from_pretrained("gpt2")

# GPT-2 ships without a padding token. A made-up string such as "<PAD>" is not
# in the vocabulary, so reuse the end-of-text token as the pad token instead of
# relying on how an unknown token happens to be mapped to an id.
tokenizer.pad_token = tokenizer.eos_token

# Decoder-only models continue from the LAST position of the prompt, so any
# padding has to go on the left for generation to start right after real text.
tokenizer.padding_side = "left"


In [None]:
# Prompt text and the fixed token length it is padded / truncated to.
caption = "Who is Modi "
caption_length = 20

# Encode the prompt as PyTorch tensors of exactly `caption_length` token ids.
tokens = tokenizer(
    caption,
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=caption_length,
)


In [None]:
# Inspect the encoded prompt returned by the tokenizer call.
tokens

In [None]:
import torch 

# Inference only: switch off dropout and skip gradient tracking.
gpt_decoder.eval()

with torch.no_grad():
    output_ids = gpt_decoder.generate(
        input_ids=tokens["input_ids"],
        # The prompt was padded to a fixed length; without the mask the model
        # attends to the pad positions as if they were real text.
        attention_mask=tokens["attention_mask"],
        # NOTE: max_length counts the (padded) prompt as well as the new tokens.
        max_length=30,
        num_beams=3,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

# Decode the first returned sequence, dropping special tokens.
tokenizer.decode(output_ids[0], skip_special_tokens=True)

In [None]:
import torch

def greedy_generate(model, tokenizer, input_ids, max_new_tokens=30, device="cpu"):
    """Extend ``input_ids`` token by token, always taking the arg-max token.

    Args:
        model: Callable as ``model(input_ids=...)`` returning an object with a
            ``logits`` attribute of shape (B, T, V). It is put in eval mode.
        tokenizer: Object exposing ``eos_token_id`` (may be ``None``) and,
            optionally, ``pad_token_id``.
        input_ids: Integer tensor of prompt ids, shape (B, T).
        max_new_tokens: Upper bound on the number of tokens appended.
        device: Device the running sequence is moved to. The model itself is
            not moved, so it must already live on this device.

    Returns:
        Tensor of shape (B, T + n) with n <= max_new_tokens: the prompt followed
        by the generated ids. A row that has produced EOS is filled with the pad
        id (EOS when no pad id is available) for the remaining steps, and the
        loop ends as soon as every row has produced EOS.
    """
    model.eval()
    generated = input_ids.to(device)

    eos_token_id = tokenizer.eos_token_id
    pad_token_id = getattr(tokenizer, "pad_token_id", None)
    fill_token_id = pad_token_id if pad_token_id is not None else eos_token_id

    # Per-row flag: True once that sequence has emitted EOS.
    finished = torch.zeros(generated.shape[0], dtype=torch.bool, device=generated.device)

    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Full forward pass over the sequence so far (no KV cache).
            logits = model(input_ids=generated).logits  # (B, T, V)

            # Greedy choice from the last position's distribution.
            next_token = logits[:, -1, :].argmax(dim=-1)  # (B,)

            if eos_token_id is not None:
                # Rows that already ended must not keep producing real tokens.
                filler = torch.full_like(next_token, fill_token_id)
                next_token = torch.where(finished, filler, next_token)

            generated = torch.cat([generated, next_token.unsqueeze(-1)], dim=1)

            if eos_token_id is not None:
                finished = finished | (next_token == eos_token_id)
                if finished.all():
                    break

    return generated

# Example usage: encode an unpadded prompt, extend it greedily, print the text.
tokens = tokenizer("Who is Modi ", return_tensors="pt")
output_ids = greedy_generate(
    gpt_decoder,
    tokenizer,
    tokens["input_ids"],
    max_new_tokens=30,
)
decoded_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(decoded_text)


In [None]:
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer 
import torch 
import os 

# Read the Hugging Face access token from the environment instead of embedding
# it in the notebook. Export HF_TOKEN before starting the kernel. Any token that
# has been saved inside a notebook should be treated as leaked and revoked.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None:
    print("HF_TOKEN is not set; continuing without an explicit access token.")


# NOTE(review): a bare name without an organisation prefix is presumably a local
# checkpoint directory — confirm it exists, or use the full Hub repository id.
model_name = "Falcon3-1B-Base"

# Replaces the GPT-2 `tokenizer` / `gpt_decoder` objects for the cells below.
tokenizer = AutoTokenizer.from_pretrained(model_name)
gpt_decoder = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)

In [None]:
# Encode the prompt as PyTorch tensors, padded / truncated to exactly 5 token ids.
inputs = tokenizer(
    "Who is Modi",
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=5,
)


In [None]:
# Inspect the encoded prompt returned by the tokenizer call.
inputs

In [None]:
# Flatten the id tensor to 1-D and decode it, keeping special tokens in the text.
flat_input_ids = inputs["input_ids"].view(-1)
tokenizer.decode(flat_input_ids, skip_special_tokens=False)

In [None]:
# Run the prompt ids through the model's input-embedding layer and show the
# shape of the result.
embedding_layer = gpt_decoder.get_input_embeddings()
embedding_layer(inputs["input_ids"]).shape

In [None]:
import torch 

# Inference only: switch off dropout and skip gradient tracking.
gpt_decoder.eval()

with torch.no_grad():
    output_ids = gpt_decoder.generate(
        input_ids=inputs["input_ids"],
        # The prompt was padded to a fixed length; the mask tells the model
        # which positions are padding so they are not treated as real text.
        attention_mask=inputs["attention_mask"],
        max_new_tokens=40,
    )

# Decode the first returned sequence, dropping special tokens.
tokenizer.decode(output_ids[0], skip_special_tokens=True)