In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tokenizer is pulled from the base Llama-3.2-1B hub repository.
# NOTE(review): the model below is loaded from a local fine-tuned checkpoint;
# confirm that this tokenizer matches the one used during fine-tuning.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")

# A batch holding one empty prompt, so generation is unconditional: the only
# token fed to the model is whatever the tokenizer prepends on its own.
inputs = tokenizer(
    [""],
    return_tensors="pt",
).to("cuda")

In [3]:
# Display the encoded batch (input ids and attention mask) to check what the
# empty prompt was tokenized into and which device the tensors live on.
inputs

{'input_ids': tensor([[128000]], device='cuda:0'), 'attention_mask': tensor([[1]], device='cuda:0')}

In [4]:
# Fine-tuned checkpoint to sample from. An earlier version of this cell loaded
# the stock "meta-llama/Llama-2-7b-hf" weights through AutoModelForCausalLM.
# NOTE(review): this is an absolute path inside one user's home directory, so
# the notebook will not run elsewhere without editing it.
model_path = "/auto/home/knarik/Molecular_Generation_with_GDB13/src/checkpoints/checkpoints_code/Llama-3-1B_tit_hf_4_epochs/step-3126"

# Load the weights in half precision, then move the model to the GPU.
model = LlamaForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
)
model = model.to("cuda")

# Switch to inference mode; as the cell's last expression this also displays
# the module structure.
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb):

In [7]:
# Deterministic group (diverse) beam search starting from the encoded prompt:
#   - 100 beams split into 10 groups, with a diversity penalty of 1.0 applied
#     between groups,
#   - at most 45 newly generated tokens per sequence,
#   - the 10 best sequences are returned.
outputs = model.generate(
    **inputs,
    max_new_tokens=45,
    num_beams=100,
    num_beam_groups=10,
    diversity_penalty=1.0,
    do_sample=False,
    num_return_sequences=10,
    # Pass the padding id explicitly instead of relying on generate()'s
    # fallback, which logs "Setting `pad_token_id` to `eos_token_id`" on every
    # call. NOTE(review): assumes tokenizer.eos_token_id equals the model's
    # eos id (128001 in the logged warning) — confirm.
    pad_token_id=tokenizer.eos_token_id,
)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [8]:
# Convert every returned id sequence back to text, leaving out special tokens,
# and display the resulting list of strings.
tokenizer.batch_decode(
    outputs,
    skip_special_tokens=True,
)

['a = 7\nb = (a + 7)+(a - 7)\nfor c in range(7, 10, 1) :\n\tprint(a / 7)',
 'a = 7\nb = (a + 7)+(a - 7)\nfor c in range(7, 10, 1) :\n\tprint(a / 8)',
 'a = 7\nb = (a + 7)+(a - 7)\nfor c in range(7, 10, 1) :\n\tprint(b / 7)',
 'a = 7\nb = (a + 7)+(a - 7)\nfor c in range(7, 10, 1) :\n\tprint(a - 7)',
 'a = 7\nb = (a + 7)+(a - 7)\nfor c in range(7, 10, 1) :\n\tprint(b / 8)',
 'a = 7\nb = (a + 7)+(a / 7)\nfor c in range(7, 10, 1) :\n\tprint(a - 7)',
 'a = 7\nb = (a + 7)+(a - 7)\nfor c in range(7, 10, 1) :\n\tprint(a - 8)',
 'a = 7\nb = (a + 7)+(a - 7)\nfor c in range(8, 10, 1) :\n\tprint(a - 7)',
 'a = 7\nb = (a + 7)+(a / 7)\nfor c in range(7, 10, 1) :\n\tprint(a - 8)',
 'a = 7\nb = (a + 7)+(a - 8)\nfor c in range(7, 10, 1) :\n\tprint(a - 7)']