In [1]:
import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


# Hugging Face Hub identifier of the checkpoint to load.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Make CUDA device 0 the current device for this process.
# NOTE(review): nothing in this cell moves the model onto that device (no
# `.to(...)` or `device_map=`), so generation presumably runs on CPU — confirm.
torch.cuda.set_device(0)

# Load the tokenizer and the causal-LM weights for `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [2]:
# Show the loaded model's module tree (its repr) as the cell output.
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head)

In [19]:
from transformers import TextStreamer


# Streamer bound to the tokenizer. It is constructed here but not passed to
# `model.generate` below, so it has no effect on this cell's output.
text_streamer = TextStreamer(tokenizer)

# Prompt template; `{clue}` is substituted below. Kept under its own name so
# the template is not overwritten by the formatted prompt.
prompt_template = '''You are a cryptic crosswords expert. I will give you a clue. As you know, every clue has two parts: a definition and wordplay. Please extract the definition word/s from this clue. Only output the definition word. 
clue: {clue}
definition: 
'''

clue = 'Smear pan to cook cheese (8)'

text = prompt_template.format(clue=clue)

inputs = tokenizer(text, return_tensors="pt")
# Number of prompt tokens; use it to slice off the prompt from the generated ids.
inputs_length = inputs.input_ids.shape[1]

# Greedy decoding of up to 32 new tokens. `pad_token_id` is passed explicitly
# so `generate` does not have to fall back to the EOS id with a warning.
generated_ids = model.generate(
    **inputs,
    max_new_tokens=32,
    do_sample=False,
    pad_token_id=tokenizer.eos_token_id,
)

# Decode the full sequences (prompt + continuation). To keep only the newly
# generated text, decode `generated_ids[:, inputs_length:]` instead. The id
# tensor and the decoded strings use different names on purpose: slicing the
# decoded list with `[:, n:]` raises
# "TypeError: list indices must be integers or slices, not tuple".
output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

print(output[0])


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


You are a cryptic crosswords expert. I will give you a clue. As you know, every clue has two parts: a definition and wordplay. Please extract the definition word/s from this clue. Only output the definition word. 
clue: Smear pan to cook cheese (8)
definition: 
Please extract the definition word/s from this clue. Only output the definition word. 

Answer: 
Pan 
Please let me know if this is correct. I


TypeError: list indices must be integers or slices, not tuple

In [41]:
# Prompt in the "clue: / output:" layout.
# NOTE(review): `text` is not used below — the tokenizer call is fed two
# placeholder strings instead. Confirm whether this prompt was meant to be
# the generation input.
text = '''You are a cryptic crossword expert. You are given a clue for a cryptic crossword. Only output the answer.
clue:
Promise to fix toilet pan (9)
output:
'''

# Token ids that stop generation: the tokenizer's EOS id plus the ids of the
# two special tokens looked up by name.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    tokenizer.convert_tokens_to_ids('<|end_of_text|>'),
]

# Batch of two identical placeholder strings. Both rows tokenize to the same
# length, so they stack into one tensor without padding; prompts of different
# lengths would need `padding=True` and a pad token.
inputs = tokenizer(['sdfdsf', 'sdfdsf'], return_tensors="pt")

# Greedy decoding (`do_sample=False`). `temperature` and `top_p` only apply to
# sampling, so they are not passed. `pad_token_id` is set explicitly to the
# same id `generate` would otherwise pick (the first entry of `eos_token_id`).
output = model.generate(
    **inputs,
    max_new_tokens=64,
    eos_token_id=terminators,
    pad_token_id=tokenizer.eos_token_id,
    do_sample=False,
)

# Only the first row of the batch is decoded; special tokens are kept visible.
print(tokenizer.decode(output[0], skip_special_tokens=False))



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|begin_of_text|>sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfdsf
sdfds


In [42]:
# Display the current value of `output` as the cell result. In the recorded
# run this is the two-row token-id tensor shown below.
output

tensor([[128000,     82,   3013,   5469,     69,    198,     82,   3013,   5469,
             69,    198,     82,   3013,   5469,     69,    198,     82,   3013,
           5469,     69,    198,     82,   3013,   5469,     69,    198,     82,
           3013,   5469,     69,    198,     82,   3013,   5469,     69,    198,
             82,   3013,   5469,     69,    198,     82,   3013,   5469,     69,
            198,     82,   3013,   5469,     69,    198,     82,   3013,   5469,
             69,    198,     82,   3013,   5469,     69,    198,     82,   3013,
           5469,     69,    198,     82,   3013,   5469],
        [128000,     82,   3013,   5469,     69,    198,     82,   3013,   5469,
             69,    198,     82,   3013,   5469,     69,    198,     82,   3013,
           5469,     69,    198,     82,   3013,   5469,     69,    198,     82,
           3013,   5469,     69,    198,     82,   3013,   5469,     69,    198,
             82,   3013,   5469,     69,    198,   

In [32]:
import re
def get_ans_words_chard(clue):
    """Extract the answer enumeration from a cryptic-crossword clue.

    The enumeration is the text inside the last pair of parentheses in
    ``clue``, e.g. ``"8"`` in ``"Smear pan to cook cheese (8)"`` or ``"3,4"``
    in ``"Some clue (3,4)"``.

    Args:
        clue: Clue text containing at least one parenthesised group.

    Returns:
        A ``(word_count, enumeration)`` tuple: ``enumeration`` is the raw text
        inside the last parentheses and ``word_count`` is the number of
        comma-separated parts in it.

    Raises:
        IndexError: If ``clue`` contains no parenthesised group.
    """
    # Non-greedy so every "(...)" group is captured separately; the last group
    # is taken as the enumeration.
    groups = re.findall(r'\((.*?)\)', clue)
    enumeration = groups[-1]
    return len(enumeration.split(',')), enumeration


def _enumeration_to_lengths(enumeration):
    """Turn a raw enumeration such as ``"3,4"`` into a list of word lengths.

    Each comma-separated part describes one whitespace-separated word. A part
    holding several numbers (e.g. ``"2-4"`` for a hyphenated word) maps to the
    sum of those numbers plus one character per separator between them. A part
    with no digits maps to ``None``, which leaves the matching word uncropped.
    """
    lengths = []
    for part in enumeration.split(','):
        numbers = [int(digits) for digits in re.findall(r'\d+', part)]
        if numbers:
            lengths.append(sum(numbers) + len(numbers) - 1)
        else:
            lengths.append(None)
    return lengths


def crop_predictions(clues, predictions):
    """Trim each prediction to the shape given by its clue's enumeration.

    For prediction ``i`` the words are matched, in order, with the word lengths
    parsed from ``clues[i]``. Each word is cut to its length and words beyond
    the enumeration are dropped.

    The lengths are parsed into integers before slicing. Iterating over the
    raw enumeration string would yield single characters (``'3'``, ``','``),
    which cannot be used as slice bounds and would split multi-digit lengths.

    Args:
        clues: Clue strings, each ending in an enumeration such as ``(8)``.
        predictions: Predicted answers, aligned with ``clues`` by index.

    Returns:
        A list with one cropped string per prediction.

    Raises:
        IndexError: If a clue has no parenthesised enumeration, or if there
            are more predictions than clues.
    """
    lengths_per_clue = [
        _enumeration_to_lengths(get_ans_words_chard(clue)[1]) for clue in clues
    ]

    cropped_predictions = []
    for i, pred in enumerate(predictions):
        # split() without arguments ignores leading/trailing whitespace and
        # newlines, so stray blanks do not use up a length slot.
        words = pred.split()
        cropped_words = [
            word[:length] for word, length in zip(words, lengths_per_clue[i])
        ]
        cropped_predictions.append(' '.join(cropped_words))

    return cropped_predictions




In [45]:
from utils import generate_prompt

# Dummy record used to eyeball the prompt that `generate_prompt` builds.
example = {
    'input': 'sdfsdf',
    'target': 'sdfsdfsdf',
}

# NOTE(review): 'LLAMA3_BASE_PROMPT' presumably selects the prompt template and
# the trailing True is an option of utils.generate_prompt — confirm both there.
rendered = generate_prompt(example, 'LLAMA3_BASE_PROMPT', True)
print(rendered)



{'input': 'sdfsdf', 'target': 'sdfsdfsdf', 'prompt': 'You are a cryptic crossword expert. You are given a clue for a cryptic crossword. Output only the answer. \nclue:\nsdfsdf\noutput:\nsdfsdfsdf\n'}
