In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from huggingface_hub import notebook_login
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer

MODEL_NAME = "meta-llama/Llama-2-7b-hf"

# Options for loading the checkpoint: 8-bit quantised weights (this is what
# triggers the bitsandbytes banner in the cell output), fp16 as the torch
# dtype, and automatic device placement.
model_load_options = dict(
    return_dict=True,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = LlamaForCausalLM.from_pretrained(MODEL_NAME, **model_load_options)

# Tokenizer belonging to the same checkpoint.
tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)




Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /home/abdelrahman.sadallah/local/cuda-11.7/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/abdelrahman.sadallah/.conda/envs/nlp/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`,  it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.


In [2]:
import datasets
import transformers
from datasets import load_dataset
from evaluate import load

# Only the validation portion is needed here. `field` selects the top-level
# JSON key to read; the result is requested under the split name "train".
VAL_DATA_FILE = "../data/naive_random.json"
val_dataset = load_dataset('json', data_files=VAL_DATA_FILE, field="val", split="train")
# The other portions can be loaded the same way if required:
# train_dataset = load_dataset('json', data_files="data/naive_random.json", field="train",split="train")
# test_dataset = load_dataset('json', data_files="data/naive_random.json", field="test",split="train")

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Distinct solutions in the validation set, wrapped in a DataFrame.
unique_answers = pd.DataFrame(np.unique(val_dataset['soln']))

# Distinct values of the `type` column.
unique_type = np.unique(val_dataset['type'])

n_types = len(unique_type)
n_examples = len(val_dataset)
n_unique_answers = len(unique_answers)

print(f'Total number of unique types is: {n_types}')

print(f' total number of examples: {n_examples},    number of unique answers: {n_unique_answers}')

Total number of unique types is: 1
 total number of examples: 28476,    number of unique answers: 20302


In [4]:
def concat_length(example):
    """Append the answer enumeration to the clue text of one example.

    Reads ``example["clue"]`` and ``example["orig_lengths"]`` and overwrites
    ``example["clue"]`` with ``"<clue> (<orig_lengths>)"``.

    The mapping is modified in place and the same object is returned, so
    applying this function twice to one example appends the suffix twice.
    """
    clue_text, enumeration = example["clue"], example["orig_lengths"]
    example["clue"] = "{} ({})".format(clue_text, enumeration)
    return example

In [5]:
# Accuracy metric loaded through `evaluate`.
acc_metric = load(path="accuracy")

# Rewrite every clue as "<clue> (<orig_lengths>)".
# NOTE: re-running this cell on the already-mapped dataset appends the suffix again.
val_dataset = val_dataset.map(function=concat_length)


In [20]:

# Below is a clue for a decrypting crossword. Your task is to solve this clue. The number of charachters in the answer should be same as the number in the parenthesis. Just output the answer only. Do not output any explanitions, just the words in the answer.

# Few-shot instruction block: a task description followed by three solved
# clues in the "### Input:" / "### Output:" layout. The text is fed to the
# model verbatim, so it is kept exactly as written.
DEFAULT_SYSTEM_PROMPT = """
I will give you a cryptic crossword which is a crossword puzzle in which each clue is a word puzzle. Your task is to solve this clue. The number of charachters in the answer should be same as the numbers in the parenthesis in the clue.

### Input:
Desk register taken no further than Ozzie? (7)

### Output:
rolltop

### Input:
Henry has books stolen (3)

### Output:
hot

### Input:
What's missing in a fight is obscure (5,3)

### Output:
black out
""".strip()


def generate_training_prompt(
    clue: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT
) -> str:
    """Build the full prompt for a single clue.

    Args:
        clue: Clue text; surrounding whitespace is removed.
        system_prompt: Text placed after the ``### Instruction:`` header.

    Returns:
        The instruction section and an ``### Input:`` section holding the
        clue, separated by a blank line, with outer whitespace stripped.
        No ``### Output:`` header is appended after the clue.
    """
    instruction_section = f"### Instruction: {system_prompt}"
    input_section = f"### Input:\n{clue.strip()}"
    return "\n\n".join((instruction_section, input_section)).strip()
     


In [21]:
# Display the first validation example.
val_dataset[0]

{'prompt': "### Instruction: Below is a clue for a decrypting crossword. Your task is to solve this clue. The number of charachters in the answer should be same as the number in the parenthesis. Just output the answer only. Do not output any explanitions, just the words in the answer.\n \n### Input:\nDesk register taken no further than Ozzie? (7)\n\n### Output:\nrolltop\n\n### Input:\nHenry has books stolen (3)\n\n### Output:\nhot\n\n### Input:\nWhat's missing in a fight is obscure (5,3)\n\n### Output:\nblack out\n\n### Input:\nDesk register taken no further than Ozzie? (7)",
 'soln_with_spaces': 'rolltop',
 'clue': 'Desk register taken no further than Ozzie? (7)'}

In [22]:
# Render the prompt for one validation clue to eyeball the layout.
sample_clue = val_dataset[10]['clue']
prompt = generate_training_prompt(sample_clue)

print(prompt)

### Instruction: I will give you a cryptic crossword which is a crossword puzzle in which each clue is a word puzzle. Your task is to solve this clue. The number of charachters in the answer should be same as the numbers in the parenthesis in the clue.

### Input:
Desk register taken no further than Ozzie? (7)

### Output:
rolltop

### Input:
Henry has books stolen (3)

### Output:
hot

### Input:
What's missing in a fight is obscure (5,3)

### Output:
black out

### Input:
Eccentric uncle has a right to form basic kind of family (7)


In [23]:
# Put the model in evaluation mode; `eval()` acts on the module itself, so
# rebinding the name is unnecessary.
model.eval()

# Generation defaults published alongside the checkpoint.
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)


In [24]:
# def get_answer(res):
    

In [25]:
def map_prompt(ex):
    """Attach the generated prompt to one example.

    Reads ``ex["clue"]``, stores the result of ``generate_training_prompt``
    under ``ex["prompt"]`` and returns the same (modified) example.
    """
    clue = ex["clue"]
    ex["prompt"] = generate_training_prompt(clue)
    return ex

In [26]:
# Add the `prompt` column, then keep only the columns used during evaluation.
EVAL_COLUMNS = ['prompt', 'soln_with_spaces', 'clue']
val_dataset = val_dataset.map(map_prompt).select_columns(EVAL_COLUMNS)



Map:   0%|          | 0/28476 [00:00<?, ? examples/s]

In [27]:
# Display one mapped example to check the prompt / label / clue columns.
val_dataset[77]

{'prompt': "### Instruction: I will give you a cryptic crossword which is a crossword puzzle in which each clue is a word puzzle. Your task is to solve this clue. The number of charachters in the answer should be same as the numbers in the parenthesis in the clue.\n\n### Input:\nDesk register taken no further than Ozzie? (7)\n\n### Output:\nrolltop\n\n### Input:\nHenry has books stolen (3)\n\n### Output:\nhot\n\n### Input:\nWhat's missing in a fight is obscure (5,3)\n\n### Output:\nblack out\n\n### Input:\nWhat's missing in a fight is obscure (5,3)",
 'soln_with_spaces': 'black out',
 'clue': "What's missing in a fight is obscure (5,3)"}

In [28]:
def inference(prompts):
    """Generate continuations for a batch of prompts.

    Args:
        prompts: Prompt strings; they are tokenized together with padding.

    Returns:
        The generated token ids with the (padded) prompt positions sliced
        off, i.e. only the newly generated tokens for each prompt.

    Uses the notebook-level ``tokenizer``, ``model`` and ``generation_config``.
    """
    encoding = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **encoding,
            max_new_tokens=64,
            # Deterministic greedy decoding. This replaces the previous
            # near-zero temperature, which only approximated greedy decoding
            # while leaving sampling switched on.
            do_sample=False,
            generation_config=generation_config,
        )

    # Every row of `outputs` starts with the padded prompt; drop that prefix.
    prompt_length = encoding.input_ids.shape[1]
    return outputs[:, prompt_length:]
        


## EVAL LOOP

In [34]:
from torch.utils.data import DataLoader 
from tqdm import tqdm

# Number of validation examples to score. Use a small value (e.g. 100) for a
# quick check instead of the full set.
# TOTAL = 100
TOTAL = len(val_dataset)

eval_subset = val_dataset.select(range(TOTAL))
val_dataloader = DataLoader(eval_subset, batch_size=32)

In [35]:
# Check which type `select` returns.
type(val_dataset.select(range(100)))

datasets.arrow_dataset.Dataset

In [36]:
            # Define PAD Token = BOS Token
            tokenizer.pad_token = tokenizer.bos_token
            model.config.pad_token_id = model.config.bos_token_id
            # Batched generation with a decoder-only model needs left padding,
            # so that every prompt ends right where generation starts.
            tokenizer.padding_side = "left"

            # `predictions[k]` is the answer parsed for `labels[k]`. Exactly one
            # entry is appended per example so the two lists stay aligned.
            predictions = []
            labels = []

            torch.cuda.empty_cache()

            for batch in tqdm(val_dataloader):

                prompts = batch['prompt']
                labels.extend(batch['soln_with_spaces'])

                outputs = inference(prompts=prompts)
                output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)

                for generated in output_text:

                    # The answer is the line right after the first
                    # '### Output:' header. If the header is missing, or is the
                    # last line, record an empty prediction instead of skipping
                    # the example (which would shift later predictions onto the
                    # wrong labels) or raising IndexError.
                    answer = ''
                    lines = generated.split('\n')
                    for idx, line in enumerate(lines):
                        if line == '### Output:' and idx + 1 < len(lines):
                            answer = lines[idx + 1].lower()
                            break
                    predictions.append(answer)







100%|██████████| 890/890 [3:12:39<00:00, 12.99s/it]  


In [37]:

import emoji

# The pairing below is only meaningful with one prediction per label.
assert (len(predictions) == len(labels))


correct = 0
# NOTE: despite its name this counts predictions whose length MATCHES the
# label; it is turned into an error rate when printed below.
length_error = 0


# Explicit UTF-8: the emoji markers cannot be written with every platform's
# default text encoding.
with open('pred_output.txt', 'w', encoding='utf-8') as f:
    for pred, label in zip(predictions, labels):

        correctly_predicted = pred == label
        if correctly_predicted:
            correct += 1

        if len(pred) == len(label):
            length_error += 1

        marker = ':check_mark_button:' if correctly_predicted else ':cross_mark:'
        f.write(emoji.emojize(f'{pred} | {label}  {marker} \n'))


print(TOTAL)
print(f'ACCURACY:  { float (correct / TOTAL)}')
print(f'Length error:  { float (1 - (length_error / TOTAL) )}')


28476
ACCURACY:  0.02423093131057733
Length error:  0.8132462424497823


In [38]:


        
    # id = i.find('### Output:')
    # print(i[id:])

    # nl = i.find
    # # print(i)

#     print(ans)

# print(labels)
# print(outputs)