In [27]:
%env XDG_CACHE=/workspace/.cache
%env HF_HOME=/workspace/.cache/huggingface

env: XDG_CACHE=/workspace/.cache
env: HF_HOME=/workspace/.cache/huggingface


In [28]:
from datasets import load_dataset
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import pandas as pd
import random
from transformers import AutoTokenizer, AutoModelForCausalLM
import pyonmttok
import ctranslate2

In [3]:
model_id = "tiiuae/falcon-7b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, 
                                             torch_dtype=torch.bfloat16,
                                             trust_remote_code=True,
                                             device_map="auto")

## Lets Do the translation layer
from huggingface_hub import snapshot_download
print("Loading translator Models...")

ca_en_model_folder = snapshot_download(repo_id="projecte-aina/mt-aina-ca-en", revision="main")
tokenizer_ca_en = pyonmttok.Tokenizer(
    mode="none", sp_model_path=ca_en_model_folder + "/spm.model"
)
ca_en_model = ctranslate2.Translator(ca_en_model_folder)



Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.41s/it]


Loading translator Models...


Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 6472.69it/s]


In [4]:
def compute_probability(input_text, answer):
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    answer_tokens = tokenizer(answer)['input_ids']
    answer_probability = 1

    for token in answer_tokens:
        outputs = model(**inputs, output_hidden_states=True, return_dict=True)

        # Logits are in the outputs, you can access the last token's logits like this:
        logits = outputs.logits[:, -1, :]
        probabilities = torch.softmax(logits, 1)
        answer_probability *= probabilities[0][token]
        
        # Prepare input_ids for the next token prediction
        new_token = torch.tensor([[token]]).to(model.device)
        inputs = {'input_ids': torch.cat([inputs['input_ids'], new_token], dim=1),
                'attention_mask': torch.cat([inputs['attention_mask'], torch.tensor([[1]]).to(model.device)], dim=1)}
        del new_token
    del inputs
    return answer_probability.item()

def run_inference(txt, num_tokens=20):
    inputs = tokenizer(txt, return_tensors="pt").to(model.device)
    tokens = model.generate(**inputs, do_sample=True,
        top_k=1,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=num_tokens)
    del inputs
    return tokenizer.decode(tokens[0]).replace(txt, "")

def translate_to_english(txt):
    lines = txt.split("\n")
    translated_lines = []
    for line in lines:
        toks, _ = tokenizer_ca_en.tokenize(line)
        translated = ca_en_model.translate_batch([toks])
        translated = tokenizer_ca_en.detokenize(translated[0].hypotheses[0])
        translated_lines.append(translated)
        
    return "\n".join(translated_lines)

In [43]:
teca = load_dataset("benchmarks", data_files="teca.csv", split="train[:100]")

In [57]:
def eval(entry):
    return { 'results': run_inference(entry['prompt'], 1) == str(entry['numeric_label']) }

results = teca.map(eval)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Map:   2%|▏         | 2/100 [00:00<00:08, 11.59 examples/s]Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Map:   4%|▍         | 4/100 [00:00<00:08, 11.38 examples/s]Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Map:   6%|▌         | 6/100 [00:00<00:08, 11.41 examples/s]Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Map:   8%|▊         | 8/100 [00:00<00:07, 11.54 examples/s]Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Map:  10%|█         | 1

In [58]:
results['results']

[True,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 False,
 True,
 False,
 True,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False]