In [None]:
import numpy as np
import os
import pandas as pd
from tqdm import tqdm

os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import torch
from transformers import (
    AutoModelForCausalLM, AutoTokenizer,
    QuantoConfig
)


# Device name constant. It is not referenced again in the visible cells; model
# placement below is delegated to device_map='auto'.
DEVICE = "cuda"

# Filesystem location the tokenizer and model weights are loaded from
# (presumably a local checkpoint directory - verify on the target machine).
MODEL_PATH = "/Model/meta-llama/Meta-Llama-3.1-8B-Instruct-hf"

# Last path component of MODEL_PATH, i.e. "Meta-Llama-3.1-8B-Instruct-hf".
model_name = os.path.basename(MODEL_PATH)

# Load the non-fast tokenizer. When no pad token is defined, reuse EOS as the
# pad token, and pad on the left side of each sequence.
tok = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token
tok.padding_side = 'left'

# Load the causal LM with a QuantoConfig requesting 'float8' weights and the
# "eager" attention implementation.
# NOTE(review): CUDA_VISIBLE_DEVICES is set to '1' before torch is imported, so
# device_map='auto' presumably only sees that one GPU - confirm.
quant_config = QuantoConfig(weights='float8')
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map='auto',
    attn_implementation="eager",
    quantization_config=quant_config,
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
from datasets import load_dataset

# Load the TriviaQA dataset from the given path and display the 'answer' field
# of the second training record to inspect its structure (aliases, normalized
# aliases, value, ...).
dataset = load_dataset("/Dataset/mandarjoshi/trivia_qa")
dataset['train'][1]['answer']


{'aliases': ['Park Grove (1895)',
  'York UA',
  'Yorkish',
  'UN/LOCODE:GBYRK',
  'York, UK',
  'Eoforwic',
  'Park Grove School',
  'York Ham',
  'The weather in York',
  'City of York',
  'York, England',
  'York, Yorkshire',
  'York ham',
  'County Borough of York',
  'YORK',
  'Eoferwic',
  'Park Grove Primary School',
  'York, North Yorkshire',
  'Yoisk',
  'York',
  'York (England)'],
 'normalized_aliases': ['york yorkshire',
  'eoferwic',
  'park grove primary school',
  'park grove school',
  'weather in york',
  'park grove 1895',
  'eoforwic',
  'county borough of york',
  'york uk',
  'un locode gbyrk',
  'city of york',
  'york england',
  'york ua',
  'york ham',
  'york',
  'yorkish',
  'yoisk',
  'york north yorkshire'],
 'matched_wiki_entity_name': '',
 'normalized_matched_wiki_entity_name': '',
 'normalized_value': 'york',
 'type': 'WikipediaEntity',
 'value': 'York'}

In [65]:
# Prompt skeleton used both for few-shot demonstrations and for the final query:
# the question goes on the first line and the text after "Answer:" is either the
# reference answer (demonstrations) or left open for the model to complete.
TEMPLATE = """Question: {question}
Answer:"""

def get_few_shot_prompt(few_shot_dataset, template, rng, n_shots):
    """Assemble a few-shot prefix from randomly chosen records.

    Args:
        few_shot_dataset: Indexable collection of records; each record must
            expose ``rec['question']`` and ``rec['answer']['normalized_value']``.
        template: Format string containing a ``{question}`` placeholder.
        rng: Random generator offering ``choice(n, size, replace=False)``
            (e.g. ``np.random.RandomState``). Its state is advanced by the call.
        n_shots: Number of distinct records to draw.

    Returns:
        The concatenation of ``n_shots`` demonstrations. Each demonstration is
        the filled-in template, a space, the record's normalized answer value,
        and a blank line (``"\n\n"``).
    """
    # Draw distinct record positions without replacement.
    chosen = rng.choice(len(few_shot_dataset), n_shots, replace=False).tolist()

    demos = []
    for idx in chosen:
        example = few_shot_dataset[idx]
        question_part = template.format(question=example['question'])
        answer_part = example['answer']['normalized_value']
        demos.append(question_part + " " + answer_part + "\n\n")
    return "".join(demos)


# Seeded RNG used for drawing few-shot demonstrations.
# NOTE(review): this same RNG object is passed to get_few_shot_prompt again in
# the evaluation loop further down, so its state carries over between cells;
# re-running a later cell without re-running this one draws different shots.
few_shot_rng = np.random.RandomState(42)
# Number of demonstrations placed before each query.
n_shots =  5
# Build one few-shot prefix from the training split (handy for inspection).
few_shot_prompt = get_few_shot_prompt(dataset['train'], TEMPLATE, few_shot_rng, n_shots)
# print(few_shot_prompt)


In [66]:
import re, string
from tqdm import tqdm
from transformers.generation.utils import GenerationConfig
from transformers import TextGenerationPipeline


def normalize_answer(s):
    """Canonicalize an answer string for comparison.

    Steps, applied in this order: underscores become spaces, the text is
    lower-cased, punctuation (ASCII punctuation plus a few typographic
    quote/accent characters) is replaced by spaces, the standalone words
    "a", "an" and "the" are blanked out, and runs of whitespace are collapsed
    to single spaces with no leading or trailing whitespace.

    Args:
        s: Raw answer text.

    Returns:
        The normalized string.
    """
    # Characters that get replaced by a space.
    punctuation = set(string.punctuation + "".join([u"‘", u"’", u"´", u"`"]))

    text = s.replace('_', ' ').lower()
    text = ''.join(' ' if ch in punctuation else ch for ch in text)
    text = re.sub(r'\b(a|an|the)\b', ' ', text)
    return ' '.join(text.split()).strip()


def has_exact_match(ground_truths, candidates):
    """Report whether any ground-truth entry is contained in ``candidates``.

    Args:
        ground_truths: Iterable of reference values.
        candidates: Container tested with the ``in`` operator.

    Returns:
        True as soon as one ground truth is found in ``candidates``,
        False otherwise (including when ``ground_truths`` is empty).
    """
    return any(truth in candidates for truth in ground_truths)


# Number of validation questions to evaluate, and the running count of
# exact-match hits.
n = 50
acc = 0

# Greedy decoding (do_sample=False) of at most 24 new tokens; generation is
# configured with newline / period / comma as stop strings.
stop_words = ["\n", ".", ","]
genconfig = GenerationConfig(max_new_tokens=24, do_sample=False, stop_strings=stop_words, pad_token_id=tok.eos_token_id)
pipe = TextGenerationPipeline(model=model, tokenizer=tok)

# Build one prompt per validation question: a freshly sampled few-shot prefix
# followed by the question rendered through TEMPLATE.
# NOTE(review): few_shot_rng is shared with the earlier cell and is advanced on
# every call, so re-running this cell alone yields different demonstrations
# (and possibly a different accuracy). This loop also overwrites the
# module-level `few_shot_prompt`.
prompts = []
for i in range(n):
    rec = dataset['validation'][i]
    few_shot_prompt = get_few_shot_prompt(dataset['train'], TEMPLATE, few_shot_rng, n_shots)
    prompt = few_shot_prompt + TEMPLATE.format(question=rec['question'])
    # prompt = tok.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)
    prompts.append(prompt)

# Generate only the continuation (return_full_text=False) for every prompt.
# NOTE(review): tokenizer=tok is presumably forwarded to generate(), which
# needs a tokenizer to apply stop_strings - TODO confirm against the
# installed transformers version.
generated_answers = pipe(prompts, tokenizer=tok, return_full_text=False, clean_up_tokenization_spaces=True, prefix=tok.bos_token, generation_config=genconfig)
for i, answer in enumerate(generated_answers):
    rec = dataset['validation'][i]
    # Take the first returned sequence and strip any leading/trailing
    # newline, period or comma characters before normalizing.
    answer = answer[0]['generated_text'].strip("".join(stop_words))
    normalized_answer = normalize_answer(answer)
    # print(normalized_answer, "|", ",".join(rec['answer']['normalized_aliases']))
    # A hit requires the normalized answer to equal one of the normalized aliases.
    if normalized_answer in rec['answer']['normalized_aliases']:
        acc += 1
# Exact-match accuracy over the n evaluated questions (displayed as cell output).
acc/n


0.64

## Accuracy

In [None]:
from datasets import load_dataset

# Load the TriviaQA dataset again for the accuracy section (so this part of the
# notebook does not depend on the earlier cells) and display the second
# training record in full.
dataset = load_dataset("/Dataset/mandarjoshi/trivia_qa")
dataset['train'][1]


{'question': 'Where in England was Dame Judi Dench born?',
 'question_id': 'tc_3',
 'question_source': 'http://www.triviacountry.com/',
 'entity_pages': {'doc_source': [],
  'filename': [],
  'title': [],
  'wiki_context': []},
 'search_results': {'description': [],
  'filename': [],
  'rank': [],
  'title': [],
  'url': [],
  'search_context': []},
 'answer': {'aliases': ['Park Grove (1895)',
   'York UA',
   'Yorkish',
   'UN/LOCODE:GBYRK',
   'York, UK',
   'Eoforwic',
   'Park Grove School',
   'York Ham',
   'The weather in York',
   'City of York',
   'York, England',
   'York, Yorkshire',
   'York ham',
   'County Borough of York',
   'YORK',
   'Eoferwic',
   'Park Grove Primary School',
   'York, North Yorkshire',
   'Yoisk',
   'York',
   'York (England)'],
  'normalized_aliases': ['york yorkshire',
   'eoferwic',
   'park grove primary school',
   'park grove school',
   'weather in york',
   'park grove 1895',
   'eoforwic',
   'county borough of york',
   'york uk',
   'un

In [6]:
import json

# Parsed records from the results file, one entry per successfully parsed line.
results = []
# filename = "results/Meta-Llama-3.1-8B-hf-seed=0-temperature=1.0-top_k=50-num_seq=50-shots=5.jsonl"
# filename = "results/Llama-2-13b-hf-seed=0-temperature=1.0-top_k=50-num_seq=50-shots=5.jsonl"
filename = "results/Meta-Llama-3.1-8B-hf-seed=0-temperature=1.0-top_k=50-num_seq=20-shots=20.jsonl"
with open(filename, 'r') as fp:
    for line in fp:
        try:
            rec = json.loads(line)
        except json.JSONDecodeError:
            # Stop at the first line that is not valid JSON (e.g. a partially
            # written trailing line). Only parse errors are caught here, so
            # KeyboardInterrupt and unrelated failures still surface instead of
            # being silently swallowed.
            break
        results.append(rec)
# Number of records loaded (displayed as cell output).
len(results)

1000

In [7]:
# Recover the number of sampled answers per question from the "num_seq=<k>"
# field of the dash-separated results filename.
# `filename` itself is left untouched (still a str), so this cell can be re-run
# without failing on a list that no longer has .split().
filename_parts = filename.split("-")
# Reset first so a stale value from an earlier run can never leak through.
num_answers = None
for part in filename_parts:
    if "num_seq" in part:
        num_answers = int(part.split('=')[-1])
if num_answers is None:
    raise ValueError(f"no 'num_seq=<k>' field found in results filename: {filename!r}")
num_answers

20

In [8]:
import re, string

def normalize_answer(s):
    """Return a canonical form of ``s`` suitable for answer matching.

    The transformation replaces underscores with spaces, lower-cases the
    text, turns punctuation (ASCII punctuation plus a few typographic
    quote/accent characters) into spaces, blanks out the standalone words
    "a", "an" and "the", and finally collapses whitespace so that words are
    separated by single spaces with nothing leading or trailing.

    Args:
        s: Raw answer text.

    Returns:
        The normalized string.
    """
    # Map every punctuation character to a single space.
    blanked = string.punctuation + "".join([u"‘", u"’", u"´", u"`"])
    to_space = str.maketrans({ch: ' ' for ch in blanked})

    lowered = s.replace('_', ' ').lower()
    without_punc = lowered.translate(to_space)
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punc)
    return ' '.join(without_articles.split()).strip()

# Denominator: assumes every record in `results` holds `num_answers` answers.
n = num_answers * len(results)
acc = 0
for idx, answers in enumerate(results):
    # results[idx] is scored against the idx-th validation question.
    aliases = dataset['validation'][idx]['answer']['normalized_aliases']
    normalized = (normalize_answer(ans) for ans in answers)
    # Lenient scoring: an answer counts as correct when any normalized alias
    # occurs as a substring of the normalized answer (not exact equality).
    acc += sum(any(alias in na for alias in aliases) for na in normalized)
# Fraction of sampled answers that matched (displayed as cell output).
acc/n


0.5529