In [6]:
# !pip install transformers
import json
from tqdm import tqdm
import os
import glob

import torch

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint used as the automatic judge. The tokenizer and the weights are
# loaded from the same repository id, so it is named once here.
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Use the GPU when one is visible; otherwise fall back to CPU instead of
# failing inside model.to() on a machine without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Instruction text placed at the start of every evaluation prompt.
question_structure = "I will use you as an evaluator. I will give you ground truth, and a model generated answer. I want you to tell me if the ground truth and model generated answer are consistent."

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [8]:
def performEvaluation(path):
    """Build one judge prompt per QA record in a JSON file and evaluate them.

    Args:
        path: Path to a JSON file containing a sequence of records. Each
            record is indexed with the keys 'Ground truth' and
            'Model generated answer'.

    Returns:
        The value returned by evalZephyr for the list of prompts.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a record lacks one of the two expected keys.
    """
    # The context manager closes the handle even when parsing fails; the
    # original left the file open. JSON text is read explicitly as UTF-8
    # rather than relying on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as qa_file:
        qa_list = json.load(qa_file)
    print(len(qa_list))
    promptList = []

    for qa in qa_list:
        # The leading whitespace inside the triple-quoted string is part of
        # the prompt sent to the model, so the layout is kept as-is.
        prompt = f"""{question_structure}
            Ground truth: {qa['Ground truth']}
            Model generated answer: {qa['Model generated answer']}
            Consistent:"""
        promptList.append(prompt)

    return evalZephyr(promptList)
    

In [9]:
def evalZephyr(promptList, batch_size=128):
    """Run the judge model over a list of prompts and return the decoded text.

    Prompts are split into consecutive batches, tokenized with padding, and
    passed to ``model.generate`` with greedy decoding for at most 3 new
    tokens. The decoded sequences are collected in prompt order.

    Args:
        promptList: Sequence of prompt strings (must support ``len`` and
            slicing).
        batch_size: Number of prompts per generation call. Defaults to 128,
            the value that was previously hard-coded.

    Returns:
        list[str]: One decoded string per prompt, in input order.
        NOTE(review): ``generated_ids`` is decoded without slicing off the
        input tokens, so each string presumably contains the prompt followed
        by the generated tokens -- confirm that downstream parsing expects this.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A step of 0 would raise an obscure range() error and a negative step
    # would silently yield no batches, so reject both up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    evals = []
    if not promptList:
        # Nothing to score: skip tokenization and generation entirely.
        print(len(evals))
        return evals

    batches = [promptList[i:i + batch_size] for i in range(0, len(promptList), batch_size)]
    for batch in tqdm(batches):
        model_inputs = tokenizer(batch, return_tensors="pt", padding=True).to(device)

        # Greedy decoding (do_sample=False). The sampling-only settings
        # (temperature, top_p, top_k), the beam-only length_penalty,
        # min_length=None and the non-existent max_padding_length argument
        # were dropped: none of them affects single-beam greedy output.
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=3,
            do_sample=False,
            use_cache=True,
            repetition_penalty=1.0,
        )

        evals += tokenizer.batch_decode(
            generated_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
    print(len(evals))
    return evals

In [12]:
# Directory holding the model-answer JSON files to be judged, and the
# directory that receives one result file per input (same file name).
QA_PATH = '/home/averma90/CSE576/github/MLLM_Hallucinations/CLEVR_v1/answers/val'
EVAL_PATH = '/home/averma90/CSE576/github/MLLM_Hallucinations/CLEVR_v1/eval/val'

# The pattern has no '**' component, so recursive=True had no effect and is
# dropped. sorted() makes the processing order reproducible across runs.
files = sorted(glob.glob(os.path.join(QA_PATH, '*.json')))

for qa_file in files:
    # Create the output directory before the (slow) evaluation so a missing
    # directory cannot discard minutes of generation at write time.
    os.makedirs(EVAL_PATH, exist_ok=True)
    evals = performEvaluation(qa_file)

    # Distinct names for the input path and the output handle; the original
    # rebound the loop variable `file` inside the `with` statement.
    with open(os.path.join(EVAL_PATH, os.path.basename(qa_file)), 'w') as out_file:
        json.dump(evals, out_file)
    

0


0it [00:00, ?it/s]


0
1000


100%|██████████| 8/8 [04:15<00:00, 31.92s/it]


1000
0


0it [00:00, ?it/s]


0
1000


100%|██████████| 8/8 [03:51<00:00, 28.93s/it]


1000
1004


100%|██████████| 8/8 [04:11<00:00, 31.45s/it]


1004
1000


100%|██████████| 8/8 [04:13<00:00, 31.67s/it]


1000
1000


100%|██████████| 8/8 [04:10<00:00, 31.32s/it]


1000
1000


100%|██████████| 8/8 [04:09<00:00, 31.15s/it]


1000
1000


100%|██████████| 8/8 [04:15<00:00, 31.89s/it]


1000
3141


100%|██████████| 25/25 [11:45<00:00, 28.24s/it]


3141
3150


100%|██████████| 25/25 [12:13<00:00, 29.33s/it]


3150
1000


100%|██████████| 8/8 [03:29<00:00, 26.25s/it]

1000



