In [1]:
## ---------------------------------------------------------------------
## set up configs for huggingface hub and OS paths on HPC cluster -- make sure config.ini is correct
## ---------------------------------------------------------------------
import configparser
def auth_token():

    config = configparser.ConfigParser()
    config.read("config.ini")
    return config["hugging_face"]["token"]

def scratch_path():
    config = configparser.ConfigParser()
    config.read("config.ini")
    return "/scratch/" + config["user"]["username"] + "/"

import os
if os.path.isdir(scratch_path()):
    os.environ['TRANSFORMERS_CACHE'] = scratch_path() + '.cache/huggingface'
    os.environ['HF_DATASETS_CACHE'] = scratch_path() + '.cache/huggingface/datasets' # update with latest HF

os.environ['HF_TOKEN'] = auth_token()

print(os.getenv('TRANSFORMERS_CACHE'))
print(os.getenv('HF_DATASETS_CACHE'))

## ---------------------------------------------------------------------
## Load libraries
## ---------------------------------------------------------------------

import numpy as np
import pandas as pd

import torch
import transformers
from transformers import AutoTokenizer, AutoModel, LlamaForCausalLM, LlamaTokenizer

import torch.nn.functional as F

from entailma import * ## these are where the QA and prompting functions live now
from easyeditor.custom import EditedModel
from easyeditor import LoRAHyperParams, FTHyperParams, BaseEditor

## ---------------------------------------------------------------------
## Ensure GPU is available -- device should == 'cuda'
## ---------------------------------------------------------------------

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device = ", device)

/scratch/dmpowell/.cache/huggingface
/scratch/dmpowell/.cache/huggingface/datasets


  warn(


device =  cuda


In [2]:
## ---------------------------------------------------------------------
## load llama-2 as a EditedModel class (not pipeline, to integrate better with other scripts/notebooks)
## ---------------------------------------------------------------------

MODEL_NAME = "meta-llama/Llama-2-7b-hf" # "meta-llama/Meta-Llama-3-8B" 

model = WrappedModel(
    LlamaForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map = "auto"),
    LlamaTokenizer.from_pretrained(MODEL_NAME)
)


08/11/2024 17:58:54 - INFO - accelerate.utils.modeling -   We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.


In [3]:
df = pd.read_csv("data/obqa/test.tsv", sep='\t')
df.columns = df.columns.str.replace(' ', '_')
df.columns = df.columns.str.lower()

df2 = df.copy().tail(100)

df2 = (
    df2
    .assign(foils = lambda d: d.apply(lambda x: [i for i in ["A","B","C","D"] if i != x["answer_key"]], 1))
    .assign(
        F1 = lambda d: d.apply(lambda x: x.foils[0], 1),
        F2 = lambda d: d.apply(lambda x: x.foils[1], 1),
        F3 = lambda d: d.apply(lambda x: x.foils[2], 1)
        )
)

df2.head(5)

Unnamed: 0,id,question_stem,choices,complete_question,answer_key,foils,F1,F2,F3
400,936,Animals died after the removal of a,(A) bush (B) street (C) house (D) city,Animals died after the removal of a (A) bush (...,A,"[B, C, D]",B,C,D
401,8-478,"If I want to go running at night, what can I u...",(A) A black shirt (B) Kitchen foil (C) Sunglas...,"If I want to go running at night, what can I u...",B,"[A, C, D]",A,C,D
402,9-669,the closest star to our planet delivers solar ...,(A) maybe (B) all of these (C) this is sure (D...,the closest star to our planet delivers solar ...,C,"[A, B, D]",A,B,D
403,7-732,Coal-fire power stations heat coal to incredib...,(A) produce energy (B) use heat energy (C) bur...,Coal-fire power stations heat coal to incredib...,A,"[B, C, D]",B,C,D
404,7-658,Creatures sometimes have barbs on their backs ...,(A) wasp (B) bee (C) scorpion (D) butterfly,Creatures sometimes have barbs on their backs ...,D,"[A, B, C]",A,B,C


In [4]:
## lil test
# row = df2.iloc[-1]
# out = generate_best_premises(row.complete_question, 'A', model, num_prem = 8, batch_size = 8)
# print(out)


In [5]:
from tqdm.notebook import tqdm

corr_premises = []
foil_premises = []

corr_scores = []
F1_scores = []

for row in tqdm(df2.itertuples(), total = len(df2)):
    try:
        corr, corr_score = generate_best_premises(row.complete_question, row.answer_key, model, num_prem = 32, batch_size = 8)
    except:
        corr, corr_score = ["", ""], -100
    try:
        F1, F1_score = generate_best_premises(row.complete_question, row.F1, model, num_prem = 32, batch_size = 8)
    except:
        F1, F1_score = ["", ""], -100

    corr_premises.append(corr)
    foil_premises.append(F1)
    corr_scores.append(corr_score)
    F1_scores.append(F1_score)

df2['corr_premises'] = corr_premises
df2['F1_premises'] = foil_premises
df2['corr_score'] = [x.cpu().item() if type(x)==torch.Tensor else x for x in corr_scores]
df2['F1_score'] = [x.cpu().item() if type(x)==torch.Tensor else x for x in F1_scores]

df2.to_csv('test-tail100-premises-bestof32.csv', sep='\t')



  0%|          | 0/100 [00:00<?, ?it/s]

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [47]:


# for row in df2.itertuples():
#     corr_premises = generate_best_premises(row.complete_question, row.answer_key, model, num_prem = 32, batch_size = 8)
#     F1_premises = generate_best_premises(row.complete_question, row.F1, model, num_prem = 32, batch_size = 8)

In [46]:
[x.cpu().item() if type(x)==torch.Tensor else x for x in corr_scores]

[3.599609375,
 10.0859375,
 0.736328125,
 1.373046875,
 0.9287109375,
 3.44921875,
 11.0703125,
 1.1884765625,
 0.9111328125,
 1.916015625,
 6.546875,
 2.73046875,
 13.703125,
 -100.0,
 8.0,
 1.2265625,
 3.810546875,
 2.97265625,
 5.93359375,
 16.796875,
 7.05078125,
 23.421875,
 3.220703125,
 1.9189453125,
 3.84375,
 5.125,
 7.3671875,
 16.0625,
 5.53125,
 7.53125,
 3.365234375,
 2.1328125,
 6.8359375,
 2.244140625,
 8.234375,
 24.265625,
 3.697265625,
 1.61328125,
 7.00390625,
 2.0390625,
 1.802734375,
 2.0,
 3.841796875,
 0.83984375,
 2.017578125,
 1.5244140625,
 3.513671875,
 2.7734375,
 5.4375,
 -100.0,
 3.94140625,
 5.05078125,
 5.40625,
 4.5,
 2.755859375,
 0.65771484375,
 2.611328125,
 3.607421875,
 9.140625,
 4.87890625,
 2.12890625,
 1.259765625,
 1.953125,
 3.939453125,
 6.09765625,
 2.36328125,
 3.830078125,
 1.7060546875,
 2.744140625,
 2.49609375,
 1.69140625,
 5.30078125,
 4.58203125,
 3.029296875,
 2.294921875,
 0.64208984375,
 5.46875,
 20.90625,
 10.140625,
 3.3125,
 

In [87]:
## I don't think this is very useful for evaluating "belief"
# def text_logprob(text, model, norm = None):
#     if not norm:
#         norm = 1
#     elif norm == "whitespace":
#         norm = len(text.split())
    
#     logprobs = model.obs_logprobs(text)
#     return [l.sum()/norm for l in logprobs] if type(logprobs)==list else logprobs.sum()/norm
    
# [text_logprob(t, model, norm = "whitespace") for t in ['Some animals sweat in the heat to keep cool.', 'Sweat is a liquid that evaporates from the skin, which cools the body.']]

False