In [1]:
## ---------------------------------------------------------------------
## set up configs for huggingface hub and OS paths on HPC cluster -- make sure config.ini is correct
## ---------------------------------------------------------------------
import configparser
def auth_token():

    config = configparser.ConfigParser()
    config.read("config.ini")
    return config["hugging_face"]["token"]

def scratch_path():
    config = configparser.ConfigParser()
    config.read("config.ini")
    return "/scratch/" + config["user"]["username"] + "/"

import os
if os.path.isdir(scratch_path()):
    os.environ['TRANSFORMERS_CACHE'] = scratch_path() + '.cache/huggingface'
    os.environ['HF_DATASETS_CACHE'] = scratch_path() + '.cache/huggingface/datasets'
print(os.getenv('TRANSFORMERS_CACHE'))
print(os.getenv('HF_DATASETS_CACHE'))

## ---------------------------------------------------------------------
## Load libraries
## ---------------------------------------------------------------------

import numpy as np
import pandas as pd

import torch
import transformers
from transformers import AutoTokenizer, AutoModel, LlamaForCausalLM, LlamaTokenizer

import torch.nn.functional as F

from entailma import * ## these are where the QA and prompting functions live now
from easyeditor.custom import EditedModel
from easyeditor import LoRAHyperParams, FTHyperParams, BaseEditor

## ---------------------------------------------------------------------
## Ensure GPU is available -- device should == 'cuda'
## ---------------------------------------------------------------------

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device = ", device)

/scratch/dmpowell/.cache/huggingface
/scratch/dmpowell/.cache/huggingface/datasets


  warn(


device =  cuda


In [2]:
class EditedModel:
    def __init__(self, hparams, auth_token=None):
        
        self.editor = BaseEditor.from_hparams(hparams)

        self.model = self.editor.model
        self.tok = self.editor.tok
        self.model_name = self.editor.model_name

        self.params = hparams
        self.preprompt = ""
        self.saved_weights = None
        
        if type(self.tok) == transformers.LlamaTokenizer or transformers.LlamaTokenizerFast:
            self.tok.padding_side = "right"
        else: 
            self.tok.padding_side = "left"
    
    def edit(self, rewrite, log_file = None, **kwargs):
        if log_file:
            h = open(log_file, "a")
        else:
            h = None
        
        if "preprompt" in rewrite: # this is a little hacky
            self.preprompt = rewrite["preprompt"]
            return None
        
        else:
            with redirect_stdout(h): # None
                metrics, self.model, self.saved_weights = self.editor.pure_edit( # pure_edit
                    **rewrite,
                    # **kwargs,
                    keep_original_weight = True,
                    verbose = False
                )
        
        return metrics
    
    
    def restore(self):

        self.preprompt = ""
        
        if self.params.alg_name == "LoRA":
            self.model = self.model.unload()
        
        elif self.saved_weights:

            try:
                with torch.no_grad():
                    for k, v in self.saved_weights.items():
                        nethook.get_parameter(self.model, k)[...] = v
                self.saved_weights = None
                # print("Original model restored")
            except NameError as e:
                print(f"No model weights to restore: {e}")

        elif self.saved_weights == {}:
            print (print(f"No model weights to restore: saved_weights is empty dict"))

        return None

            
    def generate_text(self, texts, **kwargs):
        
        if type(texts) != list:
            texts = [texts]
        
        texts = [self.preprompt + t for t in texts]

        model = self.model
        tokenizer = self.tok
        encoding = tokenizer(texts, padding=True, return_tensors='pt').to(device)

        with torch.no_grad():
            generated_ids = model.generate(**encoding, **kwargs) # 

            generated_texts = tokenizer.batch_decode(
                generated_ids, skip_special_tokens=True
            )
            
        return(generated_texts)
    
    
    # def logprobs(self, texts):
        
    #     # texts = self.preprompt + texts if type(texts)==str else [self.preprompt + t for t in texts]
    
    #     # tokenizer = self.tok 
    #     # model = self.model
    #     # encoding = tokenizer(texts, padding=True, return_tensors='pt').to(device)

    #     # with torch.no_grad():
    #     #     model_out = model(encoding["input_ids"])
    #     #     logits = model_out.logits
    #     #     logprobs = F.log_softmax(logits, -1)

    #     x = self.logits(texts)
        
    #     return {"tokens": x['tokens'], "logprobs": logprobs}
    

    def logits(self, texts):
        
        texts = self.preprompt + texts if type(texts)==str else [self.preprompt + t for t in texts]
    
        tokenizer = self.tok 
        model = self.model
        encoding = tokenizer(texts, padding=True, return_tensors='pt').to(device)

        with torch.no_grad():
            model_out = model(encoding["input_ids"])
            logits = model_out.logits
        
        return {"tokens": encoding, "logits": logits}
    
    
    def logprobs(self, texts):
        
        logits = self.logits(texts)
        
        return {"tokens": logits['tokens'], "logprobs": F.log_softmax(logits['logits'], -1)}
    
    
    def obs_logits(self, text):
    
        x = self.logits(text)
        logits = x['logits']
        
        obslogits = []

        if type(text) is str:
            tok_idx = x['tokens']['input_ids'].squeeze()
            logits = x['logits']
            obslogits = logits[0, :, tok_idx[1:]].squeeze().diag()

        elif type(text) is list:
            for i in range(len(text)):
                tok_idx = x['tokens']['input_ids'][i].squeeze()
                mask = x['tokens']['attention_mask'][i] > 0
                
                obslogits.append(logits[0, :, tok_idx[1:]].squeeze().diag()[mask[1:]])

        return obslogits


    def obs_logprobs(self, text):
        logits = self.obs_logits(text)

        return [F.log_softmax(l, -1) for l in logits] if type(logits)==list else F.log_softmax(logits, -1)
        
       
    def completion_logprob(self, text, completion, start_ind = None):
        
        '''
        Compute model log probability of completion substring. Returns single value tensor. Takes only one text string.
        '''

        return self.substring_logprobs(text, completion)[0][-1]
        

    def substring_logprobs(self, texts, substring, pad = True):
        '''
        Compute model log probability of each occurrence of substring in text. Returns list of list-type. Accepts a list of strings.
        '''
        
        if type(texts) != list:
            texts = [texts]
        
        logprobs = self.logprobs(texts)
        
        tok_encoded = encode_token(substring, self.tok, pad = pad)
        # text_encoded = logprobs['tokens']['input_ids'][0].tolist()
        
        out = []
        for i in range(len(texts)):
            text_encoded = logprobs['tokens']['input_ids'][i].tolist()

            # find matches for searched token sequence
            start_idxs = []
            for left in range(0, len(text_encoded) - len(tok_encoded)+1):
                # left = i - 1
                right = left + len(tok_encoded)
                if text_encoded[left:right] == tok_encoded:
                    start_idxs.append(left)

            lp = logprobs['logprobs'][i]
            match_probs = []

            # compute probability for all tokens
            for start in start_idxs:
                val = 0
                for i in range(len(tok_encoded)):
                    val += lp[start + i - 1][tok_encoded[i]]
                match_probs.append(val)

            out.append(match_probs)

        return out
        

    def choose(self, prompt, choices, normalization = None):

        # prompt = prompt.rstrip() # remove any trailing whitespace

        if type(self.tok) == transformers.models.llama.tokenization_llama.LlamaTokenizer:
            padded_choices = choices
            prompt = prompt + " " if prompt[-1]!= " " else prompt
        else:
            padded_choices = [pad_token(c) for c in choices] # pad all the 
        
        prompts = [prompt + c for c in padded_choices]

        logits = torch.tensor([self.completion_logprob(prompts[i], padded_choices[i]) for i in range(len(padded_choices))])

        if normalization == "unconditional":
            norm_logits = torch.tensor([self.completion_logprob(padded_choices[i], padded_choices[i]) for i in range(len(padded_choices))])
            logits = logits - norm_logits

        elif normalization == "byte_length":    
            str_lens = [len(c) for c in choices]
            logits = logits / torch.tensor(str_lens)

        elif normalization == "token_length":
            tok_lens = [len(encode_token(c, self.tok)) for c in choices]
            logits = logits / torch.tensor(tok_lens)

        elif normalization == "root":
            tok_lens = [len(encode_token(c, self.tok)) for c in choices]
            logits = torch.pow(torch.exp(logits), 1./torch.tensor(tok_lens))

        logits = logits.tolist()

        return(logits.index(max(logits)))
    

In [3]:
## ---------------------------------------------------------------------
## load llama-2 as a EditedModel class (not pipeline, to integrate better with other scripts/notebooks)
## ---------------------------------------------------------------------

MODEL_NAME = "meta-llama/Llama-2-7b-hf" 

# tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)
# model = LlamaForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map = "auto")

hparams = FTHyperParams.from_hparams('hparams/FT/llama-7b.yaml')
model = EditedModel(hparams, auth_token())

2024-08-10 12:53:17,645 - easyeditor.editors.editor - INFO - Instantiating model
08/10/2024 12:53:17 - INFO - easyeditor.editors.editor -   Instantiating model


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.
2024-08-10 12:54:24,764 - easyeditor.editors.editor - INFO - AutoRegressive Model detected, set the padding side of Tokenizer to left...
08/10/2024 12:54:24 - INFO - easyeditor.editors.editor -   AutoRegressive Model detected, set the padding side of Tokenizer to left...


In [7]:
model.generate()

<__main__.EditedModel at 0x15540c616700>

In [5]:
df = pd.read_csv("data/obqa/test.tsv", sep='\t')
df.columns = df.columns.str.replace(' ', '_')
df.columns = df.columns.str.lower()

df2 = df.copy().tail(10) # smaller df for testing
df2.head(5)

Unnamed: 0,id,question_stem,choices,complete_question,answer_key
490,9-743,where might a bunny live?,(A) a thicket (B) atop palm trees (C) a sewer ...,where might a bunny live? (A) a thicket (B) at...,A
491,9-645,A shark will be unable to survive on eating al...,(A) it is a predator (B) it is a vegetarian (C...,A shark will be unable to survive on eating al...,A
492,8-250,"A meadow vole just gave birth, and needs to fe...",(A) oil (B) deer (C) bugs (D) recycled plastic...,"A meadow vole just gave birth, and needs to fe...",C
493,283,The Grand Canyon was formed by,(A) a volcano erupting in 1782 (B) a river nam...,The Grand Canyon was formed by (A) a volcano e...,C
494,8-183,"A woman, with a pale complexion, wants to spen...",(A) UV rays are harmful (B) sunlight will be f...,"A woman, with a pale complexion, wants to spen...",A


In [6]:
def mc_choose_answer(question, model, tokenizer=None):
    if not tokenizer:
        tokenizer = model.tok
    
    input_str = mc_answer_prompt + f"\nQuestion: {question}\nAnswer:"
    inputs = tokenizer(input_str, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    with torch.no_grad():
        sequences = model.generate(input_ids = input_ids, max_new_tokens = 1)
    
    return tokenizer.decode(sequences[0])[-1]


def last_token_logprobs(text, last_tokens, model):
    x = model.logprobs(text)
    logprobs = x['logprobs']
    t_idx = [i[-1] for i in model.tok(last_tokens)['input_ids']]

    return(logprobs[0, -1, t_idx])


def mc_answer_logprobs(question, model, answers = ['A','B','C','D']):

    input_str = mc_answer_prompt + f"\n\nQuestion: {question}\nAnswer: "

    return last_token_logprobs(input_str, answers, model)


mc_answer_logprobs('What color is the sky? (A) blue (B) red (C) orange (D) black', model)

tensor([-1.8646, -2.9641, -3.7612, -3.6786], device='cuda:0')

In [11]:
torch.cuda.empty_cache()

Question answering is getting ~58% accuracy. For reference, the original GPT-3 with 32-shot examples got 65.8% ([Brown et al., 2020](https://arxiv.org/abs/2005.14165v4)). So that seems not-too-bad.

## generate_premises() function
~~This function will read the model's statement from the data set and provide two premises that would make the statement true.~~

UPDATE: This seems to work better if we include the original question and answer, which eliminates a point of failure and gives more context for the explanation / premise generation.

UPDATE 2: This is in the `entailma` library in this repo, but I've reproduced it here to make it easier to play around with as you/we tweak prompts.


## updates:


- Need a way to score whether the premises are actually any "good" -- i.e. do they lead the model to choose the targeted answer? The code below implements an IKE/ICE-style version of this. It seems to work ok?
- Need to add more examples to the prompt of premises supportin INCORRECT answers, as it struggles with this ATM [quick and dirty version done]

In [None]:
def score_premises(premises, question, target_answer, model, answers = ['A','B','C','D']):
   '''Returns the odds-ratio of the target answer with vs without the premises in the premises in the context.'''
   
   reg_answer_prompt = mc_answer_prompt +  "\n\nQuestion:" + question+"\nAnswer: "
   logprobs0 = last_token_logprobs(reg_answer_prompt, answers, model)
   prob0 = logprobs0[answers.index(target_answer)].exp() / logprobs0.exp().sum()

   premise_str = "\n".join(premises)
   augmented_answer_prompt = mc_answer_prompt + '\n\n' + premise_str + '\nQuestion: ' + question  + 'Answer: '
   logprobs1 = last_token_logprobs(augmented_answer_prompt,  answers, model)
   prob1 = logprobs1[answers.index(target_answer)].exp() / logprobs1.exp().sum()

   return( (prob1/(1-prob1)) / (prob0/(1-prob0)))


print(score_premises(['The sky is red.', 'At sunset, the sun can be extremely red.'], 'What color is the sky? (A) blue (B) red (C) yellow (D) black', 'B', model))
print(score_premises(['Some things are red.', 'My favorite color is red.'], 'What color is the sky? (A) blue (B) red (C) yellow (D) black', 'B', model))
print(score_premises(['red red red red.', 'red red red red red.'], 'What color is the sky? (A) blue (B) red (C) yellow (D) black', 'B', model))

tensor(2.4303, device='cuda:0')
tensor(1.5171, device='cuda:0')
tensor(1.2276, device='cuda:0')


In [12]:
with open("entailma/prompt3b.txt", 'r') as file:
    premises_prompt = file.read()
    

def generate_premises(question, answer, model, num_prem = 1):
    
    input_str = f"\n\n{premises_prompt}Question: {question}\nAnswer: {answer}\n"

    pipe = transformers.pipeline(
        "text-generation",
        model = model.model,
        tokenizer = model.tok,
        torch_dtype=torch.float16,
        # device_map="cuda",
        device = model.model.device,
        # use_auth_token = auth_token()
    )

    sequences = pipe(
        input_str,
        top_p=.5,
        temperature = .7,
        max_new_tokens = 75,
        num_return_sequences = num_prem,
        batch_size = 4
    )
    
    generated_texts = [s['generated_text'] for s in sequences]
    premises = [t[len(input_str):-1] for t in generated_texts]
    premlist = [p.split(".\n")[:2] for p in premises] 

    return premlist if len(premlist) > 1 else premlist[0]




def generate_best_premises(question, answer, model, num_prem=10):
    premises = generate_premises(question, answer, model, num_prem)

    scores = [score_premises(p, question, answer, model) for p in premises]
    max_idx = scores.index(max(scores))
    print(max(scores))

    return premises[max_idx]


for row in df2.tail(1).itertuples():
    out = generate_best_premises(row.complete_question, row.answer_key, model, num_prem = 2)
    print(out)



OutOfMemoryError: CUDA out of memory. Tried to allocate 580.00 MiB (GPU 0; 79.26 GiB total capacity; 76.53 GiB already allocated; 268.75 MiB free; 78.50 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
for ps in out:
    print(score_premises(ps, df2.tail(1).iloc[0].complete_question, 'C', model))

tensor(1.3655, device='cuda:0')
tensor(1.1231, device='cuda:0')
tensor(1.1388, device='cuda:0')
tensor(1.1388, device='cuda:0')
tensor(1.2130, device='cuda:0')
tensor(1.3499, device='cuda:0')
tensor(1.6226, device='cuda:0')
tensor(1.2164, device='cuda:0')
tensor(1.1155, device='cuda:0')
tensor(1.5467, device='cuda:0')


In [15]:
model.generate_text('hello world!', max_new_tokens = 100)

["hello world! it's been a long time since i've blogged. I'm not sure what's up, but i think i'll be writing more in the future.\ni'm trying to get back into blogging, so i'm going to start with a post about my latest book, The Secret of the Fortune Wookiee. It's a book about the Star Wars universe and it's about the Fortune Wookiee.\nThe"]

In [None]:
## I don't think this is very useful for evaluating "belief"
# def text_logprob(text, model, norm = None):
#     if not norm:
#         norm = 1
#     elif norm == "whitespace":
#         norm = len(text.split())
    
#     logprobs = model.obs_logprobs(text)
#     return [l.sum()/norm for l in logprobs] if type(logprobs)==list else logprobs.sum()/norm
    
        

# [text_logprob(t, model, norm = "whitespace") for t in ['Some animals sweat in the heat to keep cool.', 'Sweat is a liquid that evaporates from the skin, which cools the body.']]

[tensor(-19.2849, device='cuda:0'), tensor(-21.3479, device='cuda:0')]

[['Sweat helps animals regulate their body temperature in hot environments', 'Sweat is a liquid that evaporates and cools the skin'], ['Animals use sweat to cool their bodies', 'Sweat contains water, which evaporates and cools the body'], ['Sweat is a liquid that helps regulate body temperature', 'Sweating helps animals cool down in hot environments'], ['Animals can secrete water from their skin to cool down', 'Water is a liquid'], ['Sweat is a liquid produced by the skin that helps regulate body temperature', 'Sweat helps animals stay cool in hot environments'], ['Animals use sweat to regulate body temperature', 'Sweat is a liquid that evaporates to cool the body'], ['Sweat is a liquid that helps regulate body temperature', 'Sweating helps animals adjust to hot environments'], ['Animals that live in hot and dry environments use sweat to cool their bodies', 'Sweat is a liquid that is secreted from the skin'], ['Sweat is a liquid produced by the skin that helps regulate body temperature

In [None]:
score_premises(['Sweat is a liquid that comes from the skin of some animals.', "Sweat helps animals adjust to hot environments."], df2.tail(1).iloc[0].complete_question, 'C', model)
# out[6]

tensor(1.9020, device='cuda:0')

In [None]:
x = model.logprobs('hello how are you today?')
# tok_idx = x['tokens']['input_ids'].squeeze()

logits = x['logprobs']

logits[0, :, tok_idx[1:]].squeeze().diag()

tensor([-15.9837,  -5.2983,  -9.4110], device='cuda:0')

In [None]:
df2.tail(1).iloc[0].complete_question

'Some animals use a liquid coming from their skin to adjust to (A) cold (B) water (C) heat (D) humidity'

In [None]:
# model.tok.decode(tokens.squeeze()[1:])

# tok_idx[1:]
# logits[0,1:, tok_idx[1:]]
print(tok_idx)
# model.tok.decode(tok_idx[1:])
logits[0][3][tok_idx[3]]

## looks like, 0th place, look at token index for 1st

tensor([    1, 22172,   920,   526,   366,  9826, 29973], device='cuda:0')


tensor(-8.5676, device='cuda:0')

In [None]:
['a','b','c'].index('b')

1