In [47]:
## ---------------------------------------------------------------------
## set up configs for huggingface hub and OS paths on HPC cluster -- make sure config.ini is correct
## ---------------------------------------------------------------------
import configparser
def auth_token():
    """Return the Hugging Face token stored in ``config.ini``.

    Reads ``config.ini`` from the current working directory and returns the
    ``token`` entry of its ``[hugging_face]`` section.

    Raises:
        KeyError: if the section or the key is absent (this includes the case
            where ``config.ini`` could not be read, since the parser is then
            left empty).
    """
    parser = configparser.ConfigParser()
    parser.read("config.ini")
    hub_section = parser["hugging_face"]
    return hub_section["token"]

def scratch_path():
    """Return the user's scratch directory as ``/scratch/<username>/``.

    The username is the ``username`` entry of the ``[user]`` section of
    ``config.ini`` in the current working directory. The returned string
    always ends with a trailing slash.

    Raises:
        KeyError: if the section or the key is absent from ``config.ini``.
    """
    parser = configparser.ConfigParser()
    parser.read("config.ini")
    username = parser["user"]["username"]
    return f"/scratch/{username}/"

import os
# Point the Hugging Face caches at the user's scratch space when that
# directory exists; otherwise the environment is left untouched. The two
# variables are echoed afterwards (None means "not set").
_scratch_dir = scratch_path()
if os.path.isdir(_scratch_dir):
    os.environ.update({
        'TRANSFORMERS_CACHE': _scratch_dir + '.cache/huggingface',
        'HF_DATASETS_CACHE': _scratch_dir + '.cache/huggingface/datasets',
    })
for _cache_var in ('TRANSFORMERS_CACHE', 'HF_DATASETS_CACHE'):
    print(os.getenv(_cache_var))

## ---------------------------------------------------------------------
## Load libraries
## ---------------------------------------------------------------------

import numpy as np
import pandas as pd

import torch
import transformers
from transformers import AutoTokenizer, AutoModel, LlamaForCausalLM, LlamaTokenizer

import torch.nn.functional as F

from easyeditor import LoRAHyperParams
from easyeditor.util import nethook
from easyeditor.custom import * # gets my custom functions

from entailma import *

## ---------------------------------------------------------------------
## Ensure GPU is available -- device should == 'cuda'
## ---------------------------------------------------------------------

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device = ", device)

/scratch/dmpowell/.cache/huggingface
/scratch/dmpowell/.cache/huggingface/datasets
device =  cuda


In [2]:
## ---------------------------------------------------------------------
## load llama-2 and set up a pipeline
## ---------------------------------------------------------------------

# MODEL_NAME = "meta-llama/Llama-2-7b-hf" 

# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# pipeline = transformers.pipeline(
#     "text-generation",
#     model = MODEL_NAME,
#     torch_dtype=torch.float16,
#     device_map="auto",
#     use_auth_token = auth_token()
# )

In [3]:
# Load the tab-separated question file.
df = pd.read_csv("data/obqa/test.tsv", sep='\t')
# Smaller df for testing: slice first, then copy, so only the 10 kept rows are
# duplicated and df2 is an independent frame that later cells can add columns to.
df2 = df.tail(10).copy()

In [4]:
# Take the last row of the dataset as a standalone example question and show its text.
question = df.iloc[-1].copy()
question['Complete Question']

'Some animals use a liquid coming from their skin to adjust to (A) cold (B) water (C) heat (D) humidity'

## Editing stuff

In [5]:
class EditedModel:
    """Convenience wrapper around an editor built with ``BaseEditor.from_hparams``.

    It bundles the editor's model and tokenizer with helpers to apply an edit,
    undo it, generate text, and score completions by log probability.

    NOTE(review): ``BaseEditor``, ``redirect_stdout``, ``encode_token`` and
    ``pad_token`` are not imported explicitly in this notebook; presumably
    they come from the star imports at the top -- confirm. ``generate_text``
    and ``logprobs`` also rely on the notebook-level ``device`` variable.

    Attributes (set in ``__init__``):
        editor: the object returned by ``BaseEditor.from_hparams(hparams)``.
        model, tok, model_name: taken from the editor.
        params: the hyper-parameter object passed in.
        preprompt: string prepended to every text that is generated from or scored.
        saved_weights: value returned by the last ``pure_edit`` call (``None`` initially).
    """

    def __init__(self, hparams, auth_token=None):
        """Build the editor from ``hparams``.

        ``auth_token`` is accepted for interface compatibility but is not used
        by this constructor.
        """
        self.editor = BaseEditor.from_hparams(hparams)

        self.model = self.editor.model
        self.tok = self.editor.tok
        self.model_name = self.editor.model_name

        self.params = hparams
        self.preprompt = ""
        self.saved_weights = None

        # Batched generation / scoring below pads on the left.
        self.tok.padding_side = "left"

    def edit(self, rewrite, log_file = None, **kwargs):
        """Apply an edit request to the wrapped model.

        Args:
            rewrite: mapping describing the edit. If it contains the key
                ``"preprompt"``, no weights are changed: the value is stored as
                the preprompt and ``None`` is returned. Otherwise the mapping
                is expanded as keyword arguments to ``self.editor.pure_edit``.
            log_file: optional path; when given, stdout emitted during the edit
                is appended to this file. When omitted, stdout is redirected to
                ``None`` for the duration of the edit.
            **kwargs: accepted but currently not forwarded to the editor.

        Returns:
            The metrics returned by ``pure_edit``, or ``None`` for a preprompt edit.
        """
        if "preprompt" in rewrite: # this is a little hacky
            self.preprompt = rewrite["preprompt"]
            return None

        # Open the log only when an actual edit runs, and always close it again
        # (the handle used to be leaked on every call).
        log_handle = open(log_file, "a") if log_file else None
        try:
            with redirect_stdout(log_handle):
                metrics, self.model, self.saved_weights = self.editor.pure_edit(
                    **rewrite,
                    keep_original_weight = True,
                    verbose = False
                )
        finally:
            if log_handle is not None:
                log_handle.close()

        return metrics

    def restore(self):
        """Undo the last edit and clear the preprompt. Always returns ``None``.

        * ``alg_name == "LoRA"``: the model is replaced by ``model.unload()``
          when the current model exposes ``unload``; if it does not (no adapter
          attached, e.g. ``restore`` called twice) the model is left as is.
        * otherwise, a non-empty ``saved_weights`` mapping is copied back into
          the model parameter by parameter and then cleared.
        * an empty ``saved_weights`` dict only prints a notice.
        """
        self.preprompt = ""

        if self.params.alg_name == "LoRA":
            # Guard against restoring when no adapter is attached.
            if hasattr(self.model, "unload"):
                self.model = self.model.unload()

        elif self.saved_weights:

            try:
                with torch.no_grad():
                    for k, v in self.saved_weights.items():
                        nethook.get_parameter(self.model, k)[...] = v
                self.saved_weights = None
            except NameError as e:
                print(f"No model weights to restore: {e}")

        elif self.saved_weights == {}:
            print("No model weights to restore: saved_weights is empty dict")

        return None

    def generate_text(self, texts, **kwargs):
        """Generate continuations for one text or a list of texts.

        The preprompt is prepended to every text. ``kwargs`` are forwarded to
        ``model.generate``. Returns a list of decoded strings (special tokens
        skipped), one per input text.
        """
        if not isinstance(texts, list):
            texts = [texts]

        texts = [self.preprompt + t for t in texts]

        model = self.model
        tokenizer = self.tok
        encoding = tokenizer(texts, padding=True, return_tensors='pt').to(device)

        with torch.no_grad():
            generated_ids = model.generate(**encoding, **kwargs)

            generated_texts = tokenizer.batch_decode(
                generated_ids, skip_special_tokens=True
            )

        return generated_texts

    def logprobs(self, texts):
        """Run the model on ``texts`` and return per-position log probabilities.

        ``texts`` may be a string or an iterable of strings; the preprompt is
        prepended to each. Returns ``{"tokens": encoding, "logprobs": tensor}``
        where ``logprobs`` is the log-softmax of the model logits over the last
        dimension.
        """
        texts = self.preprompt + texts if type(texts)==str else [self.preprompt + t for t in texts]

        tokenizer = self.tok
        model = self.model
        encoding = tokenizer(texts, padding=True, return_tensors='pt').to(device)

        with torch.no_grad():
            # Pass the attention mask along with the ids so that padding added
            # for batching is masked out instead of being attended to.
            model_out = model(
                input_ids=encoding["input_ids"],
                attention_mask=encoding.get("attention_mask"),
            )
            logits = model_out.logits
            logprobs = F.log_softmax(logits, -1)

        return {"tokens": encoding, "logprobs": logprobs}

    def completion_logprob(self, text, completion, start_ind = None):
        '''
        Compute model log probability of completion substring. Returns single value tensor. Takes only one text string.

        The score of the LAST occurrence of ``completion`` in ``text`` is
        returned. ``start_ind`` is accepted but not used. Raises ``IndexError``
        when the tokenized completion is not found in the tokenized text.
        '''
        return self.substring_logprobs(text, completion)[0][-1]

    def substring_logprobs(self, texts, substring, pad = True):
        '''
        Compute model log probability of each occurrence of substring in text. Returns list of list-type. Accepts a list of strings.

        For every text, the token ids of ``substring`` (from ``encode_token``)
        are searched in the text's token ids; each match is scored as the sum
        of the log probabilities of its tokens, each read from the position
        preceding that token.
        '''
        if not isinstance(texts, list):
            texts = [texts]

        logprobs = self.logprobs(texts)

        tok_encoded = encode_token(substring, self.tok, pad = pad)
        n_sub = len(tok_encoded)

        out = []
        for text_idx in range(len(texts)):
            text_encoded = logprobs['tokens']['input_ids'][text_idx].tolist()

            # find matches for searched token sequence
            start_idxs = [
                left
                for left in range(len(text_encoded) - n_sub + 1)
                if text_encoded[left:left + n_sub] == tok_encoded
            ]

            lp = logprobs['logprobs'][text_idx]
            match_probs = []

            # compute probability for all tokens
            for start in start_idxs:
                val = 0
                for offset, tok_id in enumerate(tok_encoded):
                    pos = start + offset - 1
                    if pos < 0:
                        # A token at sequence position 0 has no preceding
                        # position to predict it; skip it rather than letting
                        # the index wrap around to the last position.
                        continue
                    val += lp[pos][tok_id]
                match_probs.append(val)

            out.append(match_probs)

        return out

    def choose(self, prompt, choices, normalization = None):
        """Return the index of the choice the model scores highest after ``prompt``.

        Each choice is appended to the prompt and scored with
        ``completion_logprob``. For a ``LlamaTokenizer`` the prompt is given a
        trailing space and choices are used as is; for any other tokenizer the
        choices are passed through ``pad_token`` first.

        ``normalization`` selects an optional adjustment of the scores:
            * ``"unconditional"``: subtract the score of the choice when the
              choice itself is the whole text.
            * ``"byte_length"``: divide by ``len(choice)`` (string length).
            * ``"token_length"``: divide by the number of tokens in the choice.
            * ``"root"``: ``exp(score) ** (1 / number of tokens)``.
            * anything else / ``None``: raw summed log probability.

        Ties resolve to the first maximal choice.
        """
        if type(self.tok) == transformers.models.llama.tokenization_llama.LlamaTokenizer:
            padded_choices = choices
            # endswith() also copes with an empty prompt (prompt[-1] would raise).
            prompt = prompt if prompt.endswith(" ") else prompt + " "
        else:
            padded_choices = [pad_token(c) for c in choices]

        prompts = [prompt + c for c in padded_choices]

        logits = torch.tensor([self.completion_logprob(p, c) for p, c in zip(prompts, padded_choices)])

        if normalization == "unconditional":
            norm_logits = torch.tensor([self.completion_logprob(c, c) for c in padded_choices])
            logits = logits - norm_logits

        elif normalization == "byte_length":
            str_lens = [len(c) for c in choices]
            logits = logits / torch.tensor(str_lens)

        elif normalization == "token_length":
            tok_lens = [len(encode_token(c, self.tok)) for c in choices]
            logits = logits / torch.tensor(tok_lens)

        elif normalization == "root":
            tok_lens = [len(encode_token(c, self.tok)) for c in choices]
            logits = torch.pow(torch.exp(logits), 1./torch.tensor(tok_lens))

        logits = logits.tolist()

        return logits.index(max(logits))
    

In [6]:
# Load the LoRA editing hyper-parameters and build the editable model wrapper.
hparams = LoRAHyperParams.from_hparams('hparams/LoRA/llama-7b-canonical.yaml')
edited_model = EditedModel(hparams=hparams, auth_token=auth_token())

2024-08-08 17:08:56,459 - easyeditor.editors.editor - INFO - Instantiating model
08/08/2024 17:08:56 - INFO - easyeditor.editors.editor -   Instantiating model


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.
2024-08-08 17:09:30,904 - easyeditor.editors.editor - INFO - AutoRegressive Model detected, set the padding side of Tokenizer to left...
08/08/2024 17:09:30 - INFO - easyeditor.editors.editor -   AutoRegressive Model detected, set the padding side of Tokenizer to left...


In [7]:
# Edit request: make the model complete the prompt with the new target text.
rewrite = dict(
    prompts=['Sweat helps animals regulate their body temperature in'],
    target_new=['cold environments.'],
    # target_true=['hot and humid environments.'],
    subject=['Sweat'],
)

edited_model.edit(rewrite)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[]

In [18]:
# Generate up to 20 new tokens after the edited prompt to inspect the model's current completion.
edited_model.generate_text("Sweat helps animals regulate their body temperature in", max_new_tokens = 20)

['Sweat helps animals regulate their body temperature in hot weather.\nMammals, including humans, sweat to keep their body temperature regulated']

In [11]:
# Undo the edit: clears the preprompt and, for LoRA, replaces the model with its unloaded version.
edited_model.restore()

In [22]:
# Display the 10-row subset used for testing.
df2

Unnamed: 0,ID,Question Stem,Choices,Complete Question,Answer Key
490,9-743,where might a bunny live?,(A) a thicket (B) atop palm trees (C) a sewer ...,where might a bunny live? (A) a thicket (B) at...,A
491,9-645,A shark will be unable to survive on eating al...,(A) it is a predator (B) it is a vegetarian (C...,A shark will be unable to survive on eating al...,A
492,8-250,"A meadow vole just gave birth, and needs to fe...",(A) oil (B) deer (C) bugs (D) recycled plastic...,"A meadow vole just gave birth, and needs to fe...",C
493,283,The Grand Canyon was formed by,(A) a volcano erupting in 1782 (B) a river nam...,The Grand Canyon was formed by (A) a volcano e...,C
494,8-183,"A woman, with a pale complexion, wants to spen...",(A) UV rays are harmful (B) sunlight will be f...,"A woman, with a pale complexion, wants to spen...",A
495,9-284,A person is heating water in order to cook pas...,(A) scalds (B) cools (C) toasts (D) freezes,A person is heating water in order to cook pas...,A
496,7-1186,Pasta may be cooked in water when,(A) the water is warm (B) the water is on the ...,Pasta may be cooked in water when (A) the wate...,C
497,926,A decrease in diseases,(A) has no impact on a population (B) leads to...,A decrease in diseases (A) has no impact on a ...,C
498,7-519,"When soil is viewed in a scientific way, what ...",(A) insects like big beetles (B) tiny lifeform...,"When soil is viewed in a scientific way, what ...",B
499,7-7,Some animals use a liquid coming from their sk...,(A) cold (B) water (C) heat (D) humidity,Some animals use a liquid coming from their sk...,C


In [None]:
#### ------




<h2>mc_choose_answer() function</h2>
<p>This function will read a multiple choice question from the dataset and output a single letter response.</p>

In [26]:

# Build the few-shot prompt: the first 40 rows of df rendered as
# "Question: ...\nAnswer: <key>" entries, joined by newlines.
plist = [
    "Question: " + row["Complete Question"] + "\nAnswer: " + row["Answer Key"]
    for row in (df.iloc[idx] for idx in range(40))
]

answer_prompt = "\n".join(plist)
print(answer_prompt)

Question: A person wants to start saving money so that they can afford a nice vacation at the end of the year. After looking over their budget and expenses, they decide the best way to save money is to (A) make more phone calls (B) quit eating lunch out (C) buy less with monopoly money (D) have lunch with friends
Answer: B
Question: There is most likely going to be fog around: (A) a marsh (B) a tundra (C) the plains (D) a desert
Answer: A
Question: Predators eat (A) lions (B) humans (C) bunnies (D) grass
Answer: C
Question: Oak tree seeds are planted and a sidewalk is paved right next to that spot, until eventually, the tree is tall and the roots must extend past the sidewalk, which means (A) roots may be split (B) roots may begin to die (C) parts may break the concrete (D) roots may fall apart
Answer: C
Question: An electric car runs on electricity via (A) gasoline (B) a power station (C) electrical conductors (D) fuel
Answer: C
Question: As the rain forest is deforested the atmospher

In [45]:
def mc_choose_answer(question, model, tokenizer):
    """Answer a multiple-choice question with a single generated token.

    The question is appended to the notebook-level few-shot ``answer_prompt``
    in the same "Question: ... / Answer:" layout, one new token is generated,
    and the last character of the decoded sequence is returned.

    Args:
        question: full question text including the lettered choices.
        model: model exposing ``generate`` and ``device``.
        tokenizer: tokenizer matching ``model``.

    Returns:
        A one-character string (the answer letter when the model emits one).
    """
    input_str = answer_prompt + f"\nQuestion: {question}\nAnswer:"
    # Send the inputs to wherever the model lives instead of assuming CUDA.
    inputs = tokenizer(input_str, return_tensors="pt").to(model.device)
    sequences = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        max_new_tokens = 1,
    )

    return tokenizer.decode(sequences[0])[-1]

def _model_answer_for(row):
    """Answer the question held in one dataframe row with the current model."""
    return mc_choose_answer(row['Complete Question'], edited_model.model, edited_model.tok)

# One model answer per row of the test subset.
df2['Model Answer'] = df2.apply(_model_answer_for, axis=1)

# mc_choose_answer("Bill's arm got cold when he put it inside the (A) refrigerator (B) room (C) jacket (D) oven", edited_model.model, edited_model.tok)
# edited_model.model.generate("Bill's arm got cold when he put it inside the (A) refrigerator (B) room (C) jacket (D) oven", max_new_tokens = 1)


In [46]:
# Display the subset again, now including the 'Model Answer' column.
df2

Unnamed: 0,ID,Question Stem,Choices,Complete Question,Answer Key,Model Answer
490,9-743,where might a bunny live?,(A) a thicket (B) atop palm trees (C) a sewer ...,where might a bunny live? (A) a thicket (B) at...,A,A
491,9-645,A shark will be unable to survive on eating al...,(A) it is a predator (B) it is a vegetarian (C...,A shark will be unable to survive on eating al...,A,A
492,8-250,"A meadow vole just gave birth, and needs to fe...",(A) oil (B) deer (C) bugs (D) recycled plastic...,"A meadow vole just gave birth, and needs to fe...",C,C
493,283,The Grand Canyon was formed by,(A) a volcano erupting in 1782 (B) a river nam...,The Grand Canyon was formed by (A) a volcano e...,C,B
494,8-183,"A woman, with a pale complexion, wants to spen...",(A) UV rays are harmful (B) sunlight will be f...,"A woman, with a pale complexion, wants to spen...",A,A
495,9-284,A person is heating water in order to cook pas...,(A) scalds (B) cools (C) toasts (D) freezes,A person is heating water in order to cook pas...,A,A
496,7-1186,Pasta may be cooked in water when,(A) the water is warm (B) the water is on the ...,Pasta may be cooked in water when (A) the wate...,C,B
497,926,A decrease in diseases,(A) has no impact on a population (B) leads to...,A decrease in diseases (A) has no impact on a ...,C,C
498,7-519,"When soil is viewed in a scientific way, what ...",(A) insects like big beetles (B) tiny lifeform...,"When soil is viewed in a scientific way, what ...",B,B
499,7-7,Some animals use a liquid coming from their sk...,(A) cold (B) water (C) heat (D) humidity,Some animals use a liquid coming from their sk...,C,A


In [40]:
# Number of rows where the model's answer equals the answer key.
(df2["Answer Key"] == df2["Model Answer"]).sum()

0

This is getting ~58% accuracy. For reference, the original GPT-3 with 32-shot examples got 65.8% ([Brown et al., 2020](https://arxiv.org/abs/2005.14165v4)). So that seems not-too-bad.

## generate_premises() function
~~This function will read the model's statement from the data set and provide two premises that would make the statement true.~~

UPDATE: This seems to work better if we include the original question and answer, which eliminates a point of failure and gives more context for the explanation / premise generation.


In [34]:
# Read the few-shot prompt used for premise generation.
with open("prompt3b.txt", 'r') as prompt_file:
    premises_prompt = prompt_file.read()
    
def generate_premises(question, answer, model, tokenizer):
    """Generate up to two premises supporting ``answer`` to ``question``.

    The notebook-level ``premises_prompt`` is followed by the question and the
    answer, and the continuation is produced with a text-generation pipeline
    using beam search (5 beams, at most 150 new tokens).

    Args:
        question: full question text.
        answer: the answer to explain.
        model: already-loaded model; the pipeline runs on ``model.device``.
        tokenizer: tokenizer matching ``model``.

    Returns:
        The first two newline-separated lines of the generated continuation,
        as a list of at most two strings.
    """
    input_str = f"\n\n{premises_prompt}Question: {question}\nAnswer: {answer}\n"

    pipe = transformers.pipeline(
        "text-generation",
        model = model,
        tokenizer = tokenizer,
        torch_dtype=torch.float16,
        device = model.device,
        use_auth_token = auth_token()
    )
    # Sampling is not enabled, so decoding is plain beam search; the former
    # `temperature` argument had no effect in that mode and has been dropped.
    sequences = pipe(
        input_str,
        num_beams = 5, # beam search may be better ...
        max_new_tokens=150,
    )

    generated_text = sequences[0]['generated_text']
    # Strip the echoed prompt.
    # NOTE(review): the -1 also drops the final character of the generation -- confirm this is intended.
    premises = generated_text[len(input_str):-1]
    return premises.split("\n")[:2]

def _premises_for(row):
    """Generate premises for the question and answer key held in one dataframe row."""
    return generate_premises(row['Complete Question'], row['Answer Key'], edited_model.model, edited_model.tok)

# One list of generated premises per row of the test subset.
df2['Generated Premises'] = df2.apply(_premises_for, axis=1)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


KeyboardInterrupt: 

In [65]:
# Inspect the premises generated for the first row of the subset.
df2.iloc[0]["Generated Premises"]

['Sweat is a liquid produced by the skin.',
 'Sweat helps regulate body temperature.']

500