In [1]:
## ---------------------------------------------------------------------
## set up configs for huggingface hub and OS paths on HPC cluster -- make sure config.ini is correct
## ---------------------------------------------------------------------
import configparser
def auth_token():
    """Return the Hugging Face token stored under [hugging_face] in config.ini."""
    parser = configparser.ConfigParser()
    parser.read("config.ini")
    hf_section = parser["hugging_face"]
    return hf_section["token"]

def scratch_path():
    """Return the user's scratch directory, "/scratch/<username>/", using the username from config.ini."""
    parser = configparser.ConfigParser()
    parser.read("config.ini")
    username = parser["user"]["username"]
    return "/scratch/" + username + "/"

import os
# Point the Hugging Face caches at the scratch directory when that directory exists.
scratch_dir = scratch_path()
if os.path.isdir(scratch_dir):
    hf_cache_dir = scratch_dir + '.cache/huggingface'
    os.environ['TRANSFORMERS_CACHE'] = hf_cache_dir
    os.environ['HF_DATASETS_CACHE'] = hf_cache_dir + '/datasets'

# Show the cache locations currently in effect (None when a variable is unset).
for cache_var in ('TRANSFORMERS_CACHE', 'HF_DATASETS_CACHE'):
    print(os.getenv(cache_var))

## ---------------------------------------------------------------------
## Load libraries
## ---------------------------------------------------------------------

import numpy as np
import pandas as pd

import torch
import transformers
from transformers import AutoTokenizer, AutoModel, LlamaForCausalLM, LlamaTokenizer

import torch.nn.functional as F

from easyeditor import LoRAHyperParams
from easyeditor.util import nethook
from easyeditor.custom import * # gets my custom functions

from entailma import *

## ---------------------------------------------------------------------
## Ensure GPU is available -- device should == 'cuda'
## ---------------------------------------------------------------------

# Use the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device = ", device)

/scratch/dmpowell/.cache/huggingface
/scratch/dmpowell/.cache/huggingface/datasets


  warn(


device =  cuda


In [2]:
## ---------------------------------------------------------------------
## load llama-2 and set up a pipeline
## ---------------------------------------------------------------------

# MODEL_NAME = "meta-llama/Llama-2-7b-hf" 

# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# pipeline = transformers.pipeline(
#     "text-generation",
#     model = MODEL_NAME,
#     torch_dtype=torch.float16,
#     device_map="auto",
#     use_auth_token = auth_token()
# )

In [3]:
# Read the tab-separated OpenBookQA test file.
df = pd.read_csv("data/obqa/test.tsv", sep='\t')
# Keep the last 10 rows as a smaller frame for testing.
df2 = df.tail(10).copy()

## Editing stuff

In [4]:
class EditedModel:
    """
    Convenience wrapper around an editor built with ``BaseEditor.from_hparams``.

    The wrapper keeps the editor's model and tokenizer, applies edits (or a
    plain-text "preprompt" that is prepended to every query), restores the
    model after an edit, and provides helpers for text generation and for
    scoring text with token log probabilities.

    Tokenized inputs are moved to the notebook-level ``device``.
    """

    def __init__(self, hparams, auth_token=None):
        """
        Build the underlying editor from `hparams`.

        `auth_token` is accepted for call-site compatibility but is not used.
        """
        self.editor = BaseEditor.from_hparams(hparams)

        self.model = self.editor.model
        self.tok = self.editor.tok
        self.model_name = self.editor.model_name

        self.params = hparams
        self.preprompt = ""        # text prepended to every query
        self.saved_weights = None  # original weights returned by the last edit

        self.tok.padding_side = "left"
        # self.tok.pad_token = self.tok.eos_token

    def edit(self, rewrite, log_file = None, **kwargs):
        """
        Apply `rewrite` to the model.

        If `rewrite` contains a "preprompt" key, no weights are touched: its
        value is stored (and prepended to later queries) and None is returned.
        Otherwise `rewrite` is unpacked into ``self.editor.pure_edit``; the
        edited model and the saved original weights are stored on the instance
        and the metrics returned by the editor are passed back.

        While the edit runs, standard output is redirected to `log_file`
        (opened in append mode) when one is given, and to None otherwise.
        `**kwargs` is accepted but not forwarded to the editor.
        """
        if "preprompt" in rewrite: # this is a little hacky
            self.preprompt = rewrite["preprompt"]
            return None

        h = open(log_file, "a") if log_file else None
        try:
            with redirect_stdout(h):
                metrics, self.model, self.saved_weights = self.editor.pure_edit(
                    **rewrite,
                    keep_original_weight = True,
                    verbose = False
                )
        finally:
            # Close the log file even if the edit raises.
            if h is not None:
                h.close()

        return metrics

    def restore(self):
        """
        Undo the last edit and clear the preprompt. Always returns None.

        For LoRA hyper-parameters the model is replaced by ``self.model.unload()``.
        For other algorithms the saved original weights (if any) are copied back
        into the model and then discarded.
        """
        self.preprompt = ""

        if self.params.alg_name == "LoRA":
            self.model = self.model.unload()

        elif self.saved_weights:

            try:
                with torch.no_grad():
                    for k, v in self.saved_weights.items():
                        nethook.get_parameter(self.model, k)[...] = v
                self.saved_weights = None
            except NameError as e:
                print(f"No model weights to restore: {e}")

        elif self.saved_weights == {}:
            print("No model weights to restore: saved_weights is empty dict")

        return None

    def generate_text(self, texts, **kwargs):
        """
        Generate continuations for one string or a list of strings.

        The preprompt is prepended to every text. `**kwargs` is forwarded to
        ``model.generate``. Returns the list of decoded sequences with special
        tokens skipped.
        """
        if not isinstance(texts, list):
            texts = [texts]

        texts = [self.preprompt + t for t in texts]

        model = self.model
        tokenizer = self.tok
        encoding = tokenizer(texts, padding=True, return_tensors='pt').to(device)

        with torch.no_grad():
            generated_ids = model.generate(**encoding, **kwargs)

            generated_texts = tokenizer.batch_decode(
                generated_ids, skip_special_tokens=True
            )

        return generated_texts

    def logprobs(self, texts):
        """
        Run the model on `texts` (a string or a list of strings, each prefixed
        with the preprompt) and return
        ``{"tokens": <tokenizer output>, "logprobs": <log-softmax of the logits>}``.
        """
        if isinstance(texts, str):
            texts = self.preprompt + texts
        else:
            texts = [self.preprompt + t for t in texts]

        tokenizer = self.tok
        model = self.model
        encoding = tokenizer(texts, padding=True, return_tensors='pt').to(device)

        with torch.no_grad():
            # Pass the attention mask so padding added for batching is not
            # attended to; without padding the mask is all ones and the
            # result is unchanged.
            model_out = model(
                input_ids=encoding["input_ids"],
                attention_mask=encoding.get("attention_mask"),
            )
            logits = model_out.logits
            logprobs = F.log_softmax(logits, -1)

        return {"tokens": encoding, "logprobs": logprobs}

    def completion_logprob(self, text, completion, start_ind = None):
        '''
        Compute model log probability of completion substring. Returns single value tensor. Takes only one text string.

        The value is the one for the last occurrence of `completion` in `text`.
        `start_ind` is accepted for backward compatibility and is not used.
        Raises IndexError when `completion` does not occur in the tokenized text.
        '''
        matches = self.substring_logprobs(text, completion)[0]
        if not matches:
            raise IndexError(
                f"completion {completion!r} was not found in the tokenized text"
            )
        return matches[-1]

    def substring_logprobs(self, texts, substring, pad = True):
        '''
        Compute model log probability of each occurrence of substring in text. Returns list of list-type. Accepts a list of strings.

        The outer list has one entry per text; each inner list has one summed
        log probability per occurrence of the encoded substring, in order of
        appearance.
        '''
        if not isinstance(texts, list):
            texts = [texts]

        scored = self.logprobs(texts)

        tok_encoded = encode_token(substring, self.tok, pad = pad)
        n_sub = len(tok_encoded)

        out = []
        for row in range(len(texts)):
            text_encoded = scored['tokens']['input_ids'][row].tolist()

            # find matches for searched token sequence
            start_idxs = [
                left
                for left in range(len(text_encoded) - n_sub + 1)
                if text_encoded[left:left + n_sub] == tok_encoded
            ]

            lp = scored['logprobs'][row]
            match_probs = []

            # The score of the token at position p is read from the model
            # output at position p - 1.
            # NOTE(review): for a match starting at position 0 the index
            # start + offset - 1 is -1 and wraps to the final position --
            # confirm this cannot happen for the tokenizers in use.
            for start in start_idxs:
                val = 0
                for offset, token_id in enumerate(tok_encoded):
                    val += lp[start + offset - 1][token_id]
                match_probs.append(val)

            out.append(match_probs)

        return out

    def choose(self, prompt, choices, normalization = None):
        """
        Return the index of the choice the model scores highest after `prompt`.

        `normalization` selects how the completion log probabilities are
        adjusted before comparison:
          None            -- raw completion log probability
          "unconditional" -- subtract the log probability of the choice scored on its own
          "byte_length"   -- divide by ``len(choice)``
          "token_length"  -- divide by the number of encoded tokens of the choice
          "root"          -- exp(log probability) raised to 1 / number of tokens
        """
        if type(self.tok) == transformers.models.llama.tokenization_llama.LlamaTokenizer:
            padded_choices = choices
            # endswith() also copes with an empty prompt
            prompt = prompt if prompt.endswith(" ") else prompt + " "
        else:
            padded_choices = [pad_token(c) for c in choices]

        prompts = [prompt + c for c in padded_choices]

        scores = torch.tensor(
            [self.completion_logprob(p, c) for p, c in zip(prompts, padded_choices)]
        )

        if normalization == "unconditional":
            norm_scores = torch.tensor(
                [self.completion_logprob(c, c) for c in padded_choices]
            )
            scores = scores - norm_scores

        elif normalization == "byte_length":
            str_lens = [len(c) for c in choices]
            scores = scores / torch.tensor(str_lens)

        elif normalization == "token_length":
            tok_lens = [len(encode_token(c, self.tok)) for c in choices]
            scores = scores / torch.tensor(tok_lens)

        elif normalization == "root":
            tok_lens = [len(encode_token(c, self.tok)) for c in choices]
            scores = torch.pow(torch.exp(scores), 1./torch.tensor(tok_lens))

        scores = scores.tolist()

        return scores.index(max(scores))
    

In [5]:
# Load the canonical LoRA hyper-parameters and build the editable model wrapper.
hparams = LoRAHyperParams.from_hparams(
    'hparams/LoRA/llama-7b-canonical.yaml'
)
edited_model = EditedModel(hparams, auth_token=auth_token())

2024-08-09 15:24:53,745 - easyeditor.editors.editor - INFO - Instantiating model
08/09/2024 15:24:53 - INFO - easyeditor.editors.editor -   Instantiating model


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.
2024-08-09 15:26:15,309 - easyeditor.editors.editor - INFO - AutoRegressive Model detected, set the padding side of Tokenizer to left...
08/09/2024 15:26:15 - INFO - easyeditor.editors.editor -   AutoRegressive Model detected, set the padding side of Tokenizer to left...


In [7]:
# rewrite = {
#     'prompts': [f'Sweat helps animals regulate their body temperature in'],
#     'target_new': ['cold environments.'], 
#     # 'target_true': ['hot and humid environments.'], 
#     'subject': ['Sweat']
# }

# edited_model.edit(rewrite)
# edited_model.generate_text("Sweat helps animals regulate their body temperature in", max_new_tokens = 20)
# edited_model.restore()

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[]

In [117]:
# Read the prompt text that is placed in front of every premise-generation query.
with open("entailma/prompt3b.txt", 'r') as prompt_file:
    premises_prompt = prompt_file.read()
    
def generate_premises(question, answer, model, tokenizer):
    """
    Generate premises for `answer` to `question` with a text-generation pipeline.

    The query is the module-level `premises_prompt` followed by the question
    and the answer. The echoed query is cut off the front of the generated
    text and the first two non-blank lines of the remainder are returned.

    Returns a list of at most two strings.
    """
    input_str = f"\n\n{premises_prompt}Question: {question}\nAnswer: {answer}\n"

    pipe = transformers.pipeline(
        "text-generation",
        model = model,
        tokenizer = tokenizer,
        torch_dtype=torch.float16,
        # device_map="cuda",
        device = model.device,
        use_auth_token = auth_token()
    )
    sequences = pipe(
        input_str,
        # do_sample=True,
        # top_k = 50,
        num_beams = 5, # beam search may be better ...
        max_new_tokens = 100,
        temperature = 0.7,
        no_repeat_ngram_size = 3
    )

    generated_text = sequences[0]['generated_text']

    # Keep the whole completion: slicing with [:-1] used to chop the final
    # character, which truncated the last premise of short generations.
    completion = generated_text[len(input_str):]

    # Blank lines are not premises; skip them before taking the first two.
    premises = [line for line in completion.split("\n") if line.strip()]

    return premises[:2]

# df2['Generated Premises'] = df2.apply(
#     lambda row: generate_premises(row['Complete Question'], row['Answer Key'], edited_model.model, edited_model.tok),
#     axis=1
# )

In [65]:
# Premises generated for the first row of the working frame.
df2["Generated Premises"].iloc[0]

['Sweat is a liquid produced by the skin.',
 'Sweat helps regulate body temperature.']

## premise generation

1. generate premises for correct answer
2. generate premises for each of incorrect answers

## question answering

1. answer questions (get answer probabilities)
2. edit + answer questions (get answer probabilities)
3. edit for each incorrect answer + answer questions (get answer probabilities)

## generating edits

We probably need some logic to decide which part of each premise is the "prompt" and which is the "target" for editing -- i.e. which part of the premise gets masked. Alternatively, we could use something arbitrary, such as 25% of the tokens. The risk is that the edits merely make the tokens in the answer (or possibly similar tokens) more probable. That would leave a somewhat deflationary explanation for why editing might benefit performance.

In [112]:
# Same instantiation as the earlier cell: reload the LoRA hyper-parameters and rebuild the wrapper.
hparams = LoRAHyperParams.from_hparams(
    'hparams/LoRA/llama-7b-canonical.yaml'
)
edited_model = EditedModel(hparams, auth_token=auth_token())

2024-08-09 14:08:05,642 - easyeditor.editors.editor - INFO - Instantiating model
2024-08-09 14:08:05,642 - easyeditor.editors.editor - INFO - Instantiating model
2024-08-09 14:08:05,642 - easyeditor.editors.editor - INFO - Instantiating model
08/09/2024 14:08:05 - INFO - easyeditor.editors.editor -   Instantiating model


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.
2024-08-09 14:09:02,641 - easyeditor.editors.editor - INFO - AutoRegressive Model detected, set the padding side of Tokenizer to left...
2024-08-09 14:09:02,641 - easyeditor.editors.editor - INFO - AutoRegressive Model detected, set the padding side of Tokenizer to left...
2024-08-09 14:09:02,641 - easyeditor.editors.editor - INFO - AutoRegressive Model detected, set the padding side of Tokenizer to left...
08/09/2024 14:09:02 - INFO - easyeditor.editors.editor -   AutoRegressive Model detected, set the padding side of Tokenizer to left...


In [6]:
# Read the tab-separated OpenBookQA test file and keep the last 50 rows as a smaller working frame.
df = pd.read_csv("data/obqa/test.tsv", sep='\t')
df2 = df.tail(50).copy()

# Column names: spaces become underscores, then everything is lower-cased.
df2.columns = df2.columns.str.replace(' ', '_').str.lower()

option_labels = ["A", "B", "C", "D"]

# foils: the three option labels that are not the answer key; F1..F3 hold them individually.
df2 = df2.assign(
    foils = lambda d: d["answer_key"].map(
        lambda key: [label for label in option_labels if label != key]
    )
)
df2 = df2.assign(
    F1 = lambda d: d["foils"].map(lambda foils: foils[0]),
    F2 = lambda d: d["foils"].map(lambda foils: foils[1]),
    F3 = lambda d: d["foils"].map(lambda foils: foils[2]),
)

# Premises are generated for the first foil only; the correct answer and
# F2/F3 are left out here to save time.
df2 = df2.assign(
    premises_F1 = lambda d: d.apply(
        lambda row: generate_premises(
            row['complete_question'], row['F1'], edited_model.model, edited_model.tok
        ),
        axis=1,
    )
)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [123]:
# rewrite = {
#     'prompts': [f'Sweat helps animals regulate their body temperature in'],
#     'target_new': [' cold environments.'], 
#     # 'target_true': [' hot and humid environments.'], 
#     'subject': ['Sweat']
# }

def split_sentence(x):
    """
    Split `x` on single spaces into a first and a second half.

    Returns a tuple (first_half, second_half); the second half receives the
    middle word when the word count is odd and always starts with one extra
    leading space.
    """
    words = x.split(" ")
    midpoint = len(words) // 2

    first_half = " ".join(words[:midpoint])
    second_half = " " + " ".join(words[midpoint:])

    return (first_half, second_half)

# For every question: answer it with the current model, edit the model with
# the premises generated for the first foil, answer again, then restore.
raw_answers = []
edited_answers = []

for e in df2.itertuples():

    raw_answers.append(mc_choose_answer(e.complete_question, edited_model.model, edited_model.tok))

    # Split each premise once: first half is the edit prompt, second half the target.
    premise_halves = [split_sentence(x) for x in e.premises_F1]
    rewrite = {
        'prompts': [prompt for prompt, _ in premise_halves],
        'target_new': [target for _, target in premise_halves]
        }

    edited_model.edit(rewrite)
    try:
        edited_answers.append(mc_choose_answer(e.complete_question, edited_model.model, edited_model.tok))
    finally:
        # Always undo the edit, even when answering fails, so a failure does
        # not leave the model edited for later cells or re-runs.
        edited_model.restore()

df2["edited_answer"] = edited_answers
df2["raw_answer"] = raw_answers


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
08/09/2024 15:23:02 - INFO - peft.tuners.tuners_utils -   Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!


TypeError: generate() takes 1 positional argument but 2 were given

In [7]:
# Write the working frame to disk.
output_csv = "test50_v0.csv"
df2.to_csv(output_csv)

In [122]:
# Number of questions answered correctly before and after editing.
raw_correct = sum(df2["raw_answer"] == df2["answer_key"])
edited_correct = sum(df2["edited_answer"] == df2["answer_key"])
raw_correct, edited_correct

AttributeError: 'DataFrame' object has no attribute 'raw_answer'

In [8]:
# Display the working frame as this cell's output.
df2

Unnamed: 0,id,question_stem,choices,complete_question,answer_key,foils,F1,F2,F3,premises_F1
450,7-244,An ice cube placed in sunlight will,(A) shrink (B) change color (C) grow (D) freeze,An ice cube placed in sunlight will (A) shrink...,A,"[B, C, D]",B,C,D,"[Ice cubes placed in sunlight will melt., Melt..."
451,9-916,"If a person loses his job and is low on money,...",(A) destroying (B) conserving (C) losing (D) s...,"If a person loses his job and is low on money,...",B,"[A, C, D]",A,C,D,"[Destroying is the opposite of conserving., Co..."
452,9-1046,The skeletal system protects which of these?,(A) liver (B) eyelashes (C) finger nails (D) b...,The skeletal system protects which of these? (...,A,"[B, C, D]",B,C,D,"[Eyelashes are made of keratin, a fibrous prot..."
453,167,What has more gravity force than Earth but les...,(A) Jupiter (B) the moon (C) a space station (...,What has more gravity force than Earth but les...,A,"[B, C, D]",B,C,D,"[The moon has less gravity than Earth, but mor..."
454,9-566,The dam was put under much more stress after the,(A) party (B) huge rain storm (C) drought (D) ...,The dam was put under much more stress after t...,B,"[A, C, D]",A,C,D,[The dam was put under more stress after the p...
455,8-28,If photosynthesis was a recipe it would requir...,"(A) CO2, water, and argon (B) sunlight, oxygen...",If photosynthesis was a recipe it would requir...,D,"[A, B, C]",A,B,C,"[Photosynthesis requires carbon dioxide, water..."
456,7-179,"If a nail is Fe, that nail is",(A) foreign (B) atomic 26 (C) nickel (D) atomi...,"If a nail is Fe, that nail is (A) foreign (B) ...",B,"[A, C, D]",A,C,D,"[Fe is the chemical symbol for iron., Iron is ..."
457,389,when a circle is torn it is,(A) doubled (B) changed (C) a smaller circle (...,when a circle is torn it is (A) doubled (B) ch...,B,"[A, C, D]",A,C,D,"[When a circle is torn, it becomes two smaller..."
458,1528,Wind can cause,(A) leaves to remain on branches (B) trees to ...,Wind can cause (A) leaves to remain on branche...,C,"[A, B, D]",A,B,D,"[Wind can cause leaves to remain on branches.,..."
459,1457,What happens as water levels rise?,(A) fish swim more (B) homes are built (C) lan...,What happens as water levels rise? (A) fish sw...,D,"[A, B, C]",A,B,C,"[As water levels rise, fish are forced to swim..."


: 