In [1]:
%load_ext autoreload
%autoreload 2

from typing import Optional, Any

import torch

from transformers.utils import is_accelerate_available, is_bitsandbytes_available
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig,
    pipeline,
)

from peft import PeftModel

ALPACA_TEMPLATE = (
    "Below is an instruction that describes a task, paired with an input that provides "
    "further context. Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
)



Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/lcur1112/.conda/envs/cot/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
CUDA SETUP: Loading binary /home/lcur1112/.conda/envs/cot/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so...


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [5]:
def load_adapted_hf_generation_pipeline(
    base_model_name,
    lora_model_name,
    temperature: float = 0,
    top_p: float = 1.,
    max_tokens: int = 50,
    batch_size: int = 16,
    device: str = "cpu",
    load_in_8bit: bool = True,
    generation_kwargs: Optional[dict] = None,
):
    """
    Load a huggingface model & adapt with PEFT.
    Borrowed from https://github.com/tloen/alpaca-lora/blob/main/generate.py
    """

    if device == "cuda":
        if not is_accelerate_available():
            raise ValueError("Install `accelerate`")
    if load_in_8bit and not is_bitsandbytes_available():
            raise ValueError("Install `bitsandbytes`")
    
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    task = "text-generation"
    
    if device == "cuda":
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            load_in_8bit=load_in_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        model = PeftModel.from_pretrained(
            model,
            lora_model_name,
            torch_dtype=torch.float16,
        )
    elif device == "mps":
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
        model = PeftModel.from_pretrained(
            model,
            lora_model_name,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name, device_map={"": device}, low_cpu_mem_usage=True
        )
        model = PeftModel.from_pretrained(
            model,
            lora_model_name,
            device_map={"": device},
        )

    # unwind broken decapoda-research config
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_in_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()

    generation_kwargs = generation_kwargs if generation_kwargs is not None else {}
    config = GenerationConfig(
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_tokens,
        top_p=top_p,
        **generation_kwargs,
    )
    pipe = pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        batch_size=16, # TODO: make a parameter
        generation_config=config,
        framework="pt",
    )

    return pipe

pipe = load_adapted_hf_generation_pipeline(
    base_model_name="decapoda-research/llama-7b-hf",
    lora_model_name="tloen/alpaca-lora-7b",
    )
# prompt = ALPACA_TEMPLATE.format(
#     instruction="Paraphrase the sentence."
#     input="The quick brown fox jumped over the lazy dog."
# )

Downloading (…)okenizer_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

ValueError: Tokenizer class LLaMATokenizer does not exist or is not currently imported.

In [3]:
parser = argparse.ArgumentParser(description="Testing various models")

# General args
parser.add_argument('--model_id', default = 'bigscience/mt0-small', type=str, help='Model type')
parser.add_argument('--hf_cache_dir', default = '/project/gpuuva021/shared/cot/hf_cache', type=str, help='Directory for HuggingFace cache')

# Dataset args
parser.add_argument('--preprocessed_dir', default = '/project/gpuuva021/shared/cot/data/preprocessed', type=str, help='Directory for storing the preprocessed datasets')
parser.add_argument('--bigbench_task_name', default = 'odd_one_out', type=str, help='The name of the bigbench task on which to train and evaluate')
parser.add_argument('--bigbench_explanations_path', default = 'data/bigbench-explanations/', type=str, help='Path to the bigbench explanations from Lampinen et al.')
parser.add_argument('--n_shot', default = 5, type=int, help='How many examples to show in-context')
parser.add_argument('--rebuild_cache', default = False, type=bool, help='Whether to rebuild the cached preprocessed datasets')
parser.add_argument('--shuffle_cots', default = False, type=bool, help='Whether to randomly select the available CoTs and their order. If False, the first n_shot CoTs are chosen.')

# Training args
parser.add_argument('--lr', default = 1e-3, type=float, help='Learning rate')
parser.add_argument('--max_epochs', default = 100, type=int, help='Maximum number of epochs to train')
parser.add_argument('--batch_size', default = 8, type=int, help='Batch size')
parser.add_argument('--seed', default=666, type=int, help="The seed for reproducibility")

# Training args (lora)
parser.add_argument('--lora_r', default = 8, type=int, help='Rank of LoRa')
parser.add_argument('--lora_alpha', default = 32, type=int, help='Alpha used for LoRa')
parser.add_argument('--lora_dropout', default = 0.05, type=float, help='Alpha used for LoRa')
parser.add_argument('--lora_bias', default = "none", type=str, help="Bias type for LoRa Can be 'none', 'all' or 'lora_only'")

args = parser.parse_args("")

all_defaults = {}
for key in vars(args):
    all_defaults[key] = parser.get_default(key)
all_defaults


class Args:
    def __init__(self, **entries):
        self.__dict__.update(entries)

args = Args(**all_defaults)
args.model_id

# args.model_id = "bigscience/bloomz-560m"
# args.bigbench_explanations_path = os.path.join('..', 'data/bigbench-explanations/')

'bigscience/mt0-small'

In [23]:
model_id = args.model_id

m_dicts = load_model_dicts()
model, tokenizer = load_model(model_id, m_dicts[model_id], model_kwargs={})

# model_dict = m_dicts[model_id]
# hf_cache = args.hf_cache_dir

# tokenizer = model_dict['tokenizer'].from_pretrained(model_id, cache_dir = hf_cache)






/project/gpuuva021/shared/cot/hf_cache


In [24]:
lora_model_name = '../lora_pretrained'

# lora_config = LoraConfig.from_pretrained('../lora_pretrained')
# peft_model = get_peft_model(model, lora_config)

peft_model = PeftModel.from_pretrained(model, lora_model_name, torch_dtype = torch.float16)

peft_model

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): MT5ForConditionalGeneration(
      (shared): Embedding(250112, 512)
      (encoder): MT5Stack(
        (embed_tokens): Embedding(250112, 512)
        (block): ModuleList(
          (0): MT5Block(
            (layer): ModuleList(
              (0): MT5LayerSelfAttention(
                (SelfAttention): MT5Attention(
                  (q): Linear(
                    in_features=512, out_features=384, bias=False
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=512, out_features=8, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=8, out_features=384, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
                    (lora_embeddin

In [25]:
task = 'text2text-generation'

generation_config = GenerationConfig()

pipe = pipeline(
        task,
        model=peft_model,
        tokenizer=tokenizer,
        batch_size=16, # TODO: make a parameter
        generation_config=generation_config,
        framework="pt",
    )

The model 'PeftModelForSeq2SeqLM' is not supported for text2text-generation. Supported models are ['BartForConditionalGeneration', 'BigBirdPegasusForConditionalGeneration', 'BlenderbotForConditionalGeneration', 'BlenderbotSmallForConditionalGeneration', 'EncoderDecoderModel', 'FSMTForConditionalGeneration', 'GPTSanJapaneseForConditionalGeneration', 'LEDForConditionalGeneration', 'LongT5ForConditionalGeneration', 'M2M100ForConditionalGeneration', 'MarianMTModel', 'MBartForConditionalGeneration', 'MT5ForConditionalGeneration', 'MvpForConditionalGeneration', 'NllbMoeForConditionalGeneration', 'PegasusForConditionalGeneration', 'PegasusXForConditionalGeneration', 'PLBartForConditionalGeneration', 'ProphetNetForConditionalGeneration', 'SwitchTransformersForConditionalGeneration', 'T5ForConditionalGeneration', 'XLMProphetNetForConditionalGeneration'].


In [9]:
pipe.load_local_model(lora_model_name)

AttributeError: 'Text2TextGenerationPipeline' object has no attribute 'load_local_model'

In [11]:
pipe = pipeline(task, model=peft_model, tokenizer=tokenizer)

The model 'PeftModelForSeq2SeqLM' is not supported for text2text-generation. Supported models are ['BartForConditionalGeneration', 'BigBirdPegasusForConditionalGeneration', 'BlenderbotForConditionalGeneration', 'BlenderbotSmallForConditionalGeneration', 'EncoderDecoderModel', 'FSMTForConditionalGeneration', 'GPTSanJapaneseForConditionalGeneration', 'LEDForConditionalGeneration', 'LongT5ForConditionalGeneration', 'M2M100ForConditionalGeneration', 'MarianMTModel', 'MBartForConditionalGeneration', 'MT5ForConditionalGeneration', 'MvpForConditionalGeneration', 'NllbMoeForConditionalGeneration', 'PegasusForConditionalGeneration', 'PegasusXForConditionalGeneration', 'PLBartForConditionalGeneration', 'ProphetNetForConditionalGeneration', 'SwitchTransformersForConditionalGeneration', 'T5ForConditionalGeneration', 'XLMProphetNetForConditionalGeneration'].


: 

In [12]:
inputs = tokenizer.encode("What is love?", return_tensors="pt")

In [15]:
model.generate(inputs)

TypeError: PeftModelForSeq2SeqLM.generate() takes 1 positional argument but 2 were given

: 

In [13]:
inputs

tensor([[5126,  339, 3869,  291,    1]])

In [9]:
run = 'morning-silence-129'
run = 'neat-armadillo-135'
save_dir = f'../trained_models/{run}'

In [7]:
# model = model_dict['model'].from_pretrained(model_id)
# model.save_pretrained(save_dir)

In [10]:
model = model_dict['model'].from_pretrained(save_dir)

ValueError: 
                        Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
                        the quantized model. If you want to dispatch the model on the CPU or the disk while keeping
                        these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom
                        `device_map` to `from_pretrained`. Check
                        https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                        for more details.
                        

: 

In [None]:
split = 'train'
dataset = CoTDataset(args, tokenizer, split)

In [None]:
dataset[0]

{'input_ids': tensor([                4140, -9223372036854775808,                  368,
                        51424,                14680, -9223372036854775808,
                         4412,                 1024,                   15,
         -9223372036854775808,                   15,                  267,
                          623,                 2956,                  989,
                           15,                  664,                  338,
                           15,                61312, -9223372036854775808,
                          189,                 3000, -9223372036854775808,
                          189,                 3260,                32672,
                        26064,                 1387,                51424,
                        14680,                 1800,                  632,
                          368,                14680,                  861,
                        21600,                 1485,                  267,
            