In [1]:
%load_ext autoreload
%autoreload 2

In [9]:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig

model_name = 'EleutherAI/pythia-70m'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

model_nf4 = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [10]:
# Verify that the parameters are quantized
for name, param in model_nf4.named_parameters():
    print(f"Parameter: {name}, Data type: {param.dtype}")

Parameter: gpt_neox.embed_in.weight, Data type: torch.float16
Parameter: gpt_neox.layers.0.input_layernorm.weight, Data type: torch.float16
Parameter: gpt_neox.layers.0.input_layernorm.bias, Data type: torch.float16
Parameter: gpt_neox.layers.0.post_attention_layernorm.weight, Data type: torch.float16
Parameter: gpt_neox.layers.0.post_attention_layernorm.bias, Data type: torch.float16
Parameter: gpt_neox.layers.0.attention.query_key_value.weight, Data type: torch.uint8
Parameter: gpt_neox.layers.0.attention.query_key_value.bias, Data type: torch.float16
Parameter: gpt_neox.layers.0.attention.dense.weight, Data type: torch.uint8
Parameter: gpt_neox.layers.0.attention.dense.bias, Data type: torch.float16
Parameter: gpt_neox.layers.0.mlp.dense_h_to_4h.weight, Data type: torch.uint8
Parameter: gpt_neox.layers.0.mlp.dense_h_to_4h.bias, Data type: torch.float16
Parameter: gpt_neox.layers.0.mlp.dense_4h_to_h.weight, Data type: torch.uint8
Parameter: gpt_neox.layers.0.mlp.dense_4h_to_h.bias, D

In [11]:
# Verify that the parameters are quantized
for name, param in model.named_parameters():
    print(f"Parameter: {name}, Data type: {param.dtype}")

Parameter: gpt_neox.embed_in.weight, Data type: torch.float32
Parameter: gpt_neox.layers.0.input_layernorm.weight, Data type: torch.float32
Parameter: gpt_neox.layers.0.input_layernorm.bias, Data type: torch.float32
Parameter: gpt_neox.layers.0.post_attention_layernorm.weight, Data type: torch.float32
Parameter: gpt_neox.layers.0.post_attention_layernorm.bias, Data type: torch.float32
Parameter: gpt_neox.layers.0.attention.query_key_value.weight, Data type: torch.float32
Parameter: gpt_neox.layers.0.attention.query_key_value.bias, Data type: torch.float32
Parameter: gpt_neox.layers.0.attention.dense.weight, Data type: torch.float32
Parameter: gpt_neox.layers.0.attention.dense.bias, Data type: torch.float32
Parameter: gpt_neox.layers.0.mlp.dense_h_to_4h.weight, Data type: torch.float32
Parameter: gpt_neox.layers.0.mlp.dense_h_to_4h.bias, Data type: torch.float32
Parameter: gpt_neox.layers.0.mlp.dense_4h_to_h.weight, Data type: torch.float32
Parameter: gpt_neox.layers.0.mlp.dense_4h_to_h

In [20]:
import lm_eval
from lm_eval.models.huggingface import HFLM

lm = HFLM(pretrained=model)

results_ = lm_eval.simple_evaluate( # call simple_evaluate
    model=lm,
    tasks=["hellaswag"],
    batch_size=256,
)
results_



Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
2024-05-30:15:38:22,672 INFO     [evaluator.py:131] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
2024-05-30:15:38:36,121 INFO     [task.py:395] Building contexts for hellaswag on rank 0...
100%|██████████| 10042/10042 [00:03<00:00, 2985.04it/s]
2024-05-30:15:38:40,566 INFO     [evaluator.py:362] Running loglikelihood requests
Running loglikelihood requests: 100%|██████████| 40168/40168 [13:22<00:00, 50.03it/s]


{'results': {'hellaswag': {'acc,none': 0.2665803624775941,
   'acc_stderr,none': 0.004412674170976502,
   'acc_norm,none': 0.27384983071101376,
   'acc_norm_stderr,none': 0.004450214826707207,
   'alias': 'hellaswag'}},
 'group_subtasks': {'hellaswag': []},
 'configs': {'hellaswag': {'task': 'hellaswag',
   'group': ['multiple_choice'],
   'dataset_path': 'hellaswag',
   'training_split': 'train',
   'validation_split': 'validation',
   'process_docs': 'def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc):\n        ctx = doc["ctx_a"] + " " + doc["ctx_b"].capitalize()\n        out_doc = {\n            "query": preprocess(doc["activity_label"] + ": " + ctx),\n            "choices": [preprocess(ending) for ending in doc["endings"]],\n            "gold": int(doc["label"]),\n        }\n        return out_doc\n\n    return dataset.map(_process_doc)\n',
   'doc_to_text': '{{query}}',
   'doc_to_target': '{{label}}',
   'doc_to_choice': 'choices',
   'des

In [18]:
import lm_eval
from lm_eval.models.huggingface import HFLM

lm = HFLM(pretrained=model_nf4)

results = lm_eval.simple_evaluate( # call simple_evaluate
    model=lm,
    tasks=["hellaswag"],
    batch_size=256,
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
2024-05-30:14:32:32,471 INFO     [evaluator.py:131] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
2024-05-30:14:32:44,820 INFO     [task.py:395] Building contexts for hellaswag on rank 0...
100%|██████████| 10042/10042 [00:03<00:00, 2982.40it/s]
2024-05-30:14:32:49,274 INFO     [evaluator.py:362] Running loglikelihood requests
Running loglikelihood requests: 100%|██████████| 40168/40168 [08:20<00:00, 80.23it/s]


In [19]:
results

{'results': {'hellaswag': {'acc,none': 0.27145986855208126,
   'acc_stderr,none': 0.004438038583345072,
   'acc_norm,none': 0.2744473212507469,
   'acc_norm_stderr,none': 0.004453233726110388,
   'alias': 'hellaswag'}},
 'group_subtasks': {'hellaswag': []},
 'configs': {'hellaswag': {'task': 'hellaswag',
   'group': ['multiple_choice'],
   'dataset_path': 'hellaswag',
   'training_split': 'train',
   'validation_split': 'validation',
   'process_docs': 'def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc):\n        ctx = doc["ctx_a"] + " " + doc["ctx_b"].capitalize()\n        out_doc = {\n            "query": preprocess(doc["activity_label"] + ": " + ctx),\n            "choices": [preprocess(ending) for ending in doc["endings"]],\n            "gold": int(doc["label"]),\n        }\n        return out_doc\n\n    return dataset.map(_process_doc)\n',
   'doc_to_text': '{{query}}',
   'doc_to_target': '{{label}}',
   'doc_to_choice': 'choices',
   'des