In [1]:
# from transformers import GPT2LMHeadModel
# lm_model = GPT2LMHeadModel.from_pretrained(f"openai-community/gpt2", 
#                                                cache_dir='/data1/bumjin/datahub', 
# )
# lm_model
import os 
import torch 
from transformers import LlamaForCausalLM, LlamaTokenizer

def get_llama2(lm_name, lm_size, lm_cache_dir, num_gpus:int, precision):
    """Load a Llama-2 causal language model together with its tokenizer.

    Args:
        lm_name: Model family label. Only inspected for the substring
            ``'chat'``, which selects the ``-chat-hf`` checkpoint instead of
            the base ``-hf`` one.
        lm_size: Size tag interpolated into the hub repository id
            (the notebook uses ``'7b'``).
        lm_cache_dir: Download/cache directory. A ``'llama'`` sub-directory
            is appended unless the path already contains ``'llama'``.
        num_gpus: ``0`` places the model on ``'cpu'``; any other value places
            it on ``'cuda:0'``.
        precision: ``'int8'`` requests 8-bit loading on ``'cuda:0'`` with a
            bfloat16 ``torch_dtype``; ``'half'`` requests float16; any other
            value (e.g. ``None``) leaves the library defaults untouched.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer's ``sep_token_id`` and
        ``pad_token_id`` are both set to its ``eos_token_id``.
    """
    # Keep Llama checkpoints in their own sub-directory of the cache.
    if 'llama' not in lm_cache_dir:
        lm_cache_dir = os.path.join(lm_cache_dir, 'llama')

    if 'chat' in lm_name:
        model_name = f"meta-llama/Llama-2-{lm_size}-chat-hf"
    else:
        model_name = f"meta-llama/Llama-2-{lm_size}-hf"

    # Collect every loading option in ONE dict. Passing `device_map` both as
    # an explicit keyword and inside an unpacked dict (as the int8 branch used
    # to do) raises "got multiple values for keyword argument 'device_map'".
    load_kwargs = dict(device_map='cpu' if num_gpus == 0 else 'cuda:0')
    if precision == 'int8':
        # The int8 path pins the model to the first GPU, overriding the
        # placement chosen from `num_gpus`.
        load_kwargs.update(
            load_in_8bit=True,
            device_map='cuda:0',
            torch_dtype=torch.bfloat16,
        )
    elif precision == 'half':
        load_kwargs.update(torch_dtype=torch.float16)

    model = LlamaForCausalLM.from_pretrained(
        model_name,
        cache_dir=lm_cache_dir,
        resume_download=True,
        **load_kwargs,
    )
    tokenizer = LlamaTokenizer.from_pretrained(
        model_name,
        cache_dir=lm_cache_dir,
    )
    # Reuse EOS as both the separator and the padding token.
    tokenizer.sep_token_id = tokenizer.eos_token_id
    tokenizer.pad_token_id = tokenizer.eos_token_id
    return model, tokenizer

# Load the base (non-chat) 7B checkpoint on 'cuda:0' with no precision override.
lm_model, tokenizer = get_llama2(
    lm_name='llama2',
    lm_size='7b',
    lm_cache_dir='/data1/bumjin/datahub',
    num_gpus=1,
    precision=None,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
# Display the loaded model's module tree (its repr is the cell output).
lm_model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head): Lin

In [4]:
from transformers import AutoTokenizer

def fn(module, input, output):
    """Forward hook that ablates a module by zeroing its output in place.

    Args:
        module: The module the hook is attached to (unused).
        input: The module's positional inputs (unused).
        output: The module's forward result. Either a single tensor, or a
            tuple/list whose first element is the tensor to ablate.

    Returns:
        The same ``output`` object, with the ablated tensor zeroed in place.
    """
    # A bare tensor must be zeroed as a whole: indexing it with [0] would only
    # select the first batch element and leave the rest of the batch intact.
    target = output[0] if isinstance(output, (tuple, list)) else output
    # zero_() rather than `*= 0` so inf/NaN activations also become exactly 0.
    target.data.zero_()
    return output

# for layer in range(12):
#     hook = lm_model.transformer.h[layer].mlp.register_forward_hook(fn)
#     tokenizer = AutoTokenizer.from_pretrained(f"openai-community/gpt2")
#     input = tokenizer(['text is here'], return_tensors='pt')
#     output = lm_model.generate(**input, )
#     print("🍊 ------------------------------------------------------------")
#     print(f"layer:{layer}", tokenizer.decode(output[0], pad_token_id=tokenizer.eos_token_id), )
#     hook.remove()
    
# Ablate the MLP of one decoder layer at a time and print what the model
# generates for a fixed prompt with that layer's MLP output zeroed.
print("start....")

# The prompt is the same for every layer, so tokenize and move it to the
# model's device once instead of on every iteration.
model_device = lm_model.device
prompt = {
    key: tensor.to(model_device)
    for key, tensor in tokenizer(['Michel Jordan plays'], return_tensors='pt').items()
}

# NOTE(review): if the checkpoint's generation config enables sampling, the
# printed continuations will differ between runs — consider seeding torch.
for layer in range(len(lm_model.model.layers)):
    print("🍊 ------------------------------------------------------------")
    hook = lm_model.model.layers[layer].mlp.register_forward_hook(fn)
    try:
        # pad_token_id belongs to generate(), not to tokenizer.decode().
        output = lm_model.generate(
            **prompt,
            max_new_tokens=30,
            pad_token_id=tokenizer.eos_token_id,
        )
    finally:
        # Always detach the hook; otherwise a failed generate() would leave
        # this layer ablated for every later iteration and cell.
        hook.remove()
    print(f"layer:{layer}", tokenizer.decode(output[0]), )

start....
🍊 ------------------------------------------------------------
layer:0 <s> Michel Jordan playsMSPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPAPA
🍊 ------------------------------------------------------------
layer:1 <s> Michel Jordan playsOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
🍊 ------------------------------------------------------------
layer:2 <s> Michel Jordan plays the piano and sings in the musical band called “The Barefoot”.
The Barefoot is a band that plays in the style of
🍊 ------------------------------------------------------------
layer:3 <s> Michel Jordan plays the role of the French journalist who has come to the United States to interview a former Nazi officer who has been living in the US under an assumed
🍊 ------------------------------------------------------------
layer:4 <s> Michel Jordan plays with the same team as his son
The NBA has been a family affair for the Jordans.
Michael Jordan, the NBA legend and six-
🍊 ----------------------------------------