In [1]:
from transformers import AutoTokenizer
import transformers
import torch

In [2]:
# Llama 2 setup: load the tokenizer and build a text-generation pipeline.

# Hub id of the checkpoint; replace with llama-2-7b-chat-hf for the
# chatbot style model.
model = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model)

# Gather the pipeline arguments in one place, then build the pipeline from them.
_llama_pipe_kwargs = {
    "task": "text-generation",
    "model": model,
    "torch_dtype": torch.float16,
    "device_map": "auto",
    # "device": 0,  # for using the GPU
}
pipeline = transformers.pipeline(**_llama_pipe_kwargs)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Sample several short continuations of one prompt from the Llama 2 pipeline.
#
# Sampling (`do_sample=True`) is random, so the RNGs are seeded first; without
# a seed every "Restart & Run All" prints a different set of continuations.
transformers.set_seed(42)

sequences = pipeline(
    'Jupiter is a ',
    do_sample=True,           # draw tokens at random instead of greedy decoding
    top_k=10,                 # sample only among the 10 most likely next tokens
    num_return_sequences=10,  # number of independent continuations to return
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=10,        # cap on generated tokens (the prompt is not counted)
    temperature=1.0,
)

# Each result is a dict; 'generated_text' holds the prompt plus its continuation.
for seq in sequences:
    print(seq['generated_text'])


Jupiter is a 1947 Indian Malayalam film
Jupiter is a 100% solar powered system. It
Jupiter is a 2-hour drive to the east of us,
Jupiter is a 360-degree camera with a 
Jupiter is a 6000 year cycle and that the 
Jupiter is a 2.0 liter turbocharged four-
Jupiter is a 2017 Telugu-language action
Jupiter is a 2022 American action thriller film directed
Jupiter is a 2018 American biographical drama film based
Jupiter is a 3rd house stellium.
This year


In [4]:
# Disabled example (kept for reference, uncomment to run): a single sample
# (num_return_sequences=1) from a story-style prompt, limited with
# max_length=200 rather than max_new_tokens.
# sequences = pipeline(
#     'Tell me a story about a person writing a tutorial for installing llama 2 where every letter starts with s\n',
#     do_sample=True,
#     top_k=10,
#     num_return_sequences=1,
#     eos_token_id=tokenizer.eos_token_id,
#     max_length=200,
# )
# for seq in sequences:
#     print(f"Result: {seq['generated_text']}")

In [6]:
# Display the module tree of the first decoder layer of the Llama 2 model.
first_decoder_layer = pipeline.model.model.layers[0]
first_decoder_layer

LlamaDecoderLayer(
  (self_attn): LlamaSdpaAttention(
    (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
    (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
    (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
    (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
    (rotary_emb): LlamaRotaryEmbedding()
  )
  (mlp): LlamaMLP(
    (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
    (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
    (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
    (act_fn): SiLU()
  )
  (input_layernorm): LlamaRMSNorm()
  (post_attention_layernorm): LlamaRMSNorm()
)

In [9]:
# NOTE(review): this repeats the earlier `layers[0]` inspection and shows the
# same module tree; consider dropping one of the two cells.
llama_decoder_layers = pipeline.model.model.layers
llama_decoder_layers[0]

LlamaDecoderLayer(
  (self_attn): LlamaSdpaAttention(
    (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
    (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
    (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
    (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
    (rotary_emb): LlamaRotaryEmbedding()
  )
  (mlp): LlamaMLP(
    (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
    (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
    (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
    (act_fn): SiLU()
  )
  (input_layernorm): LlamaRMSNorm()
  (post_attention_layernorm): LlamaRMSNorm()
)

In [7]:
# GPT2 example: the same sampling experiment as the Llama 2 cell, run on GPT-2.
#
# Uses `transformers` and `AutoTokenizer` from the notebook's import cell.
# The pipeline factory is deliberately called as `transformers.pipeline`:
# `from transformers import pipeline` would rebind the global `pipeline`, which
# the Llama 2 cells use as their pipeline *object*, and re-running those cells
# afterwards would fail. For the same reason the GPT-2 tokenizer gets its own
# name instead of overwriting the Llama `tokenizer`.

# define the GPT-2 model name (could be "gpt2", "gpt2-medium", etc.)
model_name = "gpt2"

# load tokenizer and model
gpt2_tokenizer = AutoTokenizer.from_pretrained(model_name)
text_gen_pipe = transformers.pipeline(
    "text-generation",
    model=model_name,
    # you can specify `device=0` if you have a GPU
    # device=0,
)

# Sampling is random; seed the RNGs so a re-run prints the same continuations.
transformers.set_seed(42)

# example prompt
# NOTE(review): the prompt ends with a space; the recorded GPT-2 output then
# continues with fragments such as "a iced tea" / "a urn". Confirm whether the
# trailing space is intended before comparing against the Llama 2 results.
sequences = text_gen_pipe(
    "Jupiter is a ",
    do_sample=True,
    top_k=10,
    num_return_sequences=10,
    eos_token_id=gpt2_tokenizer.eos_token_id,
    # Passed explicitly: generation otherwise falls back to this same value and
    # logs "Setting `pad_token_id` to `eos_token_id`..." in the cell output.
    pad_token_id=gpt2_tokenizer.eos_token_id,
    max_new_tokens=10,
    temperature=1.0,
)

# print results
for seq in sequences:
    print(seq["generated_text"])


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Jupiter is a vernal constellation. It has five planets, three
Jupiter is a urchin-sized planet with four moons orbiting each
Jupiter is a iced tea, brewed with vanilla and rosemary and
Jupiter is a iced tea that can be brewed in a number of
Jupiter is a urn located within a planet that lies at approximately 6
Jupiter is a icescape.

This image is taken with
Jupiter is a ichthyosperm-like protoplanetary object
Jupiter is a vernacular name that means to fly.


Jupiter is a xturn of Jupiter's orbit around the sun.
Jupiter is a iced tea, a popular beverage that is often used


In [21]:
# Inspecting the GPT-2 model architecture.
# For comparison, Karpathy's minGPT implementation of the GPT-2 model:
# https://github.com/karpathy/minGPT/blob/master/mingpt/model.py

In [8]:
# Display the full GPT-2 module tree held by the pipeline.
gpt2_lm_model = text_gen_pipe.model
gpt2_lm_model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [19]:
# Display only the `transformer` submodule of the GPT-2 model.
gpt2_transformer = text_gen_pipe.model.transformer
gpt2_transformer

GPT2Model(
  (wte): Embedding(50257, 768)
  (wpe): Embedding(1024, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0-11): 12 x GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP(
        (c_fc): Conv1D()
        (c_proj): Conv1D()
        (act): NewGELUActivation()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)