In [1]:
import logging

import torch
from transformers import GPT2Tokenizer

from src.model import GPT2, LoRAConfig

# Emit INFO-level log records so the progress messages logged by src.model
# show up in the cell outputs below.
logging.basicConfig(level=logging.INFO)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build GPT2 from the pre-trained "gpt2" checkpoint (the log reports 124.44 M
# parameters, with weights loaded from HuggingFace).
model = GPT2.from_pretrained("gpt2")
# Switch to evaluation mode; as the cell's last expression this also displays
# the module tree.
model.eval()

INFO:src.model:Initializing a pre-trained gpt2 model...
INFO:src.model:Initialized GPT with 124.44 M parameters (of which 38.60 M in embeddings)
INFO:src.model:Loading pre-trained weights from HuggingFace...


GPT2(
  (transformer): ModuleDict(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-11): 12 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=768, out_features=2304, bias=True)
          (c_proj): Linear(in_features=768, out_features=768, bias=True)
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm()
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [3]:
# Load the tokenizer for the same "gpt2" checkpoint the model was built from.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

In [4]:
# Seed torch's global RNG right before sampling so this cell prints the same
# continuation on every run (including Restart Kernel -> Run All).
# NOTE(review): assumes model.generate samples via torch's global RNG — confirm
# against src.model.
SEED = 42
torch.manual_seed(SEED)

prompt = "Once upon a time in a land far, far away, there lived a"
# return_tensors="pt" yields a PyTorch tensor of token ids for the prompt.
input_ids = tokenizer.encode(prompt, return_tensors="pt")
output_ids = model.generate(input_ids, max_tokens=50, temperature=1.0, top_k=40)
# Decode the first returned sequence (prompt + continuation, per the output below).
generated_text = tokenizer.decode(output_ids[0])

print(generated_text)

Once upon a time in a land far, far away, there lived a king of men, who was the daughter of an eminent merchant, who came from a land far to the south, who had a great army, and who had sent a great army against their home, to bring ruin on them. For the king was


# Add new tokens to tokenizer

**GPT-2 was trained without a padding token, so we need to add one.**

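**Usage of the `<|im_start|>` / `<|im_end|>` tokens:** each chat turn starts with `<|im_start|>` followed by the role name, and ends with `<|im_end|>`.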

```text
<|im_start|>user
How do I bake a cake?<|im_end|>
<|im_start|>assistant
Here's how to bake a basic cake...<|im_end|>
```

In [5]:
# Register the two chat-turn delimiter tokens and a dedicated padding token
# with the tokenizer.
tokenizer.add_special_tokens(
    dict(
        additional_special_tokens=["<|im_start|>", "<|im_end|>"],
        pad_token="<|pad|>",
    )
)

# Expose the delimiter strings and their vocabulary ids as tokenizer attributes
# so later cells can refer to them by name.
tokenizer.im_start_token, tokenizer.im_end_token = "<|im_start|>", "<|im_end|>"
tokenizer.im_start_token_id, tokenizer.im_end_token_id = tokenizer.convert_tokens_to_ids(
    ["<|im_start|>", "<|im_end|>"]
)

# Make fine-tuneable

In [6]:
# Derive a fine-tuneable model from the pre-trained one. The log shows a GPT
# being initialized again here, so this presumably builds a new model object
# rather than modifying `model` in place — TODO confirm against src.model.
fine_tuneable = model.to_fine_tuneable()

INFO:src.model:Initialized GPT with 124.44 M parameters (of which 38.60 M in embeddings)


In [7]:
# Grow the token embeddings to cover the tokens added to the tokenizer
# (logged as 50257 -> 50260).
fine_tuneable.extend_vocabulary(len(tokenizer))
# Tell the model which id is padding; per the log, the embedding at that index
# (50259) is set to zero.
fine_tuneable.set_padding_token(tokenizer.pad_token_id)

INFO:src.model:Extended token embeddings: 50257 -> 50260
INFO:src.model:Set padding embedding at index 50259 to zero


In [8]:
# LoRA hyperparameters; r and alpha are presumably the adapter rank and the
# scaling factor — confirm against LoRAConfig in src.model.
lora_config = LoRAConfig(r=8, alpha=32)
# Attach the LoRA layers. Per the log this initializes LoRA for c_attn, c_proj
# and c_fc, applies selective parameter freezing, and reduces the parameters
# requiring gradients from 124.44 M to 39.78 M.
fine_tuneable.apply_lora(lora_config)

INFO:src.model:Initialized LoRA layers for modules: ['c_attn', 'c_proj', 'c_fc']
INFO:src.model:Registered selective gradient hook for 2 new tokens
INFO:src.model:Applied selective parameter freezing for LoRA and new token embeddings
INFO:src.model:LoRA initialized: num. of parameters requiring gradient computation: 124.44 M -> 39.78 M
