In [None]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from datasets import load_dataset, load_from_disk

In [7]:
device = 'cuda' if torch.cuda.is_available() else "cpu"

In [8]:
device

'cuda'

In [9]:
model = AutoModelForCausalLM.from_pretrained("gpt2", device_map='auto',)

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [10]:
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [11]:
# Freeze weights
for param in model.parameters():
    param.requires_grad = False

In [12]:
# LoRA
config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [13]:
model = get_peft_model(model, config)



In [14]:
# Load and structure data
data = load_from_disk("/kaggle/working/squad_v2")

In [15]:
data["train"]

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 130319
})

In [16]:
def create_prompt(context, question, answer):

    if len(answer["text"]) < 1:
        answer = "(ಠ_ಠ) ?"
    else:
        answer = answer["text"][0]

    prompt = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n{answer}</s>"
    return prompt

In [17]:
mapped_data = data.map(lambda samples: tokenizer(create_prompt(samples['context'], samples['question'], samples['answers'])))

Map:   0%|          | 0/130319 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (25744 > 1024). Running this sequence through the model will result in indexing errors


Map:   0%|          | 0/11873 [00:00<?, ? examples/s]

In [18]:
data

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 130319
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 11873
    })
})

In [19]:
mapped_data

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers', 'input_ids', 'attention_mask'],
        num_rows: 130319
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers', 'input_ids', 'attention_mask'],
        num_rows: 11873
    })
})

In [20]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [21]:
print_trainable_parameters(model)

trainable params: 589824 || all params: 125029632 || trainable%: 0.4717473694555863


In [22]:
# TRAINING
trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=500,
        learning_rate=2e-4,
        logging_steps=1,
        output_dir="outputs",
        auto_find_batch_size=True
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False

2024-05-13 16:31:08.305260: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-13 16:31:08.305355: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-13 16:31:08.415172: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [23]:
trainer.train()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ·········································


[34m[1mwandb[0m: [32m[41mERROR[0m API key must be 40 characters long, yours was 41
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
1,3.6087
2,3.7906
3,3.8669
4,3.7126
5,3.7916
6,3.878
7,3.9183
8,3.7548
9,3.7057
10,3.6818


RuntimeError: 
            Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'base_model.model.lm_head.weight', 'base_model.model.transformer.wte.weight'}].
            A potential way to correctly save your model is to use `save_model`.
            More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
            

In [24]:
torch.save(model.state_dict(), "lora-question-r16.pt")

In [25]:
model_trained = AutoModelForCausalLM.from_pretrained("gpt2", device_map="auto")

In [26]:
config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [63]:
model_trained = get_peft_model(model_trained, config)
model_trained = model_trained.to(device)
model_trained.load_state_dict(torch.load("lora-question-r16.pt", map_location=device))

<All keys matched successfully>

In [27]:
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

In [46]:
context = "I like learning math and playing computer games."
question = "What do I like to play?"
model_trained.to('cpu')
with torch.no_grad():
    batch = tokenizer(f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n", return_tensors='pt')
#     batch = tokenizer("“If you pay in peanuts, you get monkeys” ->: ", return_tensors="pt").to(device)
    output_tokens = model_trained.generate(**batch, max_new_tokens=25)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [47]:
print("\n\n", tokenizer.decode(output_tokens[0], skip_special_tokens=True))



 ### CONTEXT
I like learning math and playing computer games.

### QUESTION
What do I like to play?

### ANSWER

I like to play games.

### QUESTION

What do I like to play?

### AN
