In [None]:
!pip install -q -U bitsandbytes wandb
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m76.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.9/243.9 kB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m6.1 MB/

In [None]:
# Import libraries
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel
from datasets import load_dataset
import transformers
from huggingface_hub import notebook_login
import wandb

In [None]:
# Authenticate with the Hugging Face Hub via the interactive login widget.
# Presumably needed both to download the base checkpoint and for the later
# private push_to_hub call — TODO confirm which steps actually require it.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Load the Llama-2-7b base checkpoint as a 4-bit bitsandbytes model, then its tokenizer.

model_id = "meta-llama/Llama-2-7b-hf"

# Quantization recipe: 4-bit NF4 weights, no double quantization, bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

# device_map={"": 0}: the empty-string key is understood to place the whole model
# on device index 0 — confirm against the transformers/accelerate docs.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map={"": 0},
    quantization_config=bnb_config,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Load in the data
# Fetched from the Hugging Face Hub; the recorded output shows a train split
# (5319 examples) and a test split (591 examples).

full_data = load_dataset("jackhogan/agemo_json_prompts")

Downloading readme:   0%|          | 0.00/623 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.21M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/140k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/5319 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/591 [00:00<?, ? examples/s]

In [None]:
# Peek at one corrupted ("scrambled") JSON string from the test split.
full_data['test'][2]['scrambled']

'{"albumId": 92, "id",: 4580, "title": "sunt odio consequuntur perferendis", "url": "https://via.placeholder.com/600/b9d561", "thumbnailUrl": "https://via.placeholder.com/150/b9d561"}}'

In [None]:
# Baseline check: have the untuned base Llama-2-7b model attempt a JSON repair.
test_case = full_data['test'][5]['scrambled']

# Instruction template; the single `{}` slot receives the broken JSON string.
prompt_template = """
Below is a JSON string containing a syntactic error. Return the corrected JSON string.\n\n### Broken JSON:\n{}\n\n### Repaired JSON:\n
"""
prompt = prompt_template.format(test_case)

# Tokenize to PyTorch tensors and move them onto the GPU.
inputs = tokenizer(prompt, return_tensors='pt').to('cuda')

# Generate at most 200 new tokens after the prompt.
output_tokens = model.generate(**inputs, max_new_tokens=200)

# Decode and print every returned sequence (prompt included).
for output in output_tokens:
    print(tokenizer.decode(output))

<s> 
Below is a JSON string containing a syntactic error. Return the corrected JSON string.

### Broken JSON:
}{"albumId": 95, "id": 4719, "title": "ipsum atque amet voluptas mollitia rerum inventore", "url": "https://via.placeholder.com/600/6bb2f7", "thumbnailUrl": "https://via.placeholder.com/150/6bb2f7",[}

### Repaired JSON:

```json
{
  "albumId": 95,
  "id": 4719,
  "title": "ipsum atque amet voluptas mollitia rerum inventore",
  "url": "https://via.placeholder.com/600/6bb2f7",
  "thumbnailUrl": "https://via.placeholder.com/150/6bb2f7",
  "artist": {
    "id": 4719,
    "name": "ipsum atque amet voluptas mollitia rerum inventore"
  }
}
```

### Explanation:

The error was that the `artist` object was not closed properly. The `artist` object should have been closed with a `]` character.

### Notes:

* This is


In [None]:
# Modify tokenizer for training
# Append the EOS token to every tokenized sample (presumably so the fine-tuned
# model learns to stop after the repaired JSON — TODO confirm).
tokenizer.add_eos_token = True
# NOTE(review): 18610 is a hard-coded vocabulary id used as the padding token;
# which token it maps to is not shown in this notebook. The language-modeling
# collator used for training is expected to exclude pad-token positions from the
# loss, so any real occurrence of this id in the data would be ignored too —
# verify the choice and consider naming it as a constant.
tokenizer.pad_token_id = 18610

In [None]:
# Use the prompted version for fine-tuning
# Tokenize the "text" column in batches; per the displayed DatasetDict the result
# keeps the original columns and adds input_ids and attention_mask.
data = full_data.map(lambda samples: tokenizer(samples["text"]), batched=True)

In [None]:
# Display the tokenized DatasetDict (splits, columns and row counts).
data

DatasetDict({
    train: Dataset({
        features: ['text', 'correct', 'scrambled', 'input_ids', 'attention_mask'],
        num_rows: 5319
    })
    test: Dataset({
        features: ['text', 'correct', 'scrambled', 'input_ids', 'attention_mask'],
        num_rows: 591
    })
})

In [None]:
# Prepare model for training
# Turn on gradient checkpointing, then run PEFT's k-bit training preparation on
# the quantized model. Note that `model` is rebound to the prepared model.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
def print_trainable_parameters(model):
    """
    Print the number of trainable parameters in the model.

    Walks ``model.named_parameters()`` and prints one line with the trainable
    count, the total count and the trainable percentage.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs whose parameters provide ``numel()``
            and ``requires_grad`` (e.g. a ``torch.nn.Module``).

    Returns:
        None. The summary is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        num_params = param.numel()
        # bitsandbytes 4-bit weights (class name ``Params4bit``) pack two 4-bit
        # values into each stored element, so ``numel()`` reports only half of
        # the logical weights. Double it, mirroring PEFT's own parameter
        # counter. This assumes the default 1-byte quant storage — TODO confirm
        # if a different storage dtype is configured.
        if param.__class__.__name__ == "Params4bit":
            num_params *= 2
        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params
    # A model without parameters reports 0.0% instead of raising ZeroDivisionError.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank 8, alpha 32, dropout 0.05,
# applied to the attention query and value projections; bias terms are not adapted.
config = LoraConfig(
    task_type="CAUSAL_LM",
    # Wider alternative kept for reference:
    #target_modules=["q_proj", "o_proj", "k_proj", "v_proj"],
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the prepared model with the adapters (rebinding `model`), then report how
# many of its parameters will be trained.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 4194304 || all params: 3504607232 || trainable%: 0.11967971650867153


In [None]:
# Log in to Weights & Biases; in the recorded run this prompted for an API key.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Monitoring: start the W&B run for this training job (the Trainer is configured
# with report_to="wandb").
run = wandb.init(project='Fine-tuning llama-2-7B', job_type="training", anonymous="allow")

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/learning_rate,▁▂▄▇████████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆
train/loss,██▇█▇▆▆▆▆▅▄▅▄▄▄▄▄▃▃▄▃▃▃▃▄▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁

0,1
train/epoch,0.32
train/global_step,54.0
train/learning_rate,0.00015
train/loss,0.0


In [None]:
# Hyperparameters for the run: micro-batches of 2 accumulated over 8 steps
# (16 sequences per optimizer step per device), one epoch, lr 2e-4 with 6 warmup
# steps, fp16 mixed precision, paged 8-bit AdamW, per-step logging to W&B.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=1.0,
    #max_steps=100,
    learning_rate=2e-4,
    warmup_steps=6,
    fp16=True,
    optim="paged_adamw_8bit",
    logging_steps=1,
    report_to="wandb",
)

# Causal-LM collation (mlm=False): no masked-language-modeling objective.
collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    data_collator=collator,
)

# Silences the cache warnings while training; must be switched back on for inference.
model.config.use_cache = False

In [None]:
# Run the fine-tuning loop. The recorded run completed 332 optimizer steps
# (1 epoch) in roughly 6727 seconds.
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,1.4029
2,1.3174
3,1.3559
4,1.4119
5,1.2293
6,1.2548
7,1.2589
8,1.0767
9,1.2487
10,1.1365


TrainOutput(global_step=332, training_loss=0.4353610528520791, metrics={'train_runtime': 6726.6486, 'train_samples_per_second': 0.791, 'train_steps_per_second': 0.049, 'total_flos': 4.947728534740992e+16, 'train_loss': 0.4353610528520791, 'epoch': 1.0})

In [None]:
# Upload the fine-tuned model to a private Hub repository. The recorded output
# shows an adapter_model.safetensors file of about 16.8 MB being pushed.
model.push_to_hub("jackhogan/llama_agemo_finetune",
                  commit_message="1 epoch of finetune",
                  private=True)

adapter_model.safetensors:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jackhogan/llama_agemo_finetune/commit/f44a5a40bf383dae3668dcfd2015356df0d16065', commit_message='1 epoch of finetune', commit_description='', oid='f44a5a40bf383dae3668dcfd2015356df0d16065', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Pick one broken JSON string from the test split for the post-training check.
test_case = data['test'][24]['scrambled']

In [None]:
# Reload a fresh tokenizer, discarding the training-time tweaks made earlier
# (add_eos_token / pad_token_id). Presumably so inference prompts are not
# terminated with an EOS token — TODO confirm.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Display the selected broken JSON string.
test_case

'{"albumId": 79, "id": 3911, "title": ,{"albumId": 29, "id": 1404, "title": "veniam sint ullam eligendi animi", "url": "https://via.placeholder.com/600/ffe146", "thumbnailUrl": "https://via.placeholder.com/150/ffe146"[}, "url": "https://via.placeholder.com/600/1984e8", "thumbnailUrl": "https://via.placeholder.com/150/1984e8"}'

In [None]:
# Try some inference on the fine-tuned version
test_case = data['test'][24]['scrambled']
prompt = """
Below is a JSON string containing a syntactic error. Return the corrected JSON string.\n\n### Broken JSON:\n{}\n\n### Repaired JSON:\n
"""
prompt = prompt.format(test_case)

# Training leaves the model in train mode with gradient checkpointing active and
# `model.config.use_cache` set to False (see the line next to the Trainer setup).
# In that state generation falls back to cache-less decoding ("`use_cache=True`
# is incompatible with gradient checkpointing"), which recomputes the whole
# sequence for every new token. Switch to eval mode and re-enable the KV cache
# before generating.
model.eval()
model.config.use_cache = True

inputs = tokenizer(prompt, return_tensors='pt').to('cuda')

# Allow up to 500 new tokens for the repaired JSON.
output_tokens = model.generate(**inputs, max_new_tokens=500)

# Decode and print every returned sequence (prompt included).
for output in output_tokens:
    print(tokenizer.decode(output))

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


KeyboardInterrupt: ignored