In [None]:
! pip install accelerate peft bitsandbytes transformers trl

Collecting accelerate
  Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.4.0-py3-none-any.whl (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m115.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trl
  Downloading trl-0.5.0-py3-none-any.whl (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.1/88.1 kB[0m [31m13.0 MB/s[0m eta [36m0:00:0

In [None]:
from huggingface_hub import notebook_login
# Authenticate with the Hugging Face Hub: the training cell in this notebook
# uploads to the Hub (push_to_hub=True / trainer.push_to_hub()).
# NOTE(review): presumably also needed to download meta-llama/Llama-2-7b-hf
# if that repository is access-gated — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os

def platypus_training(
    base_model="meta-llama/Llama-2-7b-hf",
    dataset_name="garage-bAInd/Open-Platypus",
    output_dir="platypus_llama_7b",
    max_steps=110,
    push_to_hub=True,
):
    """Fine-tune a 4-bit quantized causal LM with LoRA adapters using TRL's SFTTrainer.

    Called with no arguments, this reproduces the original run: Llama-2-7b on
    Open-Platypus for 110 optimizer steps, pushed to the Hub and saved under
    ``platypus_llama_7b``.

    Args:
        base_model: Hub id (or local path) used for both the tokenizer and the model.
        dataset_name: Hub id of the dataset; its ``train`` split must provide
            string ``instruction`` and ``output`` columns.
        output_dir: Directory for checkpoints and the saved adapter. With
            ``push_to_hub=True`` the trainer also uses it for the Hub repository.
        max_steps: Total number of optimizer steps. A positive value overrides
            ``num_train_epochs``; a value <= 0 falls back to one epoch.
        push_to_hub: When true, upload checkpoints and the final adapter to the
            Hub (requires a prior login). When false, everything stays local.

    Returns:
        None. Side effects: downloads the dataset and model, trains on GPU 0,
        writes ``output_dir`` and ``output_dir/final_checkpoint`` and
        optionally pushes to the Hub.
    """
    # Build the single "text" column consumed by SFTTrainer by joining each
    # instruction with its answer. Dataset.map avoids a round trip through
    # pandas and in-place DataFrame mutation.
    train_data = load_dataset(dataset_name, split="train")
    train_data = train_data.map(
        lambda row: {"text": row["instruction"] + " " + row["output"]},
        remove_columns=["instruction", "output"],
    )

    tokenizer = AutoTokenizer.from_pretrained(base_model)
    # Use the EOS token for padding.
    tokenizer.pad_token = tokenizer.eos_token

    # 4-bit NF4 weights with double quantization; matmuls computed in float16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="float16",
        bnb_4bit_use_double_quant=True,
    )

    # device_map={"": 0} places the whole model on GPU 0.
    model = AutoModelForCausalLM.from_pretrained(
        base_model, quantization_config=bnb_config, device_map={"": 0}
    )
    # The generation KV cache is not needed while training.
    model.config.use_cache = False
    # NOTE(review): presumably disables the sliced tensor-parallel code path of
    # the Llama implementation — confirm against the model config docs.
    model.config.pretraining_tp = 1

    # LoRA adapters are attached to the MLP projections only.
    peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        target_modules=["gate_proj", "down_proj", "up_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    # Effective batch size per device: 8 * 4 accumulation steps = 32 sequences.
    training_arguments = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="steps",
        save_steps=50,
        save_total_limit=100,
        logging_steps=10,
        # Only takes effect when max_steps <= 0; a positive max_steps wins.
        num_train_epochs=1,
        max_steps=max_steps,
        fp16=True,
        push_to_hub=push_to_hub,
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=train_data,
        peft_config=peft_config,
        dataset_text_field="text",
        args=training_arguments,
        tokenizer=tokenizer,
        packing=False,
        max_seq_length=512,
    )
    trainer.train()
    if push_to_hub:
        trainer.push_to_hub()
    trainer.save_model(output_dir)

    # Keep a separate copy of the adapter under a stable sub-directory name.
    final_checkpoint_dir = os.path.join(output_dir, "final_checkpoint")
    trainer.model.save_pretrained(final_checkpoint_dir)

# Start the fine-tuning run when this cell is executed; the guard keeps the
# job from starting if the code is imported as a module instead.
if __name__ == "__main__":
    platypus_training()

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]



Map:   0%|          | 0/24926 [00:00<?, ? examples/s]

Cloning https://huggingface.co/Vasanth/platypus_llama_7b into local empty directory.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,1.2254
20,1.1669
30,1.0886
40,1.0677
50,1.0375
60,1.0627
70,0.966
80,1.0848
90,1.0187


Step,Training Loss
10,1.2254
20,1.1669
30,1.0886
40,1.0677
50,1.0375
60,1.0627
70,0.966
80,1.0848
90,1.0187
100,1.0055


Upload file adapter_model.bin:   0%|          | 1.00/88.6M [00:00<?, ?B/s]

Upload file runs/Aug17_23-44-43_afd7057d74a0/events.out.tfevents.1692315953.afd7057d74a0.1023.0:   0%|        …

To https://huggingface.co/Vasanth/platypus_llama_7b
   e5c6c6b..c6f0a41  main -> main

   e5c6c6b..c6f0a41  main -> main

To https://huggingface.co/Vasanth/platypus_llama_7b
   c6f0a41..65bdb25  main -> main

   c6f0a41..65bdb25  main -> main

To https://huggingface.co/Vasanth/platypus_llama_7b
   65bdb25..71f083d  main -> main

   65bdb25..71f083d  main -> main

To https://huggingface.co/Vasanth/platypus_llama_7b
   71f083d..716d0f0  main -> main

   71f083d..716d0f0  main -> main



In [None]:
# Back up the training output directory to Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive — no mount
# cell is visible in this notebook; confirm before running on a fresh runtime.
! cp -r /content/platypus_llama_7b /content/drive/MyDrive/

In [None]:
# Release unused GPU memory still held by PyTorch's CUDA caching allocator
# now that training has finished.
torch.cuda.empty_cache()

In [None]:
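# Optional, currently disabled: load the saved LoRA adapter from Drive, merge it
# into the base model, and save the merged weights as safetensors. Uncomment to run.
# from peft import AutoPeftModelForCausalLM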
# from transformers import BitsAndBytesConfig
# import os
# import torch

# model = AutoPeftModelForCausalLM.from_pretrained("/content/drive/MyDrive/platypus_llama_7b/final_checkpoint", device_map={"": 0})
# model = model.merge_and_unload()
# output_merged_dir = os.path.join("/content/drive/MyDrive/platypus_llama_7b", "final_merged_checkpoint")
# model.save_pretrained(output_merged_dir, safe_serialization=True)