In [None]:
! pip install accelerate peft bitsandbytes transformers trl

Collecting accelerate
  Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.4.0-py3-none-any.whl (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m115.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trl
  Downloading trl-0.5.0-py3-none-any.whl (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.1/88.1 kB[0m [31m13.0 MB/s[0m eta [36m0:00:0

In [None]:
# Authenticate with the Hugging Face Hub before training: the training cell
# loads "meta-llama/Llama-2-7b-hf" and sets push_to_hub=True, both of which
# presumably need a valid Hub token (NOTE(review): confirm the account has
# access to the Llama-2 weights).
from huggingface_hub import notebook_login
notebook_login()

In [None]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os

def platypus_training(
    base_model="meta-llama/Llama-2-7b-hf",
    dataset_name="garage-bAInd/Open-Platypus",
    output_dir="platypus_llama_7b",
    max_steps=110,
):
    """Fine-tune a 4-bit quantized causal LM with LoRA adapters via SFTTrainer.

    The training split of ``dataset_name`` is flattened into a single ``text``
    column (``instruction + " " + output``), the base model is loaded with
    NF4 4-bit quantization on GPU 0, and LoRA adapters are trained on the
    ``gate_proj``/``down_proj``/``up_proj`` modules. The result is pushed to
    the Hugging Face Hub and saved locally under ``output_dir`` and
    ``output_dir/final_checkpoint``.

    Args:
        base_model: Hub id (or path) used for both the tokenizer and the model.
        dataset_name: Hub id of a dataset with ``instruction`` and ``output``
            string columns in its ``train`` split.
        output_dir: Directory for checkpoints and the final saved model; also
            passed to ``TrainingArguments`` as the output directory.
        max_steps: Total number of optimizer steps. A positive value takes
            precedence over ``num_train_epochs``.

    Returns:
        None. All results are written to disk / pushed to the Hub.
    """
    # --- Data: build the single text field the trainer consumes -------------
    data = load_dataset(dataset_name, split="train")
    data_df = data.to_pandas()
    # Vectorised string concatenation; avoids a Python-level call per row.
    data_df["text"] = data_df["instruction"] + " " + data_df["output"]
    # Non-mutating drop keeps the intermediate frame lineage explicit.
    data_df = data_df.drop(columns=["instruction", "output"])
    data = Dataset.from_pandas(data_df)

    # --- Tokenizer ----------------------------------------------------------
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    # Reuse EOS as the padding token so batches can be padded.
    tokenizer.pad_token = tokenizer.eos_token

    # --- Quantized base model -----------------------------------------------
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="float16",
        bnb_4bit_use_double_quant=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map={"": 0},  # place the whole model on device 0
    )
    model.config.use_cache = False
    model.config.pretraining_tp = 1

    # --- LoRA adapter configuration -----------------------------------------
    peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        target_modules=["gate_proj", "down_proj", "up_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    # --- Training setup -----------------------------------------------------
    # Effective batch per device = 8 * 4 = 32 sequences per optimizer step.
    training_arguments = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="steps",
        save_steps=50,
        save_total_limit=100,
        logging_steps=10,
        # Only takes effect when max_steps is not positive; otherwise
        # max_steps decides the length of the run.
        num_train_epochs=1,
        max_steps=max_steps,
        fp16=True,
        push_to_hub=True,
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=data,
        peft_config=peft_config,
        dataset_text_field="text",
        args=training_arguments,
        tokenizer=tokenizer,
        packing=False,
        max_seq_length=512,
    )

    # --- Train, publish, and persist ----------------------------------------
    trainer.train()
    trainer.push_to_hub()
    trainer.save_model(output_dir)

    # Separate copy of the trained model for the later merge step.
    final_dir = os.path.join(output_dir, "final_checkpoint")
    trainer.model.save_pretrained(final_dir)

# In a notebook kernel __name__ is "__main__", so executing this cell starts
# the full fine-tuning run; the guard only matters if the code is imported
# as a module.
if __name__ == "__main__":
    platypus_training()

In [None]:
# Copy the training output directory into Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive; no mount
# cell is visible in this notebook, so confirm before running.
! cp -r /content/platypus_llama_7b /content/drive/MyDrive/

In [None]:
# Ask PyTorch to release its cached, currently unused GPU memory after
# training, before any further GPU work in this session.
torch.cuda.empty_cache()

In [None]:
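# Optional, currently disabled: load the saved adapter checkpoint from Drive,
# merge it into the base model, and save the merged weights as safetensors.
# Uncomment the lines below to run it.
# from peft import AutoPeftModelForCausalLM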
# from transformers import BitsAndBytesConfig
# import os
# import torch

# model = AutoPeftModelForCausalLM.from_pretrained("/content/drive/MyDrive/platypus_llama_7b/final_checkpoint", device_map={"": 0})
# model = model.merge_and_unload()
# output_merged_dir = os.path.join("/content/drive/MyDrive/platypus_llama_7b", "final_merged_checkpoint")
# model.save_pretrained(output_merged_dir, safe_serialization=True)