In [1]:
! pip install accelerate peft bitsandbytes transformers trl auto-gptq optimum



In [1]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
from trl import SFTTrainer
import os

def finetune_xwin_lm():
    data = load_dataset("yahma/alpaca-cleaned", split="train")
    data_df = data.to_pandas()
    data_df = data_df[:10000]
    data_df["text"] = data_df[["input", "instruction", "output"]].apply(lambda x: "###Human: " + x["instruction"] + "\n" + x["input"] + "\n###Assistant: " +x["output"], axis=1)
    print(data_df.iloc[0])
    data = Dataset.from_pandas(data_df)
    tokenizer = AutoTokenizer.from_pretrained("TheBloke/Xwin-LM-7B-V0.1-GPTQ")
    tokenizer.pad_token = tokenizer.eos_token
    quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True)
    model = AutoModelForCausalLM.from_pretrained(
                              "TheBloke/Xwin-LM-7B-V0.1-GPTQ",
                              quantization_config=quantization_config_loading,
                              device_map="auto"
                          )
    model.config.use_cache=False
    model.config.pretraining_tp=1
    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)
    peft_config = LoraConfig(
        r=16, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
    )
    model = get_peft_model(model, peft_config)
    training_arguments = TrainingArguments(
        output_dir="xwin-finetuned-alpaca-cleaned",
        per_device_train_batch_size=8,
        gradient_accumulation_steps=1,
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="epoch",
        logging_steps=10,
        num_train_epochs=1,
        max_steps=250,
        push_to_hub=True
    )
    trainer = SFTTrainer(
        model=model,
        train_dataset=data,
        peft_config=peft_config,
        dataset_text_field="text",
        args=training_arguments,
        tokenizer=tokenizer,
        packing=False,
        max_seq_length=512
    )
    trainer.train()
    trainer.push_to_hub()

if __name__ == "__main__":
    finetune_xwin_lm()

input                                                           
output         1. Eat a balanced and nutritious diet: Make su...
instruction                 Give three tips for staying healthy.
text           ###Human: Give three tips for staying healthy....
Name: 0, dtype: object


You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. disable_exllama, use_cuda_fp16) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,1.5176
20,1.1606
30,1.1348
40,0.9998
50,1.0455
60,1.0835
70,1.008
80,1.0359
90,1.0098
100,1.0913


In [3]:
! cp -r /content/xwin-finetuned-alpaca-cleaned /content/drive/MyDrive/

# Inference

In [1]:
from peft import AutoPeftModelForCausalLM
from transformers import GenerationConfig
from transformers import AutoTokenizer
import torch
tokenizer = AutoTokenizer.from_pretrained("/content/xwin-finetuned-alpaca-cleaned")

inputs = tokenizer("""
###Instruction: I dropped my mobile phone in water. What to do?
###Response: """, return_tensors="pt").to("cuda")

model = AutoPeftModelForCausalLM.from_pretrained(
    "/content/xwin-finetuned-alpaca-cleaned",
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="cuda")

generation_config = GenerationConfig(
    penalty_alpha=0.6,
    do_sample = True,
    top_k=5,
    temperature=0.5,
    repetition_penalty=1.2,
    max_new_tokens=100
)
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


###Instruction: I dropped my mobile phone in water. What to do?
###Response: 1) Remove the battery and sim card immediately, as these components are sensitive to moisture damage. If there is any residual liquid left inside, dry it with a paper tissue or cloth. 2) Turn off your device completely by pressing the power button for at least 30 seconds (to ensure that all circuits have been shut down). 3) Dry out the affected areas of the device using a hairdryer on low heat setting, gently blowing
