**Fine-tuning Mistral-7B-Instruct on Oscar Wilde texts**

In [None]:
# uncomment the following lines to run in colab
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# uncomment the following lines to run in colab
# !pip install -q -U bitsandbytes
# !pip install -q -U git+https://github.com/huggingface/transformers.git
# !pip install -q -U git+https://github.com/huggingface/peft.git
# !pip install -q -U git+https://github.com/huggingface/accelerate.git
# !pip install -q trl xformers wandb datasets einops gradio sentencepiece

In [None]:
# uncomment the following line to run in colab
# !nvidia-smi

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging, TextStreamer
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch, platform, gradio, warnings
from datasets import load_dataset, Dataset
from trl import SFTTrainer
from huggingface_hub import notebook_login
import json

In [None]:
# Basename (without the .json extension) of the instruction dataset file
dataset_name = "wilde_fine-tune_instructions_512_chat_format"

# Hub identifier of the base checkpoint that will be fine-tuned
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

In [None]:
# Location of the dataset JSON file, derived from dataset_name
dir_data = './data/{}.json'.format(dataset_name) # comment this line if you want to run in colab
# dir_data = f'./drive/MyDrive/DL-ENS/dataset/{dataset_name}.json' # uncomment this line if you want to run in colab

In [None]:
# Load the dataset.
# The JSON is read inside a context manager so the file handle is closed as
# soon as parsing finishes, and it is decoded explicitly as UTF-8 instead of
# the platform default encoding.
with open(dir_data, encoding="utf-8") as data_file:
    data_dict = json.load(data_file)

# Build an in-memory Dataset from the parsed dict; later cells read its
# 'text' column.
dataset = Dataset.from_dict(data_dict)

# Hold out 10% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs. The result exposes 'train' and 'test' splits.
dataset = dataset.train_test_split(test_size = 0.1, seed = 42)
dataset

In [None]:
# Example of the dataset: show the first training sample.
# Index the row first and then the column, so only this one sample is read
# rather than the whole 'text' column.
print(dataset['train'][0]['text'])

In [None]:
# Load the base model (Mistral 7B-Instruct) quantized to 4 bits.
# Quantization config: NF4 weight format, bfloat16 compute dtype,
# double quantization switched off.
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0} # use the first GPU
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
# NOTE(review): pretraining_tp looks carried over from Llama recipes -
# confirm it has any effect for this architecture.
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

# Load tokenizer.
# trust_remote_code is intentionally not passed: this checkpoint is expected to
# load with the tokenizer classes shipped in transformers, so there is no
# reason to allow code from the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): padding is applied on the left and the EOS token doubles as the
# pad token - confirm both choices are what the training collator expects.
tokenizer.padding_side = 'left'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
# Show the BOS/EOS flags as the cell output for a quick sanity check
tokenizer.add_bos_token, tokenizer.add_eos_token

In [None]:
# Attach LoRA adapters to the quantized model for fine-tuning.
# The adapter settings are described first: rank-16 update matrices with
# alpha 16 and 5% dropout, no bias training, applied to the listed projections.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
    ],
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Prepare the k-bit model for training, then wrap it with the adapters
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

In [None]:
# Display the adapter-wrapped model returned by get_peft_model; the bare
# expression makes its repr the cell output.
model

In [None]:
# Hyperparameters for fine-tuning
training_arguments = TrainingArguments(
    output_dir= "./results",
    num_train_epochs= 2,
    per_device_train_batch_size= 4,
    gradient_accumulation_steps= 2,  # 4 x 2 = 8 samples per optimizer step per device
    optim = "paged_adamw_8bit",
    save_steps= 1000,
    logging_steps= 30,
    learning_rate= 2e-4,
    weight_decay= 0.001,
    fp16= False,
    bf16= False,
    max_grad_norm= 0.3,
    max_steps= -1,  # no step cap: run length is set by num_train_epochs
    group_by_length= True,
    # The plain "constant" schedule takes no warmup steps, so no warmup_ratio
    # is passed here (it would be silently ignored). To ramp the learning rate
    # up at the start, switch to "constant_with_warmup" and set warmup_ratio.
    lr_scheduler_type= "constant",
    # report_to="wandb"
)

# Supervised fine-tuning trainer: trains on the 'text' field of the train
# split, keeps the test split for evaluation, limits sequences to 512 tokens
# and does not pack multiple samples into one sequence.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    peft_config=peft_config,
    max_seq_length= 512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)

In [None]:
# Train the model: run the fine-tuning loop on the SFTTrainer built in the
# previous cell; the call's return value is shown as the cell output.
trainer.train()

In [None]:
# Evaluate the model on the split given to the trainer as eval_dataset
# (dataset['test']); the returned metrics are shown as the cell output.
trainer.evaluate()

In [None]:
# Save the fine-tuned model's state dict to disk
model_save_name = 'Mistral7B_fine_tuned_OscarWilde.pt'
path = f"./models/{model_save_name}" # comment this line if you want to run in colab
# path = f"./drive/My Drive/{model_save_name}" # uncomment this line if you want to run in colab

# torch.save does not create missing parent directories; make sure the target
# folder exists so a finished training run is not lost at the very last step.
save_dir = os.path.dirname(path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

torch.save(model.state_dict(), path)

In [1]:
# Final status line marking the end of the notebook run
save_confirmation = "Model saved successfully!"
print(save_confirmation)

Model saved successfully!
