In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
# base model from huggingFace or path to model
base_model = "mistralai/Mistral-7B-Instruct-v0.2"
new_model = "csci-566"


In [2]:
%%capture
!pip install -U bitsandbytes
!pip install transformers==4.36.2
!pip install -U peft
!pip install -U accelerate
!pip install -U trl
!pip install datasets==2.16.0
!pip install sentencepiece

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch
from datasets import load_dataset
from trl import SFTTrainer
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
from datasets import Dataset
import re


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


# Login to HF

In [5]:
from google.colab import userdata
secret_hf = userdata.get('HUGGINGFACE_TOKEN')
!huggingface-cli login --token $secret_hf

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Build Dataset

In [7]:
df = pd.read_csv("/content/drive/MyDrive/csci566_data/concept_aware_reasoning_preference_data.csv")

df_fine_tune = pd.DataFrame()
df_fine_tune['text'] = '<s>[INST]' + df['prompt'] + '[/INST]' + df['chosen'] + '</s>'

# Convert the new DataFrame to a Dataset object
dataset = Dataset(pa.Table.from_pandas(df_fine_tune))
print(dataset[0])

{'text': '<s>[INST]Question: - A small ice cube at a temperature of 0°C is dropped into a glass of water at 28°C and melts. What is the temperature of the water in the glass just after the ice cube melts?\nOptions: - [\'0°C\', \'between 0°C and 28°C\', \'28°C\', \'greater than 28°C\']\n\nAll the main real world concepts essential to answer the question are: \nList them down along with one line description\nRemember to return the response in a json structure as defined below - \n\n"concepts": [\n  {\n    "name": "concept/entity name",\n    "description": "one line description"\n  },\n  {\n    "name": "conecpt/entity name",\n    "description": "one line description"\n  },\n  ...\n]\n\n[your concepts list]\n\nIn manner of ontology, describe the relationship for each concept to the other concepts in the context of the sentence in a structured manner along with one line description for each are:\nRemember to return the response in a json structure as defined below -\n\n"relationships": [\n\

In [8]:
print(dataset[1])

{'text': '<s>[INST]Question: - Base your answers on the information below. One hot, summer day it rained very heavily. After the rain, a plastic pan on a picnic table had 2 cm of rainwater in it. Four hours later, all the rainwater in the pan was gone. If the day were cool instead of hot, the rainwater in the pan would have disappeared\nOptions: - [\'slower\', \'faster\', \'in the same amount of time\']\n\nAll the main real world concepts essential to answer the question are: \nList them down along with one line description\nRemember to return the response in a json structure as defined below - \n\n"concepts": [\n  {\n    "name": "concept/entity name",\n    "description": "one line description"\n  },\n  {\n    "name": "conecpt/entity name",\n    "description": "one line description"\n  },\n  ...\n]\n\n[your concepts list]\n\nIn manner of ontology, describe the relationship for each concept to the other concepts in the context of the sentence in a structured manner along with one line d

# Load the model

In [10]:
# Load base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_4bit=True,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)


model.config.use_cache = False # silence the warnings.
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.bos_token, tokenizer.eos_token


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

('<s>', '</s>')

# Fine-Tune

In [11]:
#Adding the adapters in the layers
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
)
model = get_peft_model(model, peft_config)

In [13]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir="/content/drive/MyDrive/csci566_data",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=50,
    logging_steps=1,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)


In [14]:
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length= None,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)



Map:   0%|          | 0/1747 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)


In [15]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
1,1.2532
2,1.0877
3,0.8986
4,0.797
5,0.6789
6,0.5717
7,0.5146
8,0.4022
9,0.3652
10,0.3419




TrainOutput(global_step=437, training_loss=0.2820743478885777, metrics={'train_runtime': 4765.4382, 'train_samples_per_second': 0.367, 'train_steps_per_second': 0.092, 'total_flos': 7.584660075225907e+16, 'train_loss': 0.2820743478885777, 'epoch': 1.0})

# Save and push the adapter to HF

In [17]:
# Save the fine-tuned model
trainer.model.save_pretrained(new_model)
model.config.use_cache = True
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer): Line

In [18]:
trainer.model.push_to_hub(new_model)

adapter_model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Redliz/csci-566/commit/77ed671853f7f2a2ce8916b0b9f7f72123bfe354', commit_message='Upload model', commit_description='', oid='77ed671853f7f2a2ce8916b0b9f7f72123bfe354', pr_url=None, pr_revision=None, pr_num=None)

# Test the model

In [None]:
logging.set_verbosity(logging.CRITICAL)
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

In [None]:
def build_prompt(question):
  prompt=f"<s>[INST]{question} [/INST]"
  return prompt

In [None]:
question = "......"
prompt = build_prompt(question)
result = pipe(prompt)

print(result[0]['generated_text'])

--------------------------