<a href="https://colab.research.google.com/github/jayeshvpatil/llm_colabs/blob/main/Fine_Tuning_with_Quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install accelerate peft bitsandbytes transformers>=4.31.0 trl


In [2]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments,    BitsAndBytesConfig
from trl import SFTTrainer
import os

In [3]:
model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
output_model="tinyllama-medqa-jp-v1"

In [4]:
from datasets import load_dataset

dataset_id ="keivalya/MedQuad-MedicalQnADataset"

In [5]:
# we need to reformat the data in teh ChatML format.

def formatted_train(input,response)->str:
    return f"<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>\n"

In [6]:
def prepare_train_data(data_id):
    data = load_dataset(data_id, split="train").shuffle(seed=42).select(range(1000))
    data_df = data.to_pandas()
    data_df["text"] = data_df[["Question", "Answer"]].apply(lambda x: "<|im_start|>user\n" + x["Question"] + " <|im_end|>\n<|im_start|>assistant\n" + x["Answer"] + "<|im_end|>\n", axis=1)
    data = Dataset.from_pandas(data_df)
    return data

In [7]:
data = prepare_train_data(dataset_id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/233 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/22.5M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
data


Dataset({
    features: ['qtype', 'Question', 'Answer', 'text'],
    num_rows: 1000
})

In [9]:
data[0]

{'qtype': 'inheritance',
 'Question': 'Is D-bifunctional protein deficiency inherited ?',
 'Answer': 'This condition is inherited in an autosomal recessive pattern, which means both copies of the gene in each cell have mutations. The parents of an individual with an autosomal recessive condition each carry one copy of the mutated gene, but they typically do not show signs and symptoms of the condition.',
 'text': '<|im_start|>user\nIs D-bifunctional protein deficiency inherited ? <|im_end|>\n<|im_start|>assistant\nThis condition is inherited in an autosomal recessive pattern, which means both copies of the gene in each cell have mutations. The parents of an individual with an autosomal recessive condition each carry one copy of the mutated gene, but they typically do not show signs and symptoms of the condition.<|im_end|>\n'}

In [10]:
def get_model_and_tokenizer(model_id):
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token = tokenizer.eos_token
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="float16", bnb_4bit_use_double_quant=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_id, quantization_config=bnb_config, device_map="auto"
    )
    model.config.use_cache=False
    model.config.pretraining_tp=1
    return model, tokenizer

In [11]:
model, tokenizer = get_model_and_tokenizer(model_id)

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [12]:
peft_config = LoraConfig(
        r=8, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
    )

In [18]:
training_arguments = TrainingArguments(
        output_dir=output_model,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="epoch",
        logging_steps=10,
        num_train_epochs=3,
        max_steps=250,
        fp16=True,
        # push_to_hub=True
    )

In [19]:
trainer = SFTTrainer(
        model=model,
        train_dataset=data,
        peft_config=peft_config,
        dataset_text_field="text",
        args=training_arguments,
        tokenizer=tokenizer,
        packing=False,
        max_seq_length=512
    )

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [20]:
trainer.train()

Step,Training Loss
10,1.7704
20,1.587
30,1.4373
40,1.3442
50,1.3538
60,1.2946
70,1.2865
80,1.3152
90,1.2032
100,1.3139


TrainOutput(global_step=250, training_loss=1.2622682189941405, metrics={'train_runtime': 268.9481, 'train_samples_per_second': 3.718, 'train_steps_per_second': 0.93, 'total_flos': 1866330866479104.0, 'train_loss': 1.2622682189941405, 'epoch': 1.0})

In [21]:
trainer.save_model("test-tinyllama-med-trained")

In [22]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [23]:
trainer.push_to_hub('jayeshvpatil/tinyllama-medqa')

adapter_model.safetensors:   0%|          | 0.00/4.52M [00:00<?, ?B/s]

events.out.tfevents.1706902842.25cf30ae6f51.237.0:   0%|          | 0.00/4.64k [00:00<?, ?B/s]

events.out.tfevents.1706903143.25cf30ae6f51.237.1:   0%|          | 0.00/4.64k [00:00<?, ?B/s]

events.out.tfevents.1706903169.25cf30ae6f51.237.2:   0%|          | 0.00/8.88k [00:00<?, ?B/s]

Upload 6 LFS files:   0%|          | 0/6 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.60k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jayeshvpatil/tinyllama-medqa-jp-v1/commit/870748b55f0b1bfab8baff3adbec0e5c8f3e0558', commit_message='jayeshvpatil/tinyllama-medqa', commit_description='', oid='870748b55f0b1bfab8baff3adbec0e5c8f3e0558', pr_url=None, pr_revision=None, pr_num=None)

In [24]:
def formatted_prompt(question)-> str:
    return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"

In [28]:
from transformers import GenerationConfig
user_input='Is D-bifunctional protein deficiency inherited?'
prompt = formatted_prompt(user_input)
inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,
    top_k=5,temperature=0.5,repetition_penalty=1.2,
    max_new_tokens=12,pad_token_id=tokenizer.eos_token_id
)
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


<|im_start|>user
Is D-bifunctional protein deficiency inherited?<|im_end|>
<|im_start|>assistant:This condition is inherited in an autosomal recess


In [35]:
data[2]

{'qtype': 'information',
 'Question': 'What is (are) Compulsive Gambling ?',
 'Answer': "Many people enjoy gambling, whether it's betting on a horse or playing poker on the Internet. Most people who gamble don't have a problem, but some lose control of their gambling. Signs of problem gambling include        -  Always thinking about gambling    -  Lying about gambling    -  Spending work or family time gambling     -  Feeling bad after you gamble, but not quitting     -  Gambling with money you need for other things       If you have concerns about your gambling, ask for help. Your health care provider can work with you to find the treatment that's best for you.    NIH: National Institutes of Health",
 'text': "<|im_start|>user\nWhat is (are) Compulsive Gambling ? <|im_end|>\n<|im_start|>assistant\nMany people enjoy gambling, whether it's betting on a horse or playing poker on the Internet. Most people who gamble don't have a problem, but some lose control of their gambling. Signs of p

In [36]:
from transformers import GenerationConfig
user_input='What is compulsive gambling?'
prompt = formatted_prompt(user_input)
inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,
    top_k=5,temperature=0.5,repetition_penalty=1.2,
    max_new_tokens=12,pad_token_id=tokenizer.eos_token_id
)
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


<|im_start|>user
What is compulsive gambling?<|im_end|>
<|im_start|>assistant:Compulsive Gambling (Gambling Disorder
