<a href="https://colab.research.google.com/github/jayeshvpatil/llm_colabs/blob/main/Fine_Tuning_with_Quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install accelerate peft bitsandbytes transformers>=4.31.0 trl


In [2]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments,    BitsAndBytesConfig
from trl import SFTTrainer
import os

In [3]:
model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
output_model="tinyllama-medqa-jp-v1"

In [4]:
from datasets import load_dataset

dataset_id ="keivalya/MedQuad-MedicalQnADataset"

In [5]:
# we need to reformat the data in teh ChatML format.

def formatted_train(input,response)->str:
    return f"<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>\n"

In [6]:
def prepare_train_data(data_id):
    data = load_dataset(data_id, split="train").shuffle(seed=42).select(range(100000))
    data_df = data.to_pandas()
    data_df["text"] = data_df[["Question", "Answer"]].apply(lambda x: "<|im_start|>user\n" + x["Question"] + " <|im_end|>\n<|im_start|>assistant\n" + x["Answer"] + "<|im_end|>\n", axis=1)
    data = Dataset.from_pandas(data_df)
    return data

In [7]:
data = prepare_train_data(dataset_id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/233 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/22.5M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
data


Dataset({
    features: ['qtype', 'Question', 'Answer', 'text'],
    num_rows: 10000
})

In [9]:
data[0]

{'qtype': 'inheritance',
 'Question': 'Is D-bifunctional protein deficiency inherited ?',
 'Answer': 'This condition is inherited in an autosomal recessive pattern, which means both copies of the gene in each cell have mutations. The parents of an individual with an autosomal recessive condition each carry one copy of the mutated gene, but they typically do not show signs and symptoms of the condition.',
 'text': '<|im_start|>user\nIs D-bifunctional protein deficiency inherited ? <|im_end|>\n<|im_start|>assistant\nThis condition is inherited in an autosomal recessive pattern, which means both copies of the gene in each cell have mutations. The parents of an individual with an autosomal recessive condition each carry one copy of the mutated gene, but they typically do not show signs and symptoms of the condition.<|im_end|>\n'}

In [10]:
def get_model_and_tokenizer(model_id):
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token = tokenizer.eos_token
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="float16", bnb_4bit_use_double_quant=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_id, quantization_config=bnb_config, device_map="auto"
    )
    model.config.use_cache=False
    model.config.pretraining_tp=1
    return model, tokenizer

In [11]:
model, tokenizer = get_model_and_tokenizer(model_id)

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [12]:
peft_config = LoraConfig(
        r=8, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
    )

In [31]:
training_arguments = TrainingArguments(
        output_dir=output_model,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="epoch",
        logging_steps=10,
        num_train_epochs=3,
        max_steps=250,
        fp16=True,
        push_to_hub=True
    )

In [32]:
trainer = SFTTrainer(
        model=model,
        train_dataset=data,
        peft_config=peft_config,
        dataset_text_field="text",
        args=training_arguments,
        tokenizer=tokenizer,
        packing=False,
        max_seq_length=1024
    )

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [33]:
trainer.train()

Step,Training Loss
10,1.8462
20,1.6047
30,1.5215
40,1.3193
50,1.2769
60,1.2229
70,1.2206
80,1.232
90,1.1227
100,1.2267


TrainOutput(global_step=250, training_loss=1.2068996658325195, metrics={'train_runtime': 313.2931, 'train_samples_per_second': 3.192, 'train_steps_per_second': 0.798, 'total_flos': 2209968242675712.0, 'train_loss': 1.2068996658325195, 'epoch': 0.1})

In [34]:
trainer.save_model("test-tinyllama-medqa-trained")

In [26]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [35]:
trainer.push_to_hub('end of training')

CommitInfo(commit_url='https://huggingface.co/jayeshvpatil/tinyllama-medqa-jp-v1/commit/aa5ef44508b357d174a859ba89ce5e2a1cea15ae', commit_message='end of training', commit_description='', oid='aa5ef44508b357d174a859ba89ce5e2a1cea15ae', pr_url=None, pr_revision=None, pr_num=None)

In [39]:
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoModelForCausalLM
import torch
import os

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, load_in_8bit=False,
                                             device_map="auto",
                                             trust_remote_code=True)
peft_model = PeftModel.from_pretrained(model, output_model, from_transformers=True, device_map={"":0})
model = peft_model.merge_and_unload()

In [40]:
model.push_to_hub(output_model)

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jayeshvpatil/tinyllama-medqa-jp-v1/commit/d496714fcbc1d0b3e96b3c8d4de0c08b0b904ad8', commit_message='Upload LlamaForCausalLM', commit_description='', oid='d496714fcbc1d0b3e96b3c8d4de0c08b0b904ad8', pr_url=None, pr_revision=None, pr_num=None)

In [27]:
def formatted_prompt(question)-> str:
    return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"

In [30]:
from transformers import GenerationConfig
user_input='Is D-bifunctional protein deficiency inherited?'
prompt = formatted_prompt(user_input)
inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,
    top_k=5,temperature=0.1,repetition_penalty=1.2,
    max_new_tokens=256,pad_token_id=tokenizer.eos_token_id
)
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


<|im_start|>user
Is D-bifunctional protein deficiency inherited?<|im_end|>
<|im_start|>assistant:D-bifunctional protein (DBP) is a protein that plays several roles in the body. It helps to regulate blood sugar levels, which are important for maintaining energy balance and preventing diabetes. DBP also regulates the activity of enzymes involved in digestion and absorption of nutrients from food.<|im_end|>
These interactive molecular diagrams show how D-Bifunctional Protein works with other proteins in cells.


In [58]:
data[3]

{'qtype': 'susceptibility',
 'Question': 'Who is at risk for Pancreatic Neuroendocrine Tumors (Islet Cell Tumors)? ?',
 'Answer': "Having certain syndromes can increase the risk of pancreatic NETs. Anything that increases your risk of getting a disease is called a risk factor. Having a risk factor does not mean that you will get cancer; not having risk factors doesn't mean that you will not get cancer. Talk with your doctor if you think you may be at risk.     Multiple endocrine neoplasia type 1 (MEN1) syndrome is a risk factor for pancreatic NETs.",
 'text': "<|im_start|>user\nWho is at risk for Pancreatic Neuroendocrine Tumors (Islet Cell Tumors)? ? <|im_end|>\n<|im_start|>assistant\nHaving certain syndromes can increase the risk of pancreatic NETs. Anything that increases your risk of getting a disease is called a risk factor. Having a risk factor does not mean that you will get cancer; not having risk factors doesn't mean that you will not get cancer. Talk with your doctor if you t

In [59]:
from transformers import GenerationConfig
user_input='Who is at risk for Pancreatic Neuroendocrine Tumors (Islet Cell Tumors)?'
prompt = formatted_prompt(user_input)
inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,
    top_k=5,temperature=0.1,repetition_penalty=1.2,
    max_new_tokens=512,pad_token_id=tokenizer.eos_token_id
)
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


<|im_start|>user
Who is at risk for Pancreatic Neuroendocrine Tumors (Islet Cell Tumors)?<|im_end|>
<|im_start|>assistant:The incidence of pancreatic neuroendocrine tumors has increased in recent years. The majority of cases are diagnosed after the patient's symptoms have been present for several months or longer.<|im_end|>
These tumors can occur anywhere in the body, but they most commonly affect the liver and pancreas. They may also be found in other organs such as the lungs, stomach, intestines, kidneys, thyroid gland, adrenal glands, and skin. In some people with these tumors, the cancer spreads to nearby tissues or blood vessels. These tumors usually grow slowly over many years. However, if a person develops signs and symptoms that suggest an aggressive form of this disease, it is important to seek medical attention immediately. If you think you might have one of these diseases, talk to your doctor about getting regular checkups.


In [7]:
import requests
from time import perf_counter

API_URL = "https://api-inference.huggingface.co/models/jayeshvpatil/tinyllama-medqa-jp-v1"
headers = {"Authorization": "Bearer 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"}

def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

start_time = perf_counter()
output = query({
	"inputs": "What is a Tumor",
})
output_time = perf_counter() - start_time
print(f"Result: {output}")
print(f"Time taken for inference: {round(output_time,2)} seconds")


Result: {'error': 'Internal Server Error'}
Time taken for inference: 60.31 seconds
