# Load Quantized Model

In [None]:
!pip install bitsandbytes

In [None]:
!pip install accelerate

In [None]:
!pip install --upgrade transformers

In [1]:
from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM
import torch


In [2]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [3]:
model_name = "Writer/palmyra-base"

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

In [35]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config = bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [36]:
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 4096)
    (wpe): Embedding(2048, 4096)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Linear4bit(in_features=4096, out_features=12288, bias=True)
          (c_proj): Linear4bit(in_features=4096, out_features=4096, bias=True)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Linear4bit(in_features=4096, out_features=16384, bias=True)
          (c_proj): Linear4bit(in_features=16384, out_features=4096, bias=True)
          (act): GELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((4096,), eps=1e-05, element

In [None]:
prompt = "A 5-year-old male is brought to the pediatrician with complaints of a painful mouth/gums, and vesicular lesions on the lips and buccal mucosa for the past 4 days. The patient has not been able to eat or drink due to the pain and has been irritable. The patient also reports muscle aches. His vital signs are as follows: T 39.1, HR 110, BP 90/62, RR 18, SpO2 99%. Physical examination is significant for vesicular lesions noted on the tongue, gingiva, and lips, with some vesicles having ruptured and ulcerated, as well as palpable cervical and submandibular lymphadenopathy. Patient is diagnosed with ."

input_text = (
    "You're medical expert answer with the name of the disease only "
    "Only give name of disease based on following symptoms"
    "Symptoms: {prompt} "
    "Disease:"
)

model_inputs = tokenizer(input_text.format(prompt=prompt), return_tensors="pt").to(
    "cuda"
)

gen_conf = {
    "temperature": 0.7,
    "repetition_penalty": 1.0,
    "max_new_tokens": 512,
    "do_sample": True,
}

out_tokens = model.generate(**model_inputs, **gen_conf)

response_ids = out_tokens[0][len(model_inputs.input_ids[0]) :]
output = tokenizer.decode(response_ids, skip_special_tokens=True)

print(output)

# Dataset Prep

In [None]:
#wtf
# import locale
# locale.getpreferredencoding = lambda: "UTF-8"

In [None]:
!pip install datasets==2.16

In [8]:
from datasets import Dataset
from datasets import load_dataset

In [9]:
# ms_train = load_dataset("json", data_files="/content/drive/MyDrive/FineTuning/merged_data.jsonl")
ms_train = load_dataset("json", data_files="/kaggle/input/dataset/merged_data.jsonl")

In [10]:
ms_train

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'options', 'meta_info', 'answer_idx'],
        num_rows: 12723
    })
})

In [11]:
ms_train = ms_train['train'].remove_columns(['options', 'meta_info', 'answer_idx'])

In [12]:
ms_train = ms_train.train_test_split(test_size=0.1)

In [13]:
ms_train

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 11450
    })
    test: Dataset({
        features: ['question', 'answer'],
        num_rows: 1273
    })
})

In [14]:
ms_train['train'][5]

{'question': 'A data analyst is putting systolic blood pressure values into a spreadsheet for a research study on hypertension during pregnancy. The majority of systolic blood pressure values fall between 130 and 145. For one of the study participants, she accidentally types “1400” instead of “140”. Which of the following statements is most likely to be correct?',
 'answer': 'The median is now smaller than the mean'}

# Fine-Tuning

In [None]:
!pip install --upgrade trl

In [None]:
!pip install -U peft

In [15]:
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

2024-03-28 09:52:30.141873: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-28 09:52:30.141932: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-28 09:52:30.143358: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [16]:
new_model = "palmyra-ducky"

In [17]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [18]:
print_trainable_parameters(model)

trainable params: 214642688 || all params: 2631446528 || trainable%: 8.156832590595586


In [41]:
from peft import prepare_model_for_kbit_training
model = prepare_model_for_kbit_training(model)

In [42]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["c_attn", "c_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [43]:
from transformers import TrainingArguments

In [44]:
training_arguments = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    logging_steps=1,
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    num_train_epochs=2,
    evaluation_strategy="steps",
    eval_steps=0.2,
    warmup_ratio=0.05,
    save_strategy="epoch",
    group_by_length=True,
    output_dir="/content",
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,
)

In [45]:
from torch.nn.parallel import DataParallel

In [33]:
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# model = torch.nn.DataParallel(model, device_ids = [0,1]).to(device)

In [46]:
trainer = SFTTrainer(
    model=model,
    train_dataset=ms_train["train"],
    eval_dataset=ms_train["test"],
    peft_config=config,
    dataset_text_field="question",
    max_seq_length=4096,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/11450 [00:00<?, ? examples/s]

Map:   0%|          | 0/1273 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [47]:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [48]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
286,1.9539,1.645662
572,1.6716,1.594079
858,1.8268,1.570193
1144,1.63,1.558954
1430,1.8811,1.557177




TrainOutput(global_step=1430, training_loss=1.716476732867581, metrics={'train_runtime': 14474.1356, 'train_samples_per_second': 1.582, 'train_steps_per_second': 0.099, 'total_flos': 1.1094507384736973e+17, 'train_loss': 1.716476732867581, 'epoch': 2.0})

In [51]:
from huggingface_hub import notebook_login

In [52]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [56]:
from peft import PeftConfig

In [54]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [53]:
trainer.push_to_hub()

events.out.tfevents.1711620213.932ac703c91b.125.0:   0%|          | 0.00/5.21k [00:00<?, ?B/s]

events.out.tfevents.1711620763.932ac703c91b.125.1:   0%|          | 0.00/308k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/34.6M [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/SumedhJoshi/content/commit/524654e48af91d653c1c23d02464f7a3edcf1167', commit_message='End of training', commit_description='', oid='524654e48af91d653c1c23d02464f7a3edcf1167', pr_url=None, pr_revision=None, pr_num=None)

In [57]:
PEFT_MODEL = "SumedhJoshi/content" 

config = PeftConfig.from_pretrained(PEFT_MODEL)
peft_base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

peft_model = PeftModel.from_pretrained(peft_base_model, PEFT_MODEL)

peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
peft_tokenizer.pad_token = peft_tokenizer.eos_token

adapter_config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/34.6M [00:00<?, ?B/s]

In [58]:
def get_response(question):

    prompt = f"""
      ###Instruction: You are a medical expert. Suggest the disease from the given text which can replace the '[MASK]' in the text.
      If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.

      ###Question: {question}

      ###Response: 

      """

    peft_encoding = peft_tokenizer(prompt, return_tensors="pt").to("cuda:0")
    peft_outputs = peft_model.generate(input_ids=peft_encoding.input_ids, generation_config=GenerationConfig(max_new_tokens=256, pad_token_id = peft_tokenizer.eos_token_id, \
                                                                                                                      eos_token_id = peft_tokenizer.eos_token_id, attention_mask = peft_encoding.attention_mask, \
                                                                                                                      temperature=0.1, top_p=0.1, repetition_penalty=1.2, num_return_sequences=1,))
    peft_text_output = peft_tokenizer.decode(peft_outputs[0], skip_special_tokens=True)

    print(f'Response from fine-tuned model:\n{peft_text_output}')
  #print(dashline)
    

In [60]:
from transformers import GenerationConfig