In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install -q accelerate peft bitsandbytes transformers trl datasets torch

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.4/293.4 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import accelerate
import peft
import bitsandbytes
import transformers
import trl
import datasets

In [4]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import TrainingArguments
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

import warnings
warnings.filterwarnings('ignore')

In [5]:
if torch.cuda.is_available():
    print(torch.cuda.device_count(), torch.cuda.get_device_name(0), torch.cuda.get_device_properties(0).total_memory / 1e9)

2 Tesla T4 15.828320256


In [6]:
dataset = load_dataset("nlpie/Llama2-MedTuned-Instructions")

README.md:   0%|          | 0.00/2.96k [00:00<?, ?B/s]

(…)-00000-of-00001-a8790d88efc2bc45.parquet:   0%|          | 0.00/91.1M [00:00<?, ?B/s]

(…)-00000-of-00001-b543c64b1786c03e.parquet:   0%|          | 0.00/6.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/200252 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/70066 [00:00<?, ? examples/s]

In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 200252
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 70066
    })
})

In [8]:
# Show the first 3 rows
for i in range(3):
    data = dataset['train'][i]
    print(f"Data Point {i + 1}:")
    print("Instruction >>>", data['instruction'])
    print("Input       >>>", data['input'])
    print("Output      >>>", data['output'])
    print("\n-----------------------------\n")

Data Point 1:
Instruction >>> In your role as a medical professional, address the user's medical questions and concerns.
Input       >>> My relative suffering from secondary lever cancer ( 4th stage as per Allopathic doctor) and primary is in rectum. He is continuously with 103 to 104 degree F fever. Allpathic doctor suggested chemo only after fever subsidises. Is treatment possible at Lavanya & what is the time scale of recover.
Output      >>> Hi, dairy have gone through your question. I can understand your concern. He has rectal cancer with liver metastasis. It is stage 4 cancer. Surgery is not possible at this stage. Only treatment options are chemotherapy and radiotherapy according to type of cancer. Inspite of all treatment prognosis is poor. Life expectancy is not good. Consult your doctor and plan accordingly. Hope I have answered your question, if you have any doubts then contact me at bit.ly/ Chat Doctor. Thanks for using Chat Doctor. Wish you a very good health.

-----------

In [9]:
# selecting some data to train the model fast
dataset["train"] = dataset["train"].select(range(3500))
dataset["test"]  = dataset["train"].select(range(300))

In [10]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 3500
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 70066
    })
    test: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 300
    })
})

In [37]:
# creating the prompt 
def create_prompt(sample):
    prompt = sample["instruction"]
    prompt += sample["input"]
    
    single_turn_prompt = f"""Instruction: {prompt}<|end_of_turn|>AI Assistant: {sample["output"]}"""
    return single_turn_prompt

In [12]:
create_prompt(dataset['train'][0])

"Instruction: In your role as a medical professional, address the user's medical questions and concerns. My relative suffering from secondary lever cancer ( 4th stage as per Allopathic doctor) and primary is in rectum. He is continuously with 103 to 104 degree F fever. Allpathic doctor suggested chemo only after fever subsidises. Is treatment possible at Lavanya & what is the time scale of recover.<|end_of_turn|>AI Assistant: Hi, dairy have gone through your question. I can understand your concern. He has rectal cancer with liver metastasis. It is stage 4 cancer. Surgery is not possible at this stage. Only treatment options are chemotherapy and radiotherapy according to type of cancer. Inspite of all treatment prognosis is poor. Life expectancy is not good. Consult your doctor and plan accordingly. Hope I have answered your question, if you have any doubts then contact me at bit.ly/ Chat Doctor. Thanks for using Chat Doctor. Wish you a very good health."

In [14]:
create_prompt(dataset['train'][10])

'Instruction: In the clinical text, your objective is to identify relationships between medical problems, treatments, and tests. Medical problems are tagged as @problem$, medical tests as @test$, and treatments as @treatment$. Classify the relationship between two entities as one of the following:\nTreatment improves medical problem (TrIP)\nTreatment worsens medical problem (TrWP)\nTreatment causes medical problem (TrCP)\nTreatment is administered for medical problem (TrAP)\nTreatment is not administered because of medical problem (TrNAP)\nTest reveals medical problem (TeRP)\nTest conducted to investigate medical problem (TeCP)\nMedical problem indicates medical problem (PIP)\nNo Relations Digoxin 0.125 mg q.d. , @treatment$ 80 mg q.a.m. and 40 mg q.p.m. aspirin 1 q.d. , and @treatment$ three puffs b.i.d.<|end_of_turn|>AI Assistant: No Relations'

In [15]:
# Quantization step
bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                bnb_4bit_quant_type="nf4",
                                bnb_4bit_compute_dtype="float16",
                                bnb_4bit_use_double_quant=True)

In [16]:
# Loading the LLM
# https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha

repository_hf = "berkeley-nest/Starling-LM-7B-alpha"

In [17]:
# Load the LLM applying quantization
llm_model = AutoModelForCausalLM.from_pretrained(
    repository_hf,
    quantization_config=bnb_config,
    device_map="auto",
    use_cache=False
    )

config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

In [18]:
# Load the LLM tokenizer
tokenizer = AutoTokenizer.from_pretrained(repository_hf)

# Padding 
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

In [20]:
tokenizer.eos_token, tokenizer.eos_token_id

('<|end_of_turn|>', 32000)

In [21]:
def generate_response_before_fine_tuning(prompt, model):

    # Apply tokenizer
    encoded_input = tokenizer(prompt,
                              return_tensors="pt", # pytorch
                              add_special_tokens=True)

    # Input to tensor
    model_inputs = encoded_input.to("cuda")

    # Generate response
    generated_ids = model.generate(**model_inputs,
                                   max_new_tokens=1024,
                                   do_sample=True,
                                   pad_token_id=tokenizer.eos_token_id)

    # Decoding the response
    decoded_output = tokenizer.batch_decode(generated_ids)

    return decoded_output[0].replace(prompt, "")

In [22]:
# Example
prompt = """Instruction: Your goal is to determine the relationship between the two provided clinical sentences and classify them into one of the following categories:
Contradiction: If the two sentences contradict each other. Neutral: If the two sentences are unrelated to each other. Entailment: If one of the sentences logically entails the other. """
prompt += '''Sentence 1: For his hypotension, autonomic testing confirmed orthostatic hypotension. Sentence 2: the patient has orthostatic hypotension <|end_of_turn|>'''
prompt += "AI Assistant:"

In [24]:
generate_response_before_fine_tuning(prompt, llm_model)

'<s> Instruction: Your goal is to determine the relationship between the two provided clinical sentences and classify them into one of the following categories:\nContradiction: If the two sentences contradict each other. Neutral: If the two sentences are unrelated to each other. Entailment: If one of the sentences logically entails the other. Sentence 1: For his hypotension, autonomic testing confirmed orthostatic hypotension. Sentence 2: the patient has orthostatic hypotension <|end_of_turn|> AI Assistant: Entailment<|end_of_turn|>'

In [25]:
# Define LoRA parameters with PEFT
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
    )

In [26]:
# Prepare the model for the fine-tuning step
llm_model = prepare_model_for_kbit_training(llm_model)

In [27]:
# Concatenate the base model with the LoRA parameters
llm_model = get_peft_model(llm_model, peft_config)

In [48]:
# # Model hyperparameters
# training_arguments = TrainingArguments(output_dir="model_finetuned",
#                                        per_device_train_batch_size=1,
#                                        gradient_accumulation_steps=4,
#                                        optim="paged_adamw_32bit",
#                                        learning_rate=2e-4,
#                                        lr_scheduler_type="cosine",
#                                        save_strategy="epoch",
#                                        logging_steps=10,
#                                        num_train_epochs=1,
#                                        max_steps=250,
#                                        fp16=torch.cuda.is_available(), # True
#                                        disable_tqdm=False
# )



PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [53]:
import torch

print("Is GPU available?", torch.cuda.is_available())
print("Using device:", torch.device("cuda" if torch.cuda.is_available() else "cpu"))

from transformers import logging

logging.set_verbosity_info()  # Set verbosity to show detailed information

training_arguments = TrainingArguments(
    output_dir="./model_finetuned",         # Output directory
    per_device_train_batch_size=4,  # Adjust batch size to GPU memory
    per_device_eval_batch_size=4,   # Adjust evaluation batch size
    num_train_epochs=1,             # Number of training epochs
    logging_dir="./logs",           # Logging directory
    evaluation_strategy="steps",    # Evaluation strategy
    save_steps=10,                  # Save every 10 steps
    save_total_limit=2,             # Keep only last 2 models
    logging_steps=5,                # Log every 5 steps
    report_to="none",               # Disable logging services like WandB
    load_best_model_at_end=True,    # Automatically load the best model at the end
    fp16=torch.cuda.is_available(), # Use mixed precision if GPU is available
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    disable_tqdm=False
)

using `logging_steps` to initialize `eval_steps` to 5
PyTorch: setting up devices


Is GPU available? True
Using device: cuda


In [54]:
def create_prompt(sample):
    prompt = sample["instruction"]
    prompt += sample["input"]
    
    single_turn_prompt = f"""Instruction: {prompt}<|end_of_turn|>AI Assistant: {sample["output"]}"""
    return [single_turn_prompt]  # Wrap the result in a list

In [55]:
# Supervised Fine-Tuning Trainer (SFTT) https://huggingface.co/docs/trl/sft_trainer
trainer = SFTTrainer(     
    model=llm_model,
    peft_config=peft_config,
    # max_seq_length=512,
    tokenizer=tokenizer,
    # packing=True,
    formatting_func=create_prompt,
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"]
                   )

PyTorch: setting up devices
You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
Using auto half precision backend


In [56]:
%%time 
trainer.train()

***** Running training *****
  Num examples = 4
  Num Epochs = 1
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 1
  Number of trainable parameters = 3,407,872


Step,Training Loss,Validation Loss


Saving model checkpoint to ./model_finetuned/checkpoint-1
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--berkeley-nest--Starling-LM-7B-alpha/snapshots/1dddf3b95bc1391f6307299eb1c162c194bde9bd/config.json
Model config MistralConfig {
  "_name_or_path": "openchat/openchat_3.5",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.47.0",
  "use_cache": true,
  "vocab_size": 32002
}

tokenizer config file saved in ./model_finetuned/checkpoint-1/tok

CPU times: user 7.32 s, sys: 6 s, total: 13.3 s
Wall time: 13.4 s


TrainOutput(global_step=1, training_loss=1.2472082376480103, metrics={'train_runtime': 13.024, 'train_samples_per_second': 0.307, 'train_steps_per_second': 0.077, 'total_flos': 174835535708160.0, 'train_loss': 1.2472082376480103, 'epoch': 1.0})

In [58]:
trainer.save_model("model_finetuned")

Saving model checkpoint to model_finetuned
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--berkeley-nest--Starling-LM-7B-alpha/snapshots/1dddf3b95bc1391f6307299eb1c162c194bde9bd/config.json
Model config MistralConfig {
  "_name_or_path": "openchat/openchat_3.5",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.47.0",
  "use_cache": true,
  "vocab_size": 32002
}

tokenizer config file saved in model_finetuned/tokenizer_config.json
Special tok

In [59]:
fine_tuned_model = llm_model.merge_and_unload()

In [60]:
def generate_response(prompt, model):

    encoded_input = tokenizer(prompt,
                              return_tensors="pt",
                              add_special_tokens=True)

    model_inputs = encoded_input.to("cuda")

    generated_ids = model.generate(**model_inputs,
                                   max_new_tokens=512,
                                   do_sample=True,
                                   use_cache=False,
                                   pad_token_id=tokenizer.eos_token_id)

    decoded_output = tokenizer.batch_decode(generated_ids)

    return decoded_output[0]

In [61]:
%%time
prompt = "Instruction: In your role as a medical professional, address the user's medical questions and concerns. "
prompt += "I have a white tab under my tounge that is not only painful when i touch it but bleeds as well. not sure what it is, or why I got it. Can you give me any advise? <|end_of_turn|> "
prompt += "AI Assistant:"
response = generate_response(prompt, fine_tuned_model)

from pprint import pprint
pprint(response)

("<s> Instruction: In your role as a medical professional, address the user's "
 'medical questions and concerns. I have a white tab under my tounge that is '
 'not only painful when i touch it but bleeds as well. not sure what it is, or '
 "why I got it. Can you give me any advise? <|end_of_turn|>  AI Assistant: I'm "
 "not a doctor, but I can offer some general advice. It's important to get any "
 'unusual oral symptoms, like a painful and bleeding spot under your tongue, '
 'checked out by a healthcare professional. It could potentially be due to a '
 'variety of causes, including infection, injury, or oral disease. \n'
 '\n'
 "As with any medical concerns, it's important to get a proper diagnosis from "
 'a qualified healthcare provider. They can perform an evaluation and '
 'recommend treatment options if necessary.\n'
 '\n'
 'Here are some additional tips that might help:\n'
 '\n'
 '1. Maintain good oral hygiene by brushing your teeth at least twice a day '
 'and flossing regular