In [1]:
from kaggle_secrets import UserSecretsClient
# Label of the Kaggle secret to read; the fetched value is later passed to
# `huggingface_hub.login`.
secret_label = "HF_TOKEN"
# NOTE(review): the recorded run of this cell ended in a ConnectionError, in
# which case `secret_value` is never defined and the later login cells fail.
secret_value = UserSecretsClient().get_secret(secret_label)

ConnectionError: Connection error trying to communicate with service.

In [None]:
import math

# Training hyperparameters consumed by the `TrainingArguments` call further down.
# Warm-up is specified as a fraction of the run and converted to a whole number
# of optimizer steps here, because the "warmup_steps" entry is a step count
# (the previous value 0.03 was a ratio stored in a step-count field).
MAX_STEPS = 84
WARMUP_RATIO = 0.03

config = {
    "training_config": {
        "per_device_train_batch_size": 6,
        "gradient_accumulation_steps": 4,
        "warmup_steps": math.ceil(MAX_STEPS * WARMUP_RATIO),
        "max_steps": MAX_STEPS,
        "learning_rate": 8.8e-5,
        "logging_steps": 1,
    }
}

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Set the quantization config: 4-bit NF4 weights with double quantization,
# computing in bfloat16.
# NOTE(review): bfloat16 compute assumes the GPU supports it — confirm for the
# accelerator this notebook runs on.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
#
# Load the model and tokenizer
model_id = "Qwen/Qwen2-1.5B-Instruct"
#
# device_map={"":0} places the whole model on device 0.
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})
tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)

In [None]:
# Dataset settings read by the loading and trainer cells below.
config_dataset={    "training_dataset": {
        "name": "ruslanmv/ai-medical-dataset", # Dataset name on the Hugging Face Hub
        "split": "train",  # Dataset split to load
        "input_fields": ["question", "context"] ,# Raw columns the prompt is built from
        "input_field": "text",# Column holding the formatted prompt (passed to the trainer as dataset_text_field)
    },
               }

In [None]:
# Display the dataset settings.
config_dataset.get("training_dataset")

In [None]:
from datasets import load_dataset
# Load the training dataset named in `config_dataset`, using the configured split.
train_dataset = load_dataset(config_dataset.get("training_dataset").get("name"), split = config_dataset.get("training_dataset").get("split"))

In [None]:
# Display the dataset summary.
train_dataset

In [None]:
# Take the first 100 rows as a small subset for quick experiments.
test_dataset = train_dataset.select(range(100))
test_dataset

In [None]:
# Inspect one raw example.
test_dataset[1]

In [None]:
# Prompt template used to build the training text. The two `{}` placeholders are
# filled positionally by `formatting_prompts_func`: first the question, then the
# context.
medical_prompt = """You are an AI Medical Assistant Chatbot, trained to answer medical questions. Below is an instruction that describes a task, paired with an response context. Write a response that appropriately completes the request.

### Instruction:
{}


### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Appended to every formatted sample
def formatting_prompts_func(examples):
    """Render a batch of examples into training prompts.

    Args:
        examples: Batched mapping with parallel "question" and "context"
            sequences.

    Returns:
        A dict with a single "text" key holding one formatted prompt per
        example, each terminated by ``EOS_TOKEN``.
    """
    questions = examples["question"]
    contexts = examples["context"]
    # The EOS marker must close every sample, otherwise generation will go on forever.
    rendered = [
        medical_prompt.format(question, context) + EOS_TOKEN
        for question, context in zip(questions, contexts)
    ]
    return {"text": rendered}

In [None]:
# Add the formatted "text" column to the 100-row subset.
test_dataset= test_dataset.map(formatting_prompts_func, batched = True,)

In [None]:
# Display the subset summary after formatting.
test_dataset

In [None]:
# Inspect one formatted prompt.
test_dataset['text'][1]

In [None]:
# Toggle: True trains on the already formatted 100-row subset, False formats and
# trains on the full dataset.
is_test = True
train_dataset = (
    test_dataset
    if is_test
    else train_dataset.map(formatting_prompts_func, batched = True,)
)

In [None]:
# Inspect one formatted prompt from the dataset that will be trained on.
train_dataset['text'][1]

In [None]:
import bitsandbytes as bnb
def find_all_linear_names(model, linear_cls=None):
  """Collect the leaf names of all modules of a given layer class.

  Args:
    model: Object exposing ``named_modules()`` that yields ``(name, module)``
      pairs.
    linear_cls: Class (or tuple of classes) to match with ``isinstance``.
      Defaults to ``bnb.nn.Linear4bit``.

  Returns:
    Sorted list of unique leaf names (the last dotted component of each
    matching module name), excluding ``'lm_head'``.
  """
  if linear_cls is None:
    # Resolved lazily so callers that pass their own class do not need bitsandbytes.
    linear_cls = bnb.nn.Linear4bit
  lora_module_names = {
      name.split('.')[-1]
      for name, module in model.named_modules()
      if isinstance(module, linear_cls)
  }
  # The output head is excluded from the targets (needed for 16-bit).
  lora_module_names.discard('lm_head')
  # Sorted so the result is reproducible across runs.
  return sorted(lora_module_names)
#
# Module names to adapt; passed as `target_modules` to the LoRA config below.
modules = find_all_linear_names(model)
print(modules)

In [None]:
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
# Prepare the quantized model for training.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
#
print(model)

#

# LoRA hyperparameters; `target_modules` comes from `find_all_linear_names`.
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    target_modules=modules,
    lora_dropout=0.091,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the model with the LoRA adapters.
# NOTE(review): the trainer cell also receives `peft_config=lora_config` for this
# already-wrapped model — confirm how the installed TRL version treats that.
model = get_peft_model(model, lora_config)

In [None]:
# Report how many parameters are trainable relative to the total.
trainable, total = model.get_nb_trainable_parameters()
print(f"Trainable: {trainable} | total: {total} | Percentage: {trainable/total*100:.4f}%")

In [None]:
import transformers

from trl import SFTTrainer

# Reuse EOS as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side='right'
torch.cuda.empty_cache()

training_config = config.get("training_config")

# The configured warm-up may be a fraction of the run (e.g. 0.03) or a whole
# number of steps. A fraction must go to `warmup_ratio`; passing it as
# `warmup_steps` would not produce the intended warm-up.
warmup_value = training_config.get("warmup_steps") or 0
if 0 < warmup_value < 1:
    warmup_kwargs = {"warmup_ratio": warmup_value}
else:
    warmup_kwargs = {"warmup_steps": int(warmup_value)}

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    dataset_text_field=config_dataset.get("training_dataset").get("input_field"),
    # NOTE(review): `model` was already wrapped by `get_peft_model`; confirm how
    # the installed TRL version handles a PEFT model plus `peft_config`.
    peft_config=lora_config,
    max_seq_length=2500,
    dataset_num_proc = 2,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=training_config.get("per_device_train_batch_size"),
        gradient_accumulation_steps=training_config.get("gradient_accumulation_steps"),
        max_steps=training_config.get("max_steps"),
        learning_rate=training_config.get("learning_rate"),
        logging_steps=training_config.get("logging_steps"),
        output_dir="outputs",
        optim="paged_adamw_8bit",
        save_strategy="epoch",
        **warmup_kwargs,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
#
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

In [None]:
## OPTIONAL: HYPERPARAMETER SEARCH WITH OPTUNA. UNCOMMENT THE CODE BELOW TO RUN IT; YOU CAN ADD MORE HYPERPARAMETERS TO THE SEARCH SPACE.
## THE BEST HYPERPARAMETERS FOUND BY THIS EXPERIMENT ARE LISTED BELOW.
# Best Hyperparameters: {'learning_rate': 8.834086710159391e-05, 'per_device_train_batch_size': 6, 'gradient_accumulation_steps': 4, 'lora_alpha': 32, 'lora_dropout': 0.09057747591882871, 'max_steps': 84.35209460217322}
# Best Training Loss: 0.03177290491759777
    
    
# from optuna import create_study, Trial


# search_space = {
#     "per_device_train_batch_size":[1, 2, 4, 8],
#     "learning_rate": [1e-5, 5e-5, 1e-4, 2e-4],
#     "lora_alpha": [8, 16, 32],
#     "max_steps": [50, 100, 150],
#     "gradient_accumulation_steps": [1, 2, 4, 8],
#     "lora_dropout": [0.05, 0.1, 0.2 ]
# }

# def objective(trial):
#     # set hyperparams based on trial values
#     config["training_config"]["learning_rate"] = trial.suggest_float("learning_rate", search_space["learning_rate"][0], search_space["learning_rate"][-1])
#     config["training_config"]["per_device_train_batch_size"] = trial.suggest_int("per_device_train_batch_size", search_space["per_device_train_batch_size"][0], search_space["per_device_train_batch_size"][-1])
#     config["training_config"]["gradient_accumulation_steps"] = trial.suggest_int("gradient_accumulation_steps", search_space["gradient_accumulation_steps"][0], search_space["gradient_accumulation_steps"][-1])
#     lora_config.lora_alpha = trial.suggest_int("lora_alpha", search_space["lora_alpha"][0], search_space["lora_alpha"][-1])
#     lora_config.lora_dropout = trial.suggest_float("lora_dropout", search_space["lora_dropout"][0], search_space["lora_dropout"][-1])
#     config["training_config"]["max_steps"] = trial.suggest_float("max_steps", search_space["max_steps"][0], search_space["max_steps"][-1])

#     trainer_stats = trainer.train()
#     return trainer_stats[1]
    

# study = create_study(direction="minimize")
# study.optimize(objective, n_trials=16)  # Adjust the number of trials

# # Access the best trial and its hyperparameters after optimization
# best_trial = study.best_trial
# best_params = best_trial.params

# print("Best Trial:", best_trial.number)
# print("Best Hyperparameters:", best_params)
# print("Best Training Loss:", best_trial.value)

In [None]:
from huggingface_hub import login
# Authenticate against the Hugging Face Hub with the secret fetched in the first cell.
login(secret_value, add_to_git_credential = True)

# Local directory name for the trained weights; also used as the Hub repo name below.
new_model = "qwen2-1.5B-medical_qa-Finetune" 
#
# Save the trained (PEFT-wrapped) model to the `new_model` directory.
trainer.model.save_pretrained(new_model)
#
# Reload the base model in float16 on device 0, attach the saved weights and
# merge them into a plain model.
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
)
merged_model= PeftModel.from_pretrained(base_model, new_model)
merged_model= merged_model.merge_and_unload()

# Save the merged model
#save_adapter=True, save_config=True
merged_model.save_pretrained("merged_model",safe_serialization=True)
tokenizer.save_pretrained("merged_model")
# NOTE(review): these two assignments run after the tokenizer was saved, so they
# only affect the in-memory tokenizer and the push below.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
#
# Push the model and tokenizer to the Hugging Face Model Hub
# NOTE(review): this writes the merged model's config into a file named
# "adapter_config.json" in the working directory — confirm this is intended.
merged_model.config.to_json_file("adapter_config.json")
merged_model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

In [None]:
def get_completion(query: str, model, tokenizer) -> str:
  """Generate a sampled completion for ``query``.

  Args:
    query: Instruction text inserted into the prompt template.
    model: Model exposing ``generate``; its ``device`` attribute, when
      present, decides where the inputs are moved.
    tokenizer: Tokenizer used to encode the prompt and decode the output.

  Returns:
    The decoded first generated sequence (special tokens skipped).
  """
  # Follow the model's own placement instead of assuming a fixed GPU; fall back
  # to the previous default when the model does not expose a device.
  device = getattr(model, "device", "cuda:0")

  # NOTE(review): this template uses <start_of_turn>/<end_of_turn> markers, while
  # the fine-tuning text is built from `medical_prompt` ("### Instruction:" /
  # "### Response:") — confirm the mismatch is intended.
  prompt_template = """
  <start_of_turn>user
  Below is an instruction that describes a task. Write a response that appropriately completes the request.
  {query}
  <end_of_turn>\n<start_of_turn>model


  """
  prompt = prompt_template.format(query=query)

  encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)

  model_inputs = encodeds.to(device)

  # Sampling is enabled, so repeated calls may give different text.
  generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
  decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
  return (decoded)
#
# Multiple-choice test question; `result` holds the decoded output returned by
# `get_completion` for the merged model.
query = """\n\n Please answer with one of the option in the bracket. Write reasoning in between <analysis></analysis>. Write answer in between <answer></answer>. here are the inputs Q:An 8-year-old boy is brought to the pediatrician by his mother with nausea, vomiting, and decreased frequency of urination. He has acute lymphoblastic leukemia for which he received the 1st dose of chemotherapy 5 days ago. His leukocyte count was 60,000/mm3 before starting chemotherapy. The vital signs include: pulse 110/min, temperature 37.0°C (98.6°F), and blood pressure 100/70 mm Hg. The physical examination shows bilateral pedal edema. Which of the following serum studies and urinalysis findings will be helpful in confirming the diagnosis of this condition? ? \n{'A': 'Hyperkalemia, hyperphosphatemia, hypocalcemia, and extremely elevated creatine kinase (MM)', 'B': 'Hyperkalemia, hyperphosphatemia, hypocalcemia, hyperuricemia, urine supernatant pink, and positive for heme', 'C': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, lactic acidosis, and urate crystals in the urine', 'D': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, and urinary monoclonal spike', 'E': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, lactic acidosis, and oxalate crystals'}"""

result = get_completion(query=query, model=merged_model, tokenizer=tokenizer)
print(result)

In [None]:
# Keep only the text after the prompt's model-turn marker. Splitting on the bare
# word 'model' would cut off any answer that itself contains that word.
print(f"Model Answer : \n {result.split('<start_of_turn>model')[-1]}")

In [None]:
# Second multiple-choice test question for the merged model.
query = """Please answer with one of the option in the bracket. Write reasoning in between <analysis></analysis>. Write answer in between <answer></answer>.here are the inputs:Q:A 34-year-old man presents to a clinic with complaints of abdominal discomfort and blood in the urine for 2 days. He has had similar abdominal discomfort during the past 5 years, although he does not remember passing blood in the urine. He has had hypertension for the past 2 years, for which he has been prescribed medication. There is no history of weight loss, skin rashes, joint pain, vomiting, change in bowel habits, and smoking. On physical examination, there are ballotable flank masses bilaterally. The bowel sounds are normal. Renal function tests are as follows:\nUrea 50 mg/dL\nCreatinine 1.4 mg/dL\nProtein Negative\nRBC Numerous\nThe patient underwent ultrasonography of the abdomen, which revealed enlarged kidneys and multiple anechoic cysts with well-defined walls. A CT scan confirmed the presence of multiple cysts in the kidneys. What is the most likely diagnosis?? \n{'A': 'Autosomal dominant polycystic kidney disease (ADPKD)', 'B': 'Autosomal recessive polycystic kidney disease (ARPKD)', 'C': 'Medullary cystic disease', 'D': 'Simple renal cysts', 'E': 'Acquired cystic kidney disease'}"""
result = get_completion(query=query, model=merged_model, tokenizer=tokenizer)
# Split on the prompt's model-turn marker rather than the bare word 'model', which
# would cut off any answer that itself contains that word.
print(f"Model Answer : \n {result.split('<start_of_turn>model')[-1]}")

In [None]:
from peft import LoraConfig, PeftModel, AutoPeftModelForCausalLM
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import hf_hub_download, hf_hub_url

# Set the LoRA configurations
peft_config = LoraConfig(
    r=64,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Model ID to use
peft_model_id = "Irathernotsay/qwen2-1.5B-medical_qa-Finetune"

# Verify model existence by fetching its config file. `hf_hub_download` requires
# a `filename`; without one the call always raised, so the check always
# reported the model as missing.
try:
    hf_hub_download(repo_id=peft_model_id, filename="config.json")
    model_exists = True
except Exception as e:
    print(f"Model {peft_model_id} not found: {e}")
    model_exists = False


# Load the configuration from the model
# NOTE(review): on success this rebinds `config`, replacing the training
# hyperparameter dict defined near the top of the notebook.
try:
    config = peft_config.from_pretrained(peft_model_id)
except ValueError as e:
    print(f"Configuration loading error: {e}")

# Load the model and tokenizer. 4-bit loading is requested through
# `BitsAndBytesConfig` rather than the deprecated `load_in_4bit=` keyword.
try:
    model = AutoModelForCausalLM.from_pretrained(peft_model_id,
                                                 return_dict=True,
                                                 quantization_config=BitsAndBytesConfig(load_in_4bit=True),
                                                 device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")

In [None]:
# Upgrade transformers to the latest release.
# NOTE(review): the version is unpinned, and an already-imported transformers
# presumably needs a kernel restart to pick up the upgrade — confirm.
!pip install -U transformers

In [None]:
from peft import LoraConfig,PeftModel,AutoPeftModelForCausalLM
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

from huggingface_hub import login
# Authenticate with the secret fetched in the first cell.
login(secret_value, add_to_git_credential = True)

# Set the LoRA configurations (not referenced by the loading code in this cell).
peft_config =LoraConfig(
    r=64,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
#
peft_model_id = "Irathernotsay/qwen2-1.5B-medical_qa-Finetune"
# config = peft_config.from_pretrained(peft_model_id)
#
device = "cuda:0"

# Load the pushed model from the Hub and move it to the GPU. The tokenizer is
# bound to `ptokenizer`, separate from the training-time `tokenizer`.
model = AutoModelForCausalLM.from_pretrained(peft_model_id).to(device)
ptokenizer = AutoTokenizer.from_pretrained(peft_model_id)

In [None]:
def get_completion(query: str, model, tokenizer) -> str:
  """Generate a sampled completion for ``query``.

  Args:
    query: Instruction text inserted into the prompt template.
    model: Model exposing ``generate``; its ``device`` attribute, when
      present, decides where the inputs are moved.
    tokenizer: Tokenizer used to encode the prompt and decode the output.

  Returns:
    The decoded first generated sequence (special tokens skipped).
  """
  # Follow the model's own placement instead of assuming a fixed GPU; fall back
  # to the previous default when the model does not expose a device.
  device = getattr(model, "device", "cuda:0")

  # NOTE(review): this template uses <start_of_turn>/<end_of_turn> markers —
  # confirm it matches the prompt format the model was fine-tuned on.
  prompt_template = """
  <start_of_turn>user
  Below is an instruction that describes a task. Write a response that appropriately completes the request.
  {query}
  <end_of_turn>\n<start_of_turn>model


  """
  prompt = prompt_template.format(query=query)

  encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)

  model_inputs = encodeds.to(device)

  # Sampling is enabled, so repeated calls may give different text.
  generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
  decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
  return (decoded)

In [None]:
# Free-form patient question for the model reloaded from the Hub.
query = """Hi Doctor, I have a high fever, headache and nausea. What medicines should I take?\n"""
result = get_completion(query=query, model=model, tokenizer=ptokenizer)
# Split on the prompt's model-turn marker rather than the bare word 'model', which
# would cut off any answer that itself contains that word.
print(f"Model Answer : \n {result.split('<start_of_turn>model')[-1]}")

In [None]:
# Multiple-choice test question for the model reloaded from the Hub.
query = """Please answer with one of the option in the bracket. Write reasoning in between <analysis></analysis>. Write answer in between <answer></answer>.here are the inputs:Q:A 34-year-old man presents to a clinic with complaints of abdominal discomfort and blood in the urine for 2 days. He has had similar abdominal discomfort during the past 5 years, although he does not remember passing blood in the urine. He has had hypertension for the past 2 years, for which he has been prescribed medication. There is no history of weight loss, skin rashes, joint pain, vomiting, change in bowel habits, and smoking. On physical examination, there are ballotable flank masses bilaterally. The bowel sounds are normal. Renal function tests are as follows:\nUrea 50 mg/dL\nCreatinine 1.4 mg/dL\nProtein Negative\nRBC Numerous\nThe patient underwent ultrasonography of the abdomen, which revealed enlarged kidneys and multiple anechoic cysts with well-defined walls. A CT scan confirmed the presence of multiple cysts in the kidneys. What is the most likely diagnosis?? \n{'A': 'Autosomal dominant polycystic kidney disease (ADPKD)', 'B': 'Autosomal recessive polycystic kidney disease (ARPKD)', 'C': 'Medullary cystic disease', 'D': 'Simple renal cysts', 'E': 'Acquired cystic kidney disease'}"""
result = get_completion(query=query, model=model, tokenizer=ptokenizer)
# Split on the prompt's model-turn marker rather than the bare word 'model', which
# would cut off any answer that itself contains that word.
print(f"Model Answer : \n {result.split('<start_of_turn>model')[-1]}")

In [None]:
# Show the full decoded output, prompt included.
print(result)

In [None]:
import locale
def getpreferredencoding(do_setlocale = True):
  return "UTF-8"
locale.getpreferredencoding = getpreferredencoding
!git clone https://github.com/ggerganov/llama.cpp
!cd llama.cpp && GGML_CUDA=1 make && pip install -r requirements/requirements-

In [None]:
from huggingface_hub import snapshot_download
# Hub repo to convert, the quantization methods to produce, and the local
# directories for the downloaded and the converted/quantized files.
model_name = "Irathernotsay/qwen2-1.5B-medical_qa-Finetune"
methods = ['q4_k_m']
base_model = "./original_model/"
quantized_path = "./quantized_model/"
#
# Download the repository files into `base_model`.
snapshot_download(repo_id=model_name, local_dir=base_model , local_dir_use_symlinks=False)
# `quantized_path` already ends with "/", so this path contains a double slash.
original_model = quantized_path+'/FP16.gguf'

In [None]:
!mkdir ./quantized_model/

In [None]:
# Convert the downloaded model to a float16 GGUF file.
# NOTE(review): the script path is absolute (/kaggle/working) while the other
# cells use paths relative to the working directory.
!python /kaggle/working/llama.cpp/convert_hf_to_gguf.py ./original_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf


In [None]:
!chmod +x ./llama.cpp/examples/quantize

In [None]:
import os
import subprocess

# Quantize the FP16 GGUF file once per requested method.
# NOTE(review): assumes the Makefile build placed `llama-quantize` in the repo
# root — confirm against the cloned revision.
for m in methods:
  qtype = os.path.join(quantized_path, f"{m.upper()}.gguf")
  # An argument list avoids shell string concatenation; check=True raises on a
  # failed run instead of silently ignoring the exit status.
  subprocess.run(
      ["./llama.cpp/llama-quantize", os.path.join(quantized_path, "FP16.gguf"), qtype, m],
      check=True,
  )

In [None]:
! ./llama.cpp/quantize ./quantized_model/Q4_K_M.gguf -n 90 --repeat_penalty 1.0 --color -i -r "User:" -f llama.cpp/prompts/chat-with-bob.txt


In [None]:
from huggingface_hub import HfApi, HfFolder, create_repo, upload_file

from huggingface_hub import login
# Authenticate with the secret fetched in the first cell.
login(secret_value, add_to_git_credential = True)

# The quantized file is written to ./quantized_model/ by the quantize step,
# not to the working directory.
model_path = "./quantized_model/Q4_K_M.gguf" # Your model's local path
repo_name = "qwen2-1.5b-medical_qa-GGUF"  # Desired HF Hub repository name
# exist_ok keeps the cell re-runnable once the repository has been created.
repo_url = create_repo(repo_name, private=False, exist_ok=True)

In [None]:
from pathlib import Path

# Upload the quantized file to the repository created in the previous cell.
api = HfApi()
api.upload_file(
    path_or_fileobj=model_path,
    # Store the file under its own name instead of an absolute local path.
    path_in_repo=Path(model_path).name,
    # Target the repo returned by `create_repo`, not a differently named one.
    repo_id=repo_url.repo_id,
    repo_type="model",
)

In [None]:
# Download a Q4_K_M GGUF file into the working directory.
# NOTE(review): this URL points at a different repository (gemma-medical_qa-GGUF)
# than the Qwen2 model quantized above — confirm which file is intended.
!wget "https://huggingface.co/Plaban81/gemma-medical_qa-GGUF/resolve/main/Q4_K_M.gguf"

In [None]:
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python

In [None]:
from llama_cpp import Llama

# Set n_gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
llm = Llama(
  # The download cell saves the file into the working directory; a relative path
  # works there, unlike the Colab-only "/content/" prefix.
  model_path="./Q4_K_M.gguf",  # Download the model file first
  n_ctx=32768,  # The max sequence length to use - note that longer sequence lengths require much more resources
  n_threads=1,            # The number of CPU threads to use, tailor to your system and the resulting performance
  n_gpu_layers=-1         # The number of layers to offload to GPU, if you have GPU acceleration available
)

In [None]:
# Send the raw question (no chat template) to the GGUF model and display the
# full response object.
query = """Please answer with one of the option in the bracket. Write reasoning in between <analysis></analysis>. Write answer in between <answer></answer>. here are the inputs Q:An 8-year-old boy is brought to the pediatrician by his mother with nausea, vomiting, and decreased frequency of urination. He has acute lymphoblastic leukemia for which he received the 1st dose of chemotherapy 5 days ago. His leukocyte count was 60,000/mm3 before starting chemotherapy. The vital signs include: pulse 110/min, temperature 37.0°C (98.6°F), and blood pressure 100/70 mm Hg. The physical examination shows bilateral pedal edema. Which of the following serum studies and urinalysis findings will be helpful in confirming the diagnosis of this condition? ? \n{'A': 'Hyperkalemia, hyperphosphatemia, hypocalcemia, and extremely elevated creatine kinase (MM)', 'B': 'Hyperkalemia, hyperphosphatemia, hypocalcemia, hyperuricemia, urine supernatant pink, and positive for heme', 'C': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, lactic acidosis, and urate crystals in the urine', 'D': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, and urinary monoclonal spike', 'E': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, lactic acidosis, and oxalate crystals'}"""
output = llm(
  prompt=query,
  max_tokens=512,  # Generate up to 512 tokens
)
output

In [None]:
# Same question as the previous cell, but the prompt starts with "\n\n ".
query = """\n\n Please answer with one of the option in the bracket. Write reasoning in between <analysis></analysis>. Write answer in between <answer></answer>. here are the inputs Q:An 8-year-old boy is brought to the pediatrician by his mother with nausea, vomiting, and decreased frequency of urination. He has acute lymphoblastic leukemia for which he received the 1st dose of chemotherapy 5 days ago. His leukocyte count was 60,000/mm3 before starting chemotherapy. The vital signs include: pulse 110/min, temperature 37.0°C (98.6°F), and blood pressure 100/70 mm Hg. The physical examination shows bilateral pedal edema. Which of the following serum studies and urinalysis findings will be helpful in confirming the diagnosis of this condition? ? \n{'A': 'Hyperkalemia, hyperphosphatemia, hypocalcemia, and extremely elevated creatine kinase (MM)', 'B': 'Hyperkalemia, hyperphosphatemia, hypocalcemia, hyperuricemia, urine supernatant pink, and positive for heme', 'C': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, lactic acidosis, and urate crystals in the urine', 'D': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, and urinary monoclonal spike', 'E': 'Hyperuricemia, hyperkalemia, hyperphosphatemia, lactic acidosis, and oxalate crystals'}"""
output = llm(
  prompt=query,
  max_tokens=512,  # Generate up to 512 tokens
)

output

In [None]:
# Print the completion text, dropping anything up to the model-turn marker if it
# is present. The marker is "<start_of_turn>model"; the previous separator
# misspelled it as "<end_of_turn>model".
print(output["choices"][0]["text"].split("<end_of_turn>\n<start_of_turn>model")[-1])