In [1]:
%pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

Note: you may need to restart the kernel to use updated packages.
usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: dejavu events execute kernel kernelspec lab
labextension labhub migrate nbconvert notebook run server troubleshoot trust

Jupyter command `jupyter-nbextension` not found.


# 1. Install required packages (run once)

In [2]:
%pip install transformers datasets peft accelerate
%pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.0
Note: you may need to restart the kernel to use updated packages.


# 2. Import libs

In [3]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

# 3. Load dataset

In [4]:
dataset = load_dataset("GBaker/MedQA-USMLE-4-options")

print(dataset)

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'options', 'meta_info', 'answer_idx', 'metamap_phrases'],
        num_rows: 10178
    })
    test: Dataset({
        features: ['question', 'answer', 'options', 'meta_info', 'answer_idx', 'metamap_phrases'],
        num_rows: 1273
    })
})


# 4. Preprocess dataset

In [5]:
def preprocess(example):
    # for MCQ, format input and output text
    input_text = example['question']
    output_text = example['answer']
    return {"input": input_text, "output": output_text}


In [6]:
train_dataset = dataset["train"].map(preprocess)

In [7]:
print(train_dataset)

Dataset({
    features: ['question', 'answer', 'options', 'meta_info', 'answer_idx', 'metamap_phrases', 'input', 'output'],
    num_rows: 10178
})


# 5. Hugging face token for loading model

In [8]:
from huggingface_hub import login
from dotenv import load_dotenv
import os

load_dotenv(dotenv_path='.env')

HF_TOEKN = os.getenv('HF_TOEKN')


login(token=HF_TOEKN)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# 6. Load tokenizer and model

In [9]:
from transformers import AutoModelForCausalLM

# model_name = "meta-llama/Llama-3.2-3B"
model_name = "meta-llama/Llama-3.2-1B" # see if training becomes faster?


tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
  model_name,
  # device_map="auto", # automatically assign device (GPU/CPU/MPS)
)

# No need to save locally
# model.save_pretrained("./llama-3b")
# tokenizer.save_pretrained("./llama-3b")

# 6. Apply LoRA PEFT


In [10]:
lora_config = LoraConfig(
    r=8, 
    lora_alpha=16, 
    target_modules=["q_proj", "v_proj"], 
    lora_dropout=0.1, 
    bias="none"
)

model = get_peft_model(model, lora_config)

'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


# 8. Tokenize inputs for training

In [11]:
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

def tokenize_func(examples):
    # Prepare inputs (prompt + answer)
    inputs = [f"Question: {q}\nAnswer: {a}" for q, a in zip(examples["input"], examples["output"])]
    tokenized = tokenizer(
        inputs,
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors=None,
    )
    # For causal LM, labels = input_ids (model shifts internally)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized


print("Before", train_dataset.column_names)
train_dataset = train_dataset.map(tokenize_func, batched=True, remove_columns=train_dataset.column_names)
print("After", train_dataset.column_names)


Before ['question', 'answer', 'options', 'meta_info', 'answer_idx', 'metamap_phrases', 'input', 'output']
After ['input_ids', 'attention_mask', 'labels']


# 8. Setup training args

In [12]:
training_args = TrainingArguments(
    output_dir="./llama3b-lora-finetuned",
    per_device_train_batch_size=1,
    num_train_epochs=1,
    learning_rate=2e-4,
    logging_steps=1,
    save_steps=30,
    save_total_limit=2,
    fp16=False,
    remove_unused_columns=False
)

# 9. Check if running on CPU OR GPU (For Apple silicon)
If MPS available and MPS built are True and model_device shows mps, you are running on your Apple Silicon GPU.


In [13]:
import torch

print("MPS available:", torch.backends.mps.is_available())
print("MPS built:", torch.backends.mps.is_built())

model_device = next(model.parameters()).device
print("Model device:", model_device)

MPS available: True
MPS built: True
Model device: cpu


# 10. Create Trainer and train

In [14]:
import logging
logging.basicConfig(level=logging.INFO)

print("fp16:", training_args.fp16)

trainer = Trainer(
    model=model, 
    args=training_args, 
    train_dataset=train_dataset
)


trainer.train()

fp16: False


No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
1,0.5859
2,0.2947
3,0.4612
4,0.8713
5,0.476
6,0.3682
7,0.4005
8,0.5411
9,0.8391
10,0.4677


KeyboardInterrupt: 

# 11. Generate gguf file

In [None]:
model.save_pretrained_gguf("chat_nedicine", tokenizer, quantization_method="f16")

# 12. Download Model

In [None]:
# from google.colab import files
# files.download('/content/chat_doc_gpt_model/unsloth.F16.gguf')