In [6]:
!pip install -q transformers datasets accelerate peft bitsandbytes sentencepiece huggingface_hub

In [7]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from accelerate import Accelerator
import json

In [8]:
BASE_MODEL = "stabilityai/stablelm-tuned-alpha-3b"  # choose a <7B model
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=True,
    device_map="auto",
)
# Prepare model for LoRA/k-bit training
model = prepare_model_for_kbit_training(model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
print(model)

GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50688, 4096)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-15): 16 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXAttention(
          (query_key_value): Linear8bitLt(in_features=4096, out_features=12288, bias=True)
          (dense): Linear8bitLt(in_features=4096, out_features=4096, bias=True)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear8bitLt(in_features=4096, out_features=16384, bias=True)
          (dense_4h_to_h): Linear8bitLt(in_features=16384, out_features=4096, bias=True)
          (act): GELUActivation()
        )
      )
    )
    (final_layer_norm): Lay

In [10]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query_key_value", "dense"],  # adjust per model; for some models use ['linear']
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)


In [11]:
# Inspect the model's layer names to find the correct target modules
for name, module in model.named_modules():
    print(name)


base_model
base_model.model
base_model.model.gpt_neox
base_model.model.gpt_neox.embed_in
base_model.model.gpt_neox.emb_dropout
base_model.model.gpt_neox.layers
base_model.model.gpt_neox.layers.0
base_model.model.gpt_neox.layers.0.input_layernorm
base_model.model.gpt_neox.layers.0.post_attention_layernorm
base_model.model.gpt_neox.layers.0.post_attention_dropout
base_model.model.gpt_neox.layers.0.post_mlp_dropout
base_model.model.gpt_neox.layers.0.attention
base_model.model.gpt_neox.layers.0.attention.query_key_value
base_model.model.gpt_neox.layers.0.attention.query_key_value.base_layer
base_model.model.gpt_neox.layers.0.attention.query_key_value.lora_dropout
base_model.model.gpt_neox.layers.0.attention.query_key_value.lora_dropout.default
base_model.model.gpt_neox.layers.0.attention.query_key_value.lora_A
base_model.model.gpt_neox.layers.0.attention.query_key_value.lora_A.default
base_model.model.gpt_neox.layers.0.attention.query_key_value.lora_B
base_model.model.gpt_neox.layers.0.at

In [12]:

ds = load_dataset("lavita/AlpaCare-MedInstruct-52k")

train_val_test = ds["train"].train_test_split(test_size=0.10, seed=42)
train_ds = train_val_test["train"]
temp_ds = train_val_test["test"]

val_test_split = temp_ds.train_test_split(test_size=0.5, seed=42)
val_ds = val_test_split["train"]
test_ds = val_test_split["test"]

print("Train size:", len(train_ds))
print("Validation size:", len(val_ds))
print("Test size:", len(test_ds))

Train size: 46801
Validation size: 2600
Test size: 2601


In [13]:
def preprocess_fn(example):
    inst = example.get("instruction") or example.get("prompt") or ""
    resp = example.get("response") or example.get("output") or ""
    prompt = f"Instruction: {inst}\n\nResponse: {resp}"
    tokenized = tokenizer(prompt, truncation=True, max_length=512)
    return tokenized

# Apply preprocessing
train_ds = train_ds.map(lambda ex: preprocess_fn(ex), remove_columns=train_ds.column_names)
val_ds   = val_ds.map(lambda ex: preprocess_fn(ex), remove_columns=val_ds.column_names)
test_ds  = test_ds.map(lambda ex: preprocess_fn(ex), remove_columns=test_ds.column_names)


Map:   0%|          | 0/46801 [00:00<?, ? examples/s]

In [19]:
pip install --upgrade transformers


Collecting transformers
  Downloading transformers-4.57.0-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.57.0-py3-none-any.whl (12.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m66.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.56.2
    Uninstalling transformers-4.56.2:
      Successfully uninstalled transformers-4.56.2
Successfully installed transformers-4.57.0


In [14]:
adapter_dir = "adapters/alpacare_lora"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

# Optional: Zip for download
!zip -r alpacare_lora_adapter.zip adapters/alpacare_lora


  adding: adapters/alpacare_lora/ (stored 0%)
  adding: adapters/alpacare_lora/special_tokens_map.json (deflated 75%)
  adding: adapters/alpacare_lora/tokenizer_config.json (deflated 92%)
  adding: adapters/alpacare_lora/README.md (deflated 66%)
  adding: adapters/alpacare_lora/adapter_config.json (deflated 55%)
  adding: adapters/alpacare_lora/adapter_model.safetensors (deflated 69%)
  adding: adapters/alpacare_lora/tokenizer.json (deflated 81%)


In [15]:
from google.colab import files
files.download("alpacare_lora_adapter.zip")
# or save to drive
# from google.colab import drive
# drive.mount('/content/drive')
# !cp -r adapters/alpacare_lora /content/drive/MyDrive/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>