In [1]:
!pip install torch transformers accelerate bitsandbytes peft

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.me

In [2]:
# In a new cell in your Google Colab notebook

# --- Step 1: Access the Secret Token ---
# This securely loads the token you stored in the Secrets Manager.
from google.colab import userdata
hf_token = userdata.get('HF_TOKEN')

# --- Step 2: Import necessary libraries ---
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# --- Configuration ---
base_model_id = "meta-llama/Llama-3.1-8B"
adapter_id = "SidMcStarter/legal-llama-adapters"
output_dir = "merged_legal_llama"

# --- Main Script ---
print(f"Loading base model: {base_model_id}")
# Load the base model, passing the token directly for authentication.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=hf_token  # <--- Use the token here
)

tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    token=hf_token  # <--- And also here
)

print(f"\nLoading LoRA adapters from: {adapter_id}")
# Apply the adapters to the base model.
model = PeftModel.from_pretrained(
    base_model,
    adapter_id,
    token=hf_token  # <--- And here for the adapters
)

print("\nMerging adapters into the base model...")
# Merge the LoRA layers into the base model.
merged_model = model.merge_and_unload()
print("Merge complete.")

print(f"\nSaving the final merged model to: {output_dir}")
# Save the complete, merged model to the Colab filesystem.
merged_model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print("\n--------------------------------------------------")
print(f"✅ Success! Your merged model is saved in the '{output_dir}' directory.")
print("This directory is now ready for the GGUF conversion process.")
print("--------------------------------------------------")

Loading base model: meta-llama/Llama-3.1-8B


config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
None of the available devices `available_devices = None` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {'xpu', 'cuda', 'mps', 'npu', 'hpu', '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)'}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: None of the available devices `available_devices = None` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {'xpu', 'cuda', 'mps', 'npu', 'hpu', '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)'}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend