# Medical Model Export (Kaggle Edition)

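This notebook loads the `unsloth/llama-3-8b-bnb-4bit` base model, applies your fine-tuned LoRA adapter, and exports the result to GGUF (`q4_k_m` quantization) on a Kaggle GPU. The exported file is written to `/kaggle/working/medical_llama3_kaggle.gguf`.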

## Prerequisites
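1.  **Upload Checkpoints**: Upload your `checkpoints` folder as a Kaggle Dataset. Each adapter folder must contain an `adapter_config.json` file; the notebook searches for that file to locate the adapter.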
2.  **Add Dataset**: Add that dataset to this notebook (it will appear in `/kaggle/input`).
3.  **GPU**: Ensure Accelerator is set to GPU T4 x2 or P100.

In [None]:
%%capture
import torch
major_version, minor_version = torch.cuda.get_device_capability()
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    !pip install --no-deps xformers trl peft accelerate bitsandbytes

In [None]:
from unsloth import FastLanguageModel
import torch
import os

# Kaggle Input Directory
import re

INPUT_DIR = "/kaggle/input/"
OUTPUT_DIR = "/kaggle/working/"


def adapter_sort_key(path):
    """Return a natural-order sort key for an adapter directory path.

    Runs of ASCII digits are compared as integers and everything else as text,
    so 'Stage10' ranks above 'Stage4' and 'checkpoint-1000' ranks above
    'checkpoint-900'. A plain string sort gets both of those cases wrong.

    Args:
        path: Directory path of an adapter candidate.

    Returns:
        A list alternating text chunks and integers, suitable as a sort key.
    """
    # re.split with a capturing group always yields text at even positions and
    # the captured digit runs at odd positions, so keys compare type-safely.
    tokens = re.split(r"([0-9]+)", path)
    return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]


print(f"Listing all files in {INPUT_DIR} to find checkpoints...")
CHECKPOINT_DIR = None

# Recursive search for 'adapter_config.json' to find the actual adapter folder
adapter_candidates = []
for root, dirs, files in os.walk(INPUT_DIR):
    if "adapter_config.json" in files:
        print(f"Found adapter candidate at: {root}")
        adapter_candidates.append(root)

if not adapter_candidates:
    print("ERROR: No 'adapter_config.json' found anywhere in input. Did you upload the dataset correctly?")
    print("Full directory listing:")
    # Dump the whole input tree so the user can see what was actually mounted.
    for root, dirs, files in os.walk(INPUT_DIR):
        print(f"{root} -> {dirs}, {files}")
else:
    # Pick the candidate that sorts last in natural order: the highest stage
    # number wins, then the highest checkpoint step within that stage
    # (e.g. Stage4 over Stage3, checkpoint-1000 over checkpoint-900).
    adapter_candidates.sort(key=adapter_sort_key, reverse=True)
    CHECKPOINT_DIR = adapter_candidates[0]
    print(f"Selected adapter: {CHECKPOINT_DIR}")

In [None]:
# 1. Load Base Model
# Loader settings stay as notebook-level names, defined here before the call.
max_seq_length, dtype, load_in_4bit = 2048, None, True

# Collect the keyword arguments in one place so the loader call reads as a
# single line.
base_model_options = dict(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,  # left as None here; presumably Unsloth picks a dtype itself — TODO confirm
    load_in_4bit = load_in_4bit,
)

print("Loading base model...")
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

In [None]:
# 2. Load Adapter
if CHECKPOINT_DIR:
    print(f"Loading adapter from {CHECKPOINT_DIR}...")
    # Wrap the base model with a LoRA adapter. PEFT registers a new adapter
    # under the name "default" unless told otherwise.
    # NOTE(review): r / lora_alpha / target_modules below must match the values
    # stored in the checkpoint's adapter_config.json, or loading the weights
    # will fail on shape mismatches — confirm against your training config.
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
    )
    # PeftModel.load_adapter takes the adapter name as a required argument;
    # calling it with only the path raises TypeError. Loading into "default"
    # overwrites the freshly initialised weights created above, so the trained
    # adapter is the active one when the model is exported.
    model.load_adapter(CHECKPOINT_DIR, adapter_name = "default")
else:
    print("Skipping adapter load due to missing path.")

In [None]:
# 3. Export to GGUF
import glob
import shutil

# Final location of the exported model inside Kaggle's downloadable output dir.
FINAL_GGUF_PATH = os.path.join(OUTPUT_DIR, "medical_llama3_kaggle.gguf")

if CHECKPOINT_DIR:
    print("Saving to GGUF (q4_k_m)...")
    try:
        model.save_pretrained_gguf("model_recovered", tokenizer, quantization_method = "q4_k_m")

        # Locate the quantized file before reporting success. A shell `mv` that
        # fails does not raise, which would let the success message print even
        # though nothing was moved.
        expected_gguf = "model_recovered-unsloth.Q4_K_M.gguf"
        if os.path.isfile(expected_gguf):
            produced_gguf = expected_gguf
        else:
            # NOTE(review): the output name/location may differ between Unsloth
            # releases, so fall back to searching next to and inside
            # 'model_recovered' — confirm against the installed version.
            found = glob.glob("model_recovered*.gguf") + glob.glob(
                os.path.join("model_recovered", "**", "*.gguf"), recursive=True
            )
            # Prefer the q4_k_m file over any unquantized intermediate.
            quantized = [p for p in found if "q4_k_m" in os.path.basename(p).lower()]
            pool = quantized or found
            if not pool:
                raise FileNotFoundError(
                    "save_pretrained_gguf finished but no .gguf file was found "
                    "next to or inside 'model_recovered'"
                )
            produced_gguf = max(pool, key=os.path.getmtime)

        # Move to Output
        shutil.move(produced_gguf, FINAL_GGUF_PATH)
        print(f"SUCCESS! Model saved to {FINAL_GGUF_PATH}")
        print("Download it from the 'Output' section on the right sidebar.")
    except Exception as e:
        # Deliberate best-effort: report the failure without aborting the notebook.
        print(f"Export failed: {e}")