In [13]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch, os, json

# ── paths & constants ──────────────────────────────────────────────────
BASE_NAME   = "google/gemma-2-2b-it"              # base checkpoint on the HF Hub
ADAPTER_DIR = "/workspace/Gemma-2-2b-it-ChatDoctor-MedQA"  # where adapter_config.json lives
OUT_DIR     = "gemma2-2b-chatdoctor-merged"       # relative path: created under the current working dir
BASE_VOCAB_SIZE = 256_000                         # embedding rows of the stock base checkpoint

# ── 1. load base model & tokenizer (un-quantized, half precision) ──────
# trust_remote_code is deliberately NOT passed: Gemma-2 is a built-in
# transformers architecture, so the flag would only grant permission to run
# arbitrary code shipped in the repo without being needed for loading.
tok = AutoTokenizer.from_pretrained(BASE_NAME)

model = AutoModelForCausalLM.from_pretrained(
    BASE_NAME,
    torch_dtype=torch.float16,        # or bfloat16 if your GPU supports it
    device_map={"": 0},               # everything on GPU 0; use "cpu" if no GPU
    low_cpu_mem_usage=True,
)

# ── 2. grow the vocab by two rows so it matches the adapter ────────────
# The embedding matrix must have the same number of rows as the one stored
# with the adapter before merging, hence the resize.
# NOTE(review): the two tokens added here are placeholders. If the adapter was
# trained with its own added tokens, the tokenizer saved below will name them
# differently from training — confirm, and prefer the training tokenizer if
# one is available.
if model.get_input_embeddings().num_embeddings == BASE_VOCAB_SIZE:
    extra = ["<extra_0>", "<extra_1>"]
    num_added = tok.add_tokens(extra, special_tokens=True)
    if num_added != len(extra):
        # Without this check a partially-failed add would silently resize
        # the embeddings to the wrong size.
        raise RuntimeError(
            f"Expected to add {len(extra)} tokens, tokenizer added {num_added}"
        )
    model.resize_token_embeddings(len(tok))       # now 256002 × 2304
    print("Resized embeddings to", len(tok))

# ── 3. attach LoRA adapter and merge ───────────────────────────────────
model = PeftModel.from_pretrained(model, ADAPTER_DIR)
model = model.merge_and_unload()                  # fold LoRA weights into the base layers

# ── 4. save the merged checkpoint ──────────────────────────────────────
os.makedirs(OUT_DIR, exist_ok=True)
model.save_pretrained(OUT_DIR, safe_serialization=True)
tok.save_pretrained(OUT_DIR)

print("✅ merged model written to", os.path.abspath(OUT_DIR))


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Resized embeddings to 256002
✅ merged model written to /workspace/Gemma-2-2b-it-ChatDoctor-MedQA/gemma2-2b-chatdoctor-merged


In [16]:
# Prerequisite (run once if missing): %pip install -U huggingface_hub
# (original note: git-lfs must also be on PATH)
import os

from huggingface_hub import login

# Authenticate against the Hugging Face Hub.
# A bare login() always opens the interactive prompt/widget in a notebook; it
# does not pick up HF_TOKEN by itself. Passing the variable explicitly makes
# the cell non-interactive when HF_TOKEN is set, and falls back to the prompt
# (token=None) when it is unset or empty.
hf_token = os.environ.get("HF_TOKEN") or None
login(token=hf_token)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [23]:
from huggingface_hub import login
import os

# torch is imported here explicitly: the dtype argument below needs it, and
# the cell must not depend on an earlier cell having imported it.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory holding the merged checkpoint (relative to the current working dir)
merged_model_path = "gemma2-2b-chatdoctor-merged"

# Fail fast if the merged checkpoint is missing. Raise instead of calling
# exit(), which would shut down the whole kernel rather than stop this cell.
if not os.path.isdir(merged_model_path):
    raise FileNotFoundError(f"❌ Merged model directory not found: {merged_model_path}")

print(f"✅ Found merged model at: {os.path.abspath(merged_model_path)}")

# List files in the directory so the reader can eyeball the checkpoint contents
print("Files in merged model directory:")
for file in os.listdir(merged_model_path):
    print(f"  - {file}")

# Login to Hugging Face (if not already logged in)
# login()  # Uncomment if you need to login

# Load the merged model and tokenizer from disk.
# trust_remote_code is not passed: the checkpoint is a local directory using a
# built-in transformers architecture, so no repo-supplied code has to run.
print("Loading merged model for upload...")
model = AutoModelForCausalLM.from_pretrained(
    merged_model_path,
    torch_dtype=torch.float16,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(merged_model_path)

# Upload to Hugging Face
repo_name = "Cshavi/gemma2-2b-chatdoctor-medqa_merged"

# NOTE(review): use_temp_dir=False appears to make push_to_hub write its files
# into a local working directory named after the repo instead of a temporary
# one, leaving a second on-disk copy of the weights — confirm this is intended.
print(f"Uploading model to {repo_name}...")
model.push_to_hub(repo_name, use_temp_dir=False)
tokenizer.push_to_hub(repo_name, use_temp_dir=False)

print(f"✅ Model uploaded successfully to: https://huggingface.co/{repo_name}")

✅ Found merged model at: /workspace/Gemma-2-2b-it-ChatDoctor-MedQA/gemma2-2b-chatdoctor-merged
Files in merged model directory:
  - tokenizer.json
  - special_tokens_map.json
  - tokenizer_config.json
  - chat_template.jinja
  - model.safetensors.index.json
  - model-00002-of-00002.safetensors
  - model-00001-of-00002.safetensors
  - generation_config.json
  - config.json
Loading merged model for upload...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Uploading model to Cshavi/gemma2-2b-chatdoctor-medqa_merged...


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

✅ Model uploaded successfully to: https://huggingface.co/Cshavi/gemma2-2b-chatdoctor-medqa_merged
