In [4]:
from google.colab import drive
drive.mount("/content/drive")


Mounted at /content/drive


In [5]:
import os
from pathlib import Path
import torch

from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# HF auth (Gemma is gated)
login(token=os.environ["HF_TOKEN"])

BASE_MODEL_ID = "google/gemma-3-1b-it"
ADAPTER_DIR = Path("/content/drive/MyDrive/slm-hosting-playbook-artifacts/live_backup/gemma-3-1b-it-lora-20260114-034307").resolve()

# 0) Hard verify adapter files exist locally
print("Adapter dir:", ADAPTER_DIR)
print("adapter_config exists:", (ADAPTER_DIR / "adapter_config.json").exists())
print("adapter_model exists:", (ADAPTER_DIR / "adapter_model.safetensors").exists())
assert (ADAPTER_DIR / "adapter_config.json").exists(), "Missing adapter_config.json in adapter dir"
assert (ADAPTER_DIR / "adapter_model.safetensors").exists(), "Missing adapter_model.safetensors in adapter dir"

# 1) Load base model + tokenizer
tok = AutoTokenizer.from_pretrained(BASE_MODEL_ID, use_fast=True)
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# 2) Attach adapter (force local path string)
model = PeftModel.from_pretrained(base, str(ADAPTER_DIR), is_trainable=False)
model.eval()

prompt = "User: Explain monopsony in simple terms with a labor market example.\nAssistant:"
inputs = tok(prompt, return_tensors="pt")
inputs = {k: v.to(model.device) for k, v in inputs.items()}

with torch.no_grad():
    out = model.generate(
        **inputs,
        max_new_tokens=120,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

print(tok.decode(out[0], skip_special_tokens=True))


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


Adapter dir: /content/drive/MyDrive/slm-hosting-playbook-artifacts/live_backup/gemma-3-1b-it-lora-20260114-034307
adapter_config exists: True
adapter_model exists: True
User: Explain monopsony in simple terms with a labor market example.
Assistant: Monopsony is a market structure where one or a few companies control a significant portion of the market, making it difficult for smaller companies to compete. In the labor market, this can happen when a few large companies have the power to hire and fire workers, or when a small number of companies control a large portion of the industry. This can lead to low wages, fewer job opportunities, and less job security for workers.

Example: Let's say there are only three companies in the food processing industry. If one of these companies is acquired by a larger company, it could gain control over
