# GroundedDINO + SAM — Detection (Colab Pro+)
Детектор регионов: монтируем GCS (через сервис‑аккаунт), ставим Torch+детекторы, рендерим страницы, запускаем детекцию и грузим регионы в `gs://pik-artifacts-dev/grounded_regions/`.


In [None]:
#@title Runtime & GPU
# Runtime & GPU
!nvidia-smi || true
import sys; print(sys.version)


In [None]:
#@title Auth + gcsfuse setup
# Install packages and prepare gcsfuse repo; auto-mount with SA from /content/Secrets if present
from google.colab import auth; auth.authenticate_user()
!pip -q install google-cloud-storage gcsfs
!curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg
!echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt gcsfuse-jammy main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
!sudo apt-get -q update
!sudo apt-get -q install -y gcsfuse poppler-utils
!mkdir -p /content/src_gcs /content/artifacts /content/pages /content/Secrets
import glob, os, subprocess, shlex
matches = glob.glob('/content/Secrets/*.json')
if matches:
  KEY = matches[0]
  os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = KEY
  print('Found SA key:', KEY)
  subprocess.run(shlex.split('fusermount -u /content/src_gcs'), check=False)
  subprocess.run(shlex.split('fusermount -u /content/artifacts'), check=False)
  code1 = subprocess.run(['gcsfuse','--implicit-dirs','--key-file',KEY,'pik_source_bucket','/content/src_gcs']).returncode
  code2 = subprocess.run(['gcsfuse','--implicit-dirs','--key-file',KEY,'pik-artifacts-dev','/content/artifacts']).returncode
  print('mount src=', code1==0, ' mount artifacts=', code2==0)
else:
  # Ручная загрузка ключа (как раньше)
  from google.colab import files
  print('Загрузите JSON-ключ сервис-аккаунта (sa.json)')
  uploaded = files.upload()
  if uploaded:
    KEY = '/content/' + list(uploaded.keys())[0]
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = KEY
    subprocess.run(shlex.split('fusermount -u /content/src_gcs'), check=False)
    subprocess.run(shlex.split('fusermount -u /content/artifacts'), check=False)
    c1 = subprocess.run(['gcsfuse','--implicit-dirs','--key-file',KEY,'pik_source_bucket','/content/src_gcs']).returncode
    c2 = subprocess.run(['gcsfuse','--implicit-dirs','--key-file',KEY,'pik-artifacts-dev','/content/artifacts']).returncode
    print('mount src=', c1==0, ' mount artifacts=', c2==0)
  else:
    KEY = ''


In [None]:
#@title Mount GCS buckets
# Mount GCS buckets with SA key
KEY_PATH = f'/content/{KEY}'
!gcsfuse --implicit-dirs --key-file $KEY_PATH pik_source_bucket /content/src_gcs
!gcsfuse --implicit-dirs --key-file $KEY_PATH pik-artifacts-dev /content/artifacts
!ls -la /content/src_gcs | head -n 20
!echo ok > /content/artifacts/_healthcheck.txt && ls -l /content/artifacts/_healthcheck.txt


In [None]:
#@title Install Torch + SAM/SAM2 + GroundedDINO
# 2) Torch + detectors — reliable install (no wheel build for GroundedDINO).
# Order matters: build tooling first, then Torch from the CUDA 12.1 index,
# then the Python dependencies, then the detectors themselves.
!pip -q install --upgrade pip setuptools wheel
!pip -q install torch torchvision --index-url https://download.pytorch.org/whl/cu121
!pip -q install shapely timm opencv-python pycocotools addict yacs requests pillow
!pip -q install huggingface_hub
!pip -q install 'jedi>=0.18.2'
# SAM
!pip -q install git+https://github.com/facebookresearch/segment-anything.git
# SAM2
!pip -q install git+https://github.com/facebookresearch/segment-anything-2.git
# GroundedDINO from source (made importable through sys.path below);
# the clone is always recreated from scratch.
!rm -rf /content/GroundingDINO
!git clone --depth 1 https://github.com/IDEA-Research/GroundingDINO.git /content/GroundingDINO
!pip -q install -r /content/GroundingDINO/requirements.txt
import sys
if '/content/GroundingDINO' not in sys.path: sys.path.append('/content/GroundingDINO')
# Import smoke test: fails here rather than in the detection cell.
from groundingdino.util.inference import Model
print('GroundedDINO import OK')


In [None]:
#@title Download/Resolve Model Weights
# Local checkpoint locations and download sources for GroundingDINO, SAM and SAM2,
# plus the optional Hugging Face token used by the download helpers below.
import os, pathlib, shutil, subprocess
from typing import Optional
GROUNDING_MODEL = '/content/models/groundingdino/groundingdino_swint_ogc.pth'
SAM_MODEL = '/content/models/sam/sam_vit_h_4b8939.pth'
SAM2_MODEL = '/content/models/sam2/sam2_hiera_large.pt'
# Create every target directory up front (sam2 included) so the curl and gsutil
# fallbacks always have a directory to write into.
for _ckpt_path in (GROUNDING_MODEL, SAM_MODEL, SAM2_MODEL):
  pathlib.Path(_ckpt_path).parent.mkdir(parents=True, exist_ok=True)
GROUNDING_URL = 'https://github.com/IDEA-Research/GroundingDINO/releases/download/0.1.0/groundingdino_swint_ogc.pth'
SAM_URL = 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth'
SAM2_URL = 'https://huggingface.co/facebook/sam2-hiera-large/resolve/main/sam2_hiera_large.pt'
# HF token: the environment variable is the default; a Colab secret named
# HF_TOKEN takes precedence when it can be read.
HF_TOKEN = os.getenv('HF_TOKEN', '')
try:
  from google.colab import userdata as _ud
  HF_TOKEN = _ud.get('HF_TOKEN') or HF_TOKEN
except Exception:
  # Best effort: outside Colab or without the secret, keep the env value.
  pass
def _file_ok(p: str, min_size: int) -> bool:
  try:
    return os.path.exists(p) and os.path.getsize(p) >= min_size
  except Exception:
    return False
def _try_torch_load(p: str) -> bool:
  try:
    import torch
    torch.load(p, map_location='cpu')
    return True
  except Exception as e:
    print('[warn] torch.load failed:', e)
    return False
def _hf_download(repo_id: str, filename: str, dest: str) -> bool:
  try:
    from huggingface_hub import hf_hub_download, login
    if HF_TOKEN:
      try:
        login(token=HF_TOKEN)
      except Exception as e:
        print('[warn] HF login failed:', e)
    ckpt = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=os.path.dirname(dest), local_dir_use_symlinks=False, token=HF_TOKEN or None)
    if ckpt != dest:
      shutil.copy2(ckpt, dest)
    return True
  except Exception as e:
    print('[warn] HF download failed:', e)
    return False
def _curl(url: str, dest: str, min_size: int) -> bool:
  """Download *url* to *dest* with curl, accepting it only if it is large enough.

  The payload is written to ``dest + '.tmp'`` and moved into place only when
  curl exits with 0 and the file holds at least *min_size* bytes; otherwise
  the temporary file is removed.

  Returns:
    True when *dest* was written, False on any failure.
  """
  tmp = dest + '.tmp'
  # argv list instead of a shell string: no quoting problems with paths or
  # URLs that contain quotes or other shell metacharacters.
  cmd = ['curl', '-L', '--fail', '--retry', '5', '--retry-all-errors', '-o', tmp, url]
  try:
    rc = subprocess.call(cmd)
  except OSError as e:
    # curl itself could not be started (e.g. not installed).
    print('[warn] curl could not be started:', e)
    rc = 127
  if rc == 0 and _file_ok(tmp, min_size):
    shutil.move(tmp, dest)
    return True
  print('[warn] curl download insufficient or failed:', rc)
  try:
    os.remove(tmp)
  except OSError:
    pass
  return False
def _ensure_checkpoint(label: str, dest: str, min_size: int, mirror: str, hf_repo: str, url: str, gs_uri: str, error: str, validate: bool = False) -> None:
  """Make sure a usable checkpoint exists at *dest*, fetching it when needed.

  Sources are tried in this order until *dest* passes the size check:
  the gcsfuse-mounted mirror file, the Hugging Face Hub, a direct URL via
  curl, and finally ``gsutil cp`` from the bucket.

  Args:
    label: human-readable model name used in log messages.
    dest: local checkpoint path.
    min_size: minimum acceptable file size in bytes.
    mirror: path of the mirror copy under the mounted artifacts bucket.
    hf_repo: Hugging Face repo id; the file name is the basename of *dest*.
    url: direct download URL.
    gs_uri: gs:// URI of the bucket copy.
    error: message for the SystemExit raised on failure.
    validate: also require that torch.load can read the file.

  Raises:
    SystemExit: when no source produced a valid checkpoint.
  """
  def _valid() -> bool:
    return _file_ok(dest, min_size) and (not validate or _try_torch_load(dest))

  if _valid():
    return
  print(f'Downloading {label} weights (robust)...')
  os.makedirs(os.path.dirname(dest), exist_ok=True)
  # Remove a truncated or unreadable leftover before fetching a fresh copy.
  try:
    os.remove(dest)
  except OSError:
    pass

  def _from_mirror() -> bool:
    if not _file_ok(mirror, min_size):
      return False
    try:
      shutil.copy2(mirror, dest)
      return True
    except Exception as e:
      print(f'[warn] copy {label} from GCS mirror failed:', e)
      return False

  def _from_gsutil() -> bool:
    try:
      return subprocess.call(['gsutil', 'cp', gs_uri, dest]) == 0
    except OSError as e:
      print(f'[warn] gsutil {label} mirror copy failed:', e)
      return False

  sources = (
    _from_mirror,
    lambda: _hf_download(hf_repo, os.path.basename(dest), dest),
    lambda: _curl(url, dest, min_size),
    _from_gsutil,
  )
  for fetch in sources:
    # A source only counts when the resulting file is also large enough.
    if fetch() and _file_ok(dest, min_size):
      break
  if not _valid():
    raise SystemExit(error)

# GroundedDINO (expect ~0.9GB); additionally verified with torch.load
MIN_DINO = 600_000_000
_ensure_checkpoint(
  'GroundingDINO', GROUNDING_MODEL, MIN_DINO,
  mirror='/content/artifacts/models/groundingdino/groundingdino_swint_ogc.pth',
  hf_repo='ShilongLiu/GroundingDINO',
  url=GROUNDING_URL,
  gs_uri='gs://pik-artifacts-dev/models/groundingdino/groundingdino_swint_ogc.pth',
  error='Failed to fetch a valid GroundingDINO checkpoint',
  validate=True,
)
# SAM (ViT-H is large; check size only)
MIN_SAM = 1_000_000_000
_ensure_checkpoint(
  'SAM ViT-H', SAM_MODEL, MIN_SAM,
  mirror='/content/artifacts/models/sam/sam_vit_h_4b8939.pth',
  hf_repo='facebook/sam',
  url=SAM_URL,
  gs_uri='gs://pik-artifacts-dev/models/sam/sam_vit_h_4b8939.pth',
  error='Failed to fetch SAM ViT-H checkpoint',
)
# SAM2 (Hiera Large; size check only)
MIN_SAM2 = 700_000_000
_ensure_checkpoint(
  'SAM2 Hiera Large', SAM2_MODEL, MIN_SAM2,
  mirror='/content/artifacts/models/sam2/sam2_hiera_large.pt',
  hf_repo='facebook/sam2-hiera-large',
  url=SAM2_URL,
  gs_uri='gs://pik-artifacts-dev/models/sam2/sam2_hiera_large.pt',
  error='Failed to fetch SAM2 Hiera Large checkpoint',
)
print('GROUNDING_MODEL =', GROUNDING_MODEL)
print('SAM_MODEL       =', SAM_MODEL)
print('SAM2_MODEL      =', SAM2_MODEL)


In [None]:
#@title Detection Parameters
# Colab form inputs for the run. The trailing #@param markers drive the form UI,
# so each assignment must stay on its own line in this exact shape.
PLAYBOOK_PDF = '/content/src_gcs/playbooks/PIK - Expert Guide - Platform IT Architecture - Playbook - v11.pdf'  #@param {type:"string"}
PAGES = [4,5,6,7,8,9,10,11]  #@param {type:"raw"}
FRAME_NAMES_INPUT = 'PIK - Platform IT Architecture Canvas - Table View - v01.png, PIK - Platform IT Architecture Canvases - v01.png, PIK - Expert Guide - Platform IT Architecture - Assessment - v01.png'  #@param {type:"string"}
PROMPTS_INPUT = 'diagram,canvas,table,legend,arrow,node'  #@param {type:"string"}
BOX_THRESHOLD = 0.35  #@param {type:"number"}
TEXT_THRESHOLD = 0.25  #@param {type:"number"}
TOPK = 12  #@param {type:"integer"}
DEVICE = 'auto'  #@param ["auto", "cuda", "cpu"]
USE_SAM2 = True  #@param {type:"boolean"}
# Derived lists from string inputs: split on commas, trim whitespace, drop empties
FRAME_NAMES = [x.strip() for x in FRAME_NAMES_INPUT.split(',') if x.strip()]
PROMPTS = [x.strip() for x in PROMPTS_INPUT.split(',') if x.strip()]
# Local working directories: rendered page PNGs and per-page detection output
OUT_PAGES_DIR = '/content/pages'
DETECT_OUT = '/content/grounded_regions'
print('PDF:', PLAYBOOK_PDF, 'Pages:', PAGES)


In [None]:
#@title Render Pages to PNG
# Render selected pages to PNG (robust: checks poppler + PDF presence; falls back to gsutil cp)
import os, shutil, pathlib, subprocess
from subprocess import check_call
pathlib.Path(OUT_PAGES_DIR).mkdir(parents=True, exist_ok=True)
# Ensure pdftoppm exists
if not shutil.which('pdftoppm'):
  print('Installing poppler-utils (pdftoppm)...')
  check_call(['bash','-lc','sudo apt-get -q update && sudo apt-get -q install -y poppler-utils'])
# Ensure source PDF exists; if not, copy from GCS via gsutil
src = PLAYBOOK_PDF
if not os.path.exists(src):
  print('PDF not found at', src, '; copying from GCS...')
  check_call(['bash','-lc','gsutil -m cp "gs://pik_source_bucket/playbooks/PIK - Expert Guide - Platform IT Architecture - Playbook - v11.pdf" /content/Playbook.pdf'])
  src = '/content/Playbook.pdf'
# Render pages
for p in PAGES:
  print('Rendering', p)
  check_call(['pdftoppm','-png','-singlefile','-r','150', src, f'{OUT_PAGES_DIR}/page-{p}'])
!ls -la /content/pages | head -n 10


In [None]:
#@title Run Detection: GroundedDINO → SAM/SAM2
# Production run: GroundedDINO → SAM over the rendered pages and the frame images.
import os, json, pathlib, cv2, numpy as np, torch
from groundingdino.util.inference import Model
# Device selection from the DEVICE form value: 'cpu' forces CPU; 'cuda' and
# 'auto' use CUDA when available and otherwise fall back to CPU.
_req = (DEVICE.lower() if 'DEVICE' in globals() else 'auto')
if _req == 'cpu':
  device = 'cpu'
elif torch.cuda.is_available():
  device = 'cuda'
else:
  if _req == 'cuda':
    print('[warn] CUDA requested but not available; using CPU')
  device = 'cpu'
print('Selected device:', device)
# SAM2 is preferred when enabled; any failure falls back to SAM v1.
sam2_predictor = None
if 'USE_SAM2' in globals() and USE_SAM2:
  try:
    from sam2.build_sam import build_sam2
    from sam2.sam2_image_predictor import SAM2ImagePredictor
    # NOTE(review): upstream SAM2 appears to name the Hiera-L config
    # 'sam2_hiera_l.yaml' — confirm against the installed version. The
    # previously used name is kept as the last candidate.
    sam2_model = None
    _sam2_err = None
    for _cfg in ('sam2_hiera_l.yaml', 'configs/sam2/sam2_hiera_l.yaml', 'sam2_hiera_large'):
      try:
        sam2_model = build_sam2(_cfg, SAM2_MODEL, device=device)
        break
      except Exception as e:
        _sam2_err = e
    if sam2_model is None:
      raise _sam2_err
    sam2_predictor = SAM2ImagePredictor(sam2_model)
    print('SAM2 ready on', device)
  except Exception as e:
    print('SAM2 init failed, fallback to SAM v1:', e)
    sam2_predictor = None
sam_predictor = None
if sam2_predictor is None:
  from segment_anything import sam_model_registry, SamPredictor
  # Actually build the ViT-H predictor so the "ready" message is true. A failure
  # is only warned about: the detection code in this cell does not call SAM.
  try:
    _sam = sam_model_registry['vit_h'](checkpoint=SAM_MODEL)
    _sam.to(device=device)
    sam_predictor = SamPredictor(_sam)
    print('SAM v1 ready on', device)
  except Exception as e:
    print('[warn] SAM v1 init failed:', e)
# Local path and upstream URL of the GroundingDINO SwinT-OGC model config.
CFG_PATH = '/content/GroundingDINO_SwinT_OGC.py'
CFG_URL = 'https://raw.githubusercontent.com/IDEA-Research/GroundingDINO/main/groundingdino/config/GroundingDINO_SwinT_OGC.py'
# Try to download the config if it is missing
import urllib.request, urllib.error
def _download(url, path):
  try:
    urllib.request.urlretrieve(url, path)
    return os.path.exists(path) and os.path.getsize(path) > 1000
  except Exception:
    return False
# Resolve the config: use the local copy, else download it, else fall back to
# the copy inside the importable groundingdino package.
if not os.path.exists(CFG_PATH):
  ok = _download(CFG_URL, CFG_PATH)
  if not ok:
    try:
      import groundingdino, os as _os
      CFG_PATH = _os.path.join(_os.path.dirname(groundingdino.__file__), 'config', 'GroundingDINO_SwinT_OGC.py')
    except Exception as e:
      raise FileNotFoundError('GroundingDINO config not found and download failed') from e
    # Fail here with a clear message rather than later inside Model(...).
    if not os.path.exists(CFG_PATH):
      raise FileNotFoundError('GroundingDINO config not found and download failed')
    print('Using package config at', CFG_PATH)
# Sanity check on DINO checkpoint
import torch
try:
  _ckpt = torch.load(GROUNDING_MODEL, map_location='cpu')
except Exception as e:
  raise RuntimeError(f'GroundedDINO checkpoint invalid: {e}') from e
# Only loadability matters; release the loaded object instead of keeping it
# referenced for the rest of the session.
del _ckpt
gd_model = Model(model_config_path=CFG_PATH, model_checkpoint_path=GROUNDING_MODEL, device=device)
def save_region(rdir, idx, img, xyxy):
  """Write one region as ``region-<idx>.png`` and ``region-<idx>.json`` in *rdir*.

  Args:
    rdir: output directory; must support the ``/`` operator (pathlib.Path).
    idx: region number used in both file names.
    img: image array indexed as [row, column], encodable by cv2.imencode.
    xyxy: box corners (x0, y0, x1, y1) in pixel coordinates.

  The box is clamped to the image so the stored bbox always matches the
  pixels that were cropped. A box that is empty after clamping falls back to
  the whole image, and the bbox is recorded as the full frame.
  """
  height, width = img.shape[:2]
  x0, y0, x1, y1 = map(int, xyxy)
  x0, y0 = max(0, x0), max(0, y0)
  x1, y1 = min(width, x1), min(height, y1)
  if y1 > y0 and x1 > x0:
    crop = img[y0:y1, x0:x1]
  else:
    crop = img
    x0, y0, x1, y1 = 0, 0, width, height
  ok, buf = cv2.imencode('.png', crop)
  if ok:
    (rdir/f'region-{idx}.png').write_bytes(buf.tobytes())
  # 'text' and 'image_b64' are written empty here.
  obj = { 'bbox': {'x':int(x0),'y':int(y0),'w':int(x1-x0),'h':int(y1-y0)}, 'text':'', 'image_b64':'' }
  (rdir/f'region-{idx}.json').write_text(json.dumps(obj, ensure_ascii=False), encoding='utf-8')
def ensure_dir(path):
  """Create directory *path* (including parents) if it does not exist yet."""
  os.makedirs(path, exist_ok=True)

def detect_one(image_path, out_root):
  """Run GroundedDINO on one image and save up to TOPK regions.

  Regions are written by save_region into ``<out_root>/<image stem>/regions``.

  Args:
    image_path: path of the image file to process.
    out_root: root directory for the per-image output.

  Raises:
    FileNotFoundError: when the image cannot be read.

  NOTE(review): upstream GroundingDINO's Model.predict_with_classes appears to
  take a BGR image and return a supervision Detections object (pixel ``xyxy``
  plus ``confidence``) — confirm against the installed version. A legacy
  (boxes, logits, phrases) tuple is still accepted.
  """
  img = cv2.imread(image_path)
  if img is None:
    raise FileNotFoundError(f'cannot read image: {image_path}')
  H, W = img.shape[:2]
  result = gd_model.predict_with_classes(image=img, classes=PROMPTS, box_threshold=BOX_THRESHOLD, text_threshold=TEXT_THRESHOLD)
  if hasattr(result, 'xyxy'):
    raw_boxes = result.xyxy
    scores = getattr(result, 'confidence', None)
  else:
    raw_boxes, scores = result[0], None
  bxs = []
  for b in raw_boxes:
    b = np.asarray(b, dtype=float)
    # Values up to ~1 are treated as normalized coordinates and scaled to pixels.
    if b.max() <= 1.01:
      x0, y0, x1, y1 = b[0]*W, b[1]*H, b[2]*W, b[3]*H
    else:
      x0, y0, x1, y1 = b
    bxs.append([x0, y0, x1, y1])
  # Keep the highest-scoring boxes when scores are available.
  if scores is not None and len(scores) == len(bxs):
    order = sorted(range(len(bxs)), key=lambda i: float(scores[i]), reverse=True)
    bxs = [bxs[i] for i in order]
  out = os.path.join(out_root, pathlib.Path(image_path).stem, 'regions')
  ensure_dir(out)
  for i, xyxy in enumerate(bxs[:TOPK], start=1):
    save_region(pathlib.Path(out), i, img, xyxy)
# Страницы
ensure_dir(DETECT_OUT)
for p in PAGES:
  detect_one(f'/content/pages/page-{p}.png', DETECT_OUT)
# Фреймы
for name in FRAME_NAMES:
  f = f'/content/src_gcs/frames/{name}'
  if os.path.exists(f): detect_one(f, '/content/grounded_frames')
# Выгрузка
!gsutil -m rsync -r /content/grounded_regions gs://pik-artifacts-dev/grounded_regions/
!gsutil -m rsync -r /content/grounded_frames gs://pik-artifacts-dev/grounded_regions/
!gsutil ls -r gs://pik-artifacts-dev/grounded_regions | head -n 40


In [None]:
#@title Upload Regions to GCS
# Upload regions to pik-artifacts-dev: sync the local page-region output to the
# bucket prefix, then list the first entries as a quick confirmation.
!gsutil -m rsync -r /content/grounded_regions gs://pik-artifacts-dev/grounded_regions/
!gsutil ls gs://pik-artifacts-dev/grounded_regions/ | head -n 20
