In [6]:
%cd /workspace/lane_seg

req = """\
transformers==4.55.0
accelerate>=0.33.0
evaluate>=0.4.2
torch>=2.1
opencv-python-headless>=4.8
pillow>=10.3
numpy>=1.26
"""
with open("requirements.txt","w") as f:
    f.write(req)

!pip install -r requirements.txt

/workspace/lane_seg
Collecting transformers==4.55.0 (from -r requirements.txt (line 1))
  Downloading transformers-4.55.0-py3-none-any.whl.metadata (39 kB)
Collecting accelerate>=0.33.0 (from -r requirements.txt (line 2))
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate>=0.4.2 (from -r requirements.txt (line 3))
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Collecting opencv-python-headless>=4.8 (from -r requirements.txt (line 5))
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers==4.55.0->-r requirements.txt (line 1))
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers==4.55.0->-r requirements.txt (line 1))
  Downloading regex-2025.7.34-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
Colle

In [9]:
import os, glob, zipfile, shutil

# Destination folder for the extracted dataset (any other folder name works too).
EXTRACT_DIR = "/workspace/ds_rf"

# 1) Auto-discover the dataset ZIP: any archive under /workspace whose name
#    contains 'png-mask-semantic'.
cands = glob.glob("/workspace/*png-mask-semantic*.zip")
cands.sort()
if len(cands) == 0:
    raise FileNotFoundError("'/workspace'에 *png-mask-semantic*.zip이 없습니다. Files 패널로 업로드했는지 확인!")

# When several archives match, the alphabetically first one is used.
ZIP_PATH = cands[0]
print("ZIP_PATH =", ZIP_PATH)

# 2) Start from an empty target folder: drop any previous extraction, then recreate it.
if os.path.isdir(EXTRACT_DIR):
    shutil.rmtree(EXTRACT_DIR)
os.makedirs(EXTRACT_DIR, exist_ok=True)

# 3) Extract the whole archive into the target folder.
with zipfile.ZipFile(ZIP_PATH) as z:
    z.extractall(EXTRACT_DIR)

# 4) Report where the data went and what sits at the top level.
print("✅ Unzipped to:", EXTRACT_DIR)
print("📂 Top-level:", os.listdir(EXTRACT_DIR))

ZIP_PATH = /workspace/---.v1i.png-mask-semantic.zip
✅ Unzipped to: /workspace/ds_rf
📂 Top-level: ['valid', 'train', 'test', 'README.roboflow.txt', 'README.dataset.txt']


In [10]:
# 팔레트 옵션 지원 + 기본 팔레트(1:노랑, 2:시안, 3:파랑, 4:흰색)
code = r'''
import os, glob, argparse, cv2, numpy as np, torch
from PIL import Image
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

def load_model(model_dir, device):
    """Load the SegFormer image processor and segmentation model from `model_dir`.

    The model is moved to `device` and put into eval mode.

    Returns:
        A `(processor, model)` tuple.
    """
    image_processor = SegformerImageProcessor.from_pretrained(model_dir)
    seg_model = SegformerForSemanticSegmentation.from_pretrained(model_dir)
    seg_model = seg_model.to(device)
    seg_model.eval()
    return image_processor, seg_model

@torch.no_grad()
def predict_mask(proc, model, bgr):
    """Predict a per-pixel class-id mask for one BGR frame.

    The frame is converted to an RGB PIL image, run through `proc` and `model`,
    and the logits are bilinearly resized to the frame's height and width
    before taking the argmax over the class dimension.

    Returns:
        A uint8 numpy array of class ids for the first (only) item of the batch.
    """
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(rgb)
    # PIL reports size as (width, height); interpolate expects (height, width).
    width, height = img.size
    batch = proc(images=img, return_tensors="pt").to(model.device)
    logits = model(**batch).logits
    upsampled = torch.nn.functional.interpolate(
        logits, size=(height, width), mode="bilinear", align_corners=False
    )
    class_ids = upsampled.argmax(dim=1)[0]
    return class_ids.cpu().numpy().astype(np.uint8)

def overlay_multiclass(bgr, mask, palette, alpha=0.5):
    """Tint `bgr` with the palette color of every non-background class in `mask`.

    Args:
        bgr: BGR image to draw on.
        mask: array of class ids, compared element-wise against palette keys.
        palette: mapping `class_id -> BGR color`; class id 0 is treated as
            background and never drawn.
        alpha: weight applied to the color layer.

    Returns:
        `cv2.addWeighted(bgr, 1.0, color_layer, alpha, 0)`: the original image
        at full weight plus the color layer scaled by `alpha`. Pixels whose
        class has no palette entry keep a zero color and are left unchanged.
    """
    color_layer = np.zeros_like(bgr)
    for class_id, bgr_color in palette.items():
        if class_id != 0:
            color_layer[mask == class_id] = bgr_color
    return cv2.addWeighted(bgr, 1.0, color_layer, alpha, 0)

def parse_palette(palette_str):
    """Parse a palette specification into a `{class_id: (b, g, r)}` dict.

    `palette_str` is a comma-separated list of `id:#RRGGBB` entries, for
    example "1:#FFFF00,2:#00FFFF,3:#0000FF,4:#FFFFFF" given as RGB hex.
    The leading `#` is optional and the 3-digit `#RGB` shorthand is accepted.
    Each color is returned as a BGR tuple.

    Blank entries are skipped, so an empty string yields an empty dict.

    Raises:
        ValueError: if an entry is not of the form `id:hex`, the id is not an
            integer, or the color is not exactly 3 or 6 hexadecimal digits.
    """
    hex_digits = "0123456789abcdefABCDEF"
    pal = {}
    for entry in palette_str.split(","):
        entry = entry.strip()
        if not entry:
            continue
        cid_str, sep, hexv = entry.partition(":")
        if not sep:
            raise ValueError(f"palette entry {entry!r} must look like 'id:#RRGGBB'")
        try:
            cid = int(cid_str)
        except ValueError:
            raise ValueError(f"palette entry {entry!r} has a non-integer class id") from None
        hexv = hexv.strip().lstrip("#")
        # Check the digits explicitly: int(..., 16) alone would accept forms such
        # as "+f" or "f_f", and slicing would silently drop trailing characters.
        if len(hexv) not in (3, 6) or any(ch not in hex_digits for ch in hexv):
            raise ValueError(f"palette entry {entry!r} needs a 3- or 6-digit hex color")
        if len(hexv) == 3:
            # Expand shorthand: "FA0" -> "FFAA00".
            hexv = "".join(ch * 2 for ch in hexv)
        r, g, b = (int(hexv[i:i + 2], 16) for i in (0, 2, 4))
        pal[cid] = (b, g, r)
    return pal

def _open_video_writer(path, fps, size, fourccs=("mp4v", "avc1", "XVID")):
    """Return an opened cv2.VideoWriter for `path`, trying each FOURCC in order.

    Exits with an error when no codec can open the output, so the caller never
    reports success for a file that was not written.
    """
    for fourcc_str in fourccs:
        writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*fourcc_str), fps, size)
        if writer.isOpened():
            return writer
        writer.release()
    raise SystemExit(f"cannot open video writer for {path} (tried {', '.join(fourccs)})")


def _video_frames(cap):
    """Yield BGR frames from an opened cv2.VideoCapture until reading fails."""
    while True:
        ok, frame = cap.read()
        if not ok:
            return
        yield frame


def _image_frames(paths):
    """Yield BGR images for `paths`, skipping files OpenCV cannot decode."""
    for p in paths:
        bgr = cv2.imread(p)
        if bgr is not None:
            yield bgr


def _render_overlay_video(frames, output, fps, proc, model, palette, alpha):
    """Segment each frame, overlay the mask, and stream the result to `output`.

    Frames are written as soon as they are processed instead of being kept in
    memory. The writer is opened lazily with the size of the first frame; later
    frames of a different size are resized to match it.

    Returns:
        The number of frames written (0 when `frames` was empty; in that case
        no writer is opened).
    """
    writer = None
    size = None
    count = 0
    try:
        for bgr in frames:
            m = predict_mask(proc, model, bgr)
            out = overlay_multiclass(bgr, m, palette, alpha)
            if writer is None:
                h, w = out.shape[:2]
                size = (w, h)
                writer = _open_video_writer(output, fps, size)
            elif (out.shape[1], out.shape[0]) != size:
                out = cv2.resize(out, size)
            writer.write(out)
            count += 1
    finally:
        if writer is not None:
            writer.release()
    return count


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--model_dir", required=True)
    ap.add_argument("--input")
    ap.add_argument("--images_dir")
    ap.add_argument("--output", required=True)
    ap.add_argument("--alpha", type=float, default=0.5)
    ap.add_argument("--fps", type=int, default=None)
    ap.add_argument("--device", default="cuda")
    ap.add_argument("--palette", default="", help="예: '1:#FFFF00,2:#00FFFF,3:#0000FF,4:#FFFFFF' (RGB hex)")
    args = ap.parse_args()

    # Validate the cheap things first so a bad command line fails before the
    # model is loaded. Exactly one of --input / --images_dir must be given.
    if (args.input is None) == (args.images_dir is None):
        raise SystemExit("하나만 지정: --input (video)  또는  --images_dir (folder)")

    # Default palette. These tuples are in BGR order; parse_palette takes
    # RGB hex on the command line and converts it to BGR.
    palette = {
        1: (0, 255, 255),  # lane        -> Yellow
        2: (255, 255, 0),  # lane_dot    -> Cyan
        3: (255, 0, 0),    # lane_mid    -> Blue
        4: (255, 255, 255) # crosswalk   -> White
    }
    if args.palette:
        palette = parse_palette(args.palette)

    # Fall back to CPU only when a CUDA device was requested but is unavailable;
    # any other device string (e.g. "cpu", "cuda:0") is passed through as given.
    if args.device.startswith("cuda") and not torch.cuda.is_available():
        device = "cpu"
    else:
        device = args.device
    proc, model = load_model(args.model_dir, device)

    if args.input:
        cap = cv2.VideoCapture(args.input)
        if not cap.isOpened():
            raise SystemExit(f"cannot open {args.input}")
        # Prefer --fps, then the source frame rate, then 15 when the source reports 0.
        use_fps = args.fps or cap.get(cv2.CAP_PROP_FPS) or 15
        try:
            n_written = _render_overlay_video(
                _video_frames(cap), args.output, use_fps, proc, model, palette, args.alpha
            )
        finally:
            cap.release()
    else:
        paths = sorted(p for p in glob.glob(os.path.join(args.images_dir, "*")) if os.path.isfile(p))
        if not paths:
            raise SystemExit(f"No images in {args.images_dir}")
        n_written = _render_overlay_video(
            _image_frames(paths), args.output, args.fps or 15, proc, model, palette, args.alpha
        )

    if n_written == 0:
        raise SystemExit("no frames could be decoded; nothing was written")
    print("✅ Saved:", args.output)
'''
# Write the script with an explicit UTF-8 encoding (it contains non-ASCII text)
# and close the file deterministically instead of relying on garbage collection.
with open("/workspace/lane_seg/infer_segformer_video.py", "w", encoding="utf-8") as f:
    f.write(code)
print("✅ infer_segformer_video.py updated (palette supported)")

✅ infer_segformer_video.py updated (palette supported)


In [6]:
# Fine-tune on the dataset extracted to /workspace/ds_rf, starting from the
# checkpoint given by --ckpt and writing results to /workspace/segformer-lane-mc.
# --classes lists five names while the checkpoint's classifier has 2 outputs, so
# the log below reports decode_head.classifier as newly initialized with 5 outputs.
# NOTE(review): the printed value_map suggests raw mask values are remapped to
# contiguous class ids - confirm in train_segformer.py.
!python /workspace/lane_seg/train_segformer.py \
  --data_dir /workspace/ds_rf \
  --output_dir /workspace/segformer-lane-mc \
  --epochs 15 --batch 4 \
  --ckpt /workspace/segformer-lane/best \
  --classes "background,lane,lane_dot,lane_mid,lane_crosswalk"

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at /workspace/segformer-lane/best and are newly initialized because the shapes did not match:
- decode_head.classifier.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([5]) in the model instantiated
- decode_head.classifier.weight: found shape torch.Size([2, 256, 1, 1]) in the checkpoint and torch.Size([5, 256, 1, 1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
🔎 value_map: {0: 0, 1: 1, 3: 2, 4: 3, 5: 4}
{'loss': 1.3736, 'grad_norm': 5.303241729736328, 'learning_rate': 4.954119850187266e-05, 'epoch': 0.14}
{'loss': 0.931, 'grad_norm': 4.344597339630127, 'learning_rate': 4.9073033707865176e-05, 'epoch': 0.28}
{'loss': 0.6168, 'grad_norm': 3.3623993396759033, 'learning_rate': 4.860486891385768e-05, 'epoch': 0.42}
{'loss': 0.4113, 'grad_norm': 1.8639006614685059, 'learning_rate': 4.813

In [22]:
# Upload the video before running this cell; it then prints the file names of
# the .mp4 files found directly under /workspace.
import glob, os
print(list(map(os.path.basename, glob.glob("/workspace/*.mp4"))))

['KakaoTalk_20250707_100128756.mp4']


In [11]:
# Run the overlay script on the uploaded video: pass its path as --input.
!python /workspace/lane_seg/infer_segformer_video.py \
  --model_dir /workspace/segformer-lane-mc/best \
  --input "/workspace/KakaoTalk_20250707_100128756.mp4" \
  --output "/workspace/out_lane_overlay_mc.mp4" \
  --alpha 0.5 --device cuda \
  --palette "1:#FFFF00,2:#00FFFF,3:#0000FF,4:#FFFFFF"

✅ Saved: /workspace/out_lane_overlay_mc.mp4
