In [6]:
%cd /workspace/lane_seg

req = """\
transformers==4.55.0
accelerate>=0.33.0
evaluate>=0.4.2
torch>=2.1
opencv-python-headless>=4.8
pillow>=10.3
numpy>=1.26
"""
with open("requirements.txt","w") as f:
    f.write(req)

!pip install -r requirements.txt

/workspace/lane_seg
Collecting transformers==4.55.0 (from -r requirements.txt (line 1))
  Downloading transformers-4.55.0-py3-none-any.whl.metadata (39 kB)
Collecting accelerate>=0.33.0 (from -r requirements.txt (line 2))
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate>=0.4.2 (from -r requirements.txt (line 3))
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Collecting opencv-python-headless>=4.8 (from -r requirements.txt (line 5))
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers==4.55.0->-r requirements.txt (line 1))
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers==4.55.0->-r requirements.txt (line 1))
  Downloading regex-2025.7.34-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
Colle

In [9]:
import os, glob, zipfile, shutil

# 1) ZIP 자동 탐색 (이름에 'png-mask-semantic' 들어간 걸 찾아요)
cands = sorted(glob.glob("/workspace/*png-mask-semantic*.zip"))
if not cands:
    raise FileNotFoundError("'/workspace'에 *png-mask-semantic*.zip이 없습니다. Files 패널로 업로드했는지 확인!")
ZIP_PATH = cands[0]  # 첫 번째 걸 사용
print("ZIP_PATH =", ZIP_PATH)

# 2) 압축 풀 대상 폴더
EXTRACT_DIR = "/workspace/ds_rf"   # <- 여기가 extract_dir (원하면 다른 폴더명도 가능)

# 3) 깨끗이 비우고 다시 생성
if os.path.isdir(EXTRACT_DIR):
    shutil.rmtree(EXTRACT_DIR)
os.makedirs(EXTRACT_DIR, exist_ok=True)

# 4) 압축 해제
with zipfile.ZipFile(ZIP_PATH, "r") as z:
    z.extractall(EXTRACT_DIR)

print("✅ Unzipped to:", EXTRACT_DIR)
print("📂 Top-level:", os.listdir(EXTRACT_DIR))

ZIP_PATH = /workspace/---.v1i.png-mask-semantic.zip
✅ Unzipped to: /workspace/ds_rf
📂 Top-level: ['valid', 'train', 'test', 'README.roboflow.txt', 'README.dataset.txt']


In [10]:
# === train_segformer.py 저장 (Transformers 4.x용) ===
train_py = r'''
import os, glob, re, argparse, numpy as np, evaluate, torch
from PIL import Image
from torch.utils.data import Dataset
from transformers import (
    SegformerImageProcessor, SegformerForSemanticSegmentation,
    TrainingArguments, Trainer
)

os.environ["WANDB_DISABLED"] = "true"

def is_img(n): return n.lower().endswith((".jpg",".jpeg",".png",".bmp",".tif",".tiff"))
def find_splits(root):
    out={}
    for n in ("train","valid","val","test"):
        p=os.path.join(root,n)
        if os.path.isdir(p): out["valid" if n in ("valid","val") else n]=p
    return out

_SUFFIX_RE = re.compile(r'(_|-)(mask|masks|label|labels|seg|segment|segmentation)$', re.I)
def stem_no_suffix(p):
    s=os.path.splitext(os.path.basename(p))[0]
    return _SUFFIX_RE.sub('', s)

class RFSegFolder(Dataset):
    def __init__(self, split_dir, processor, collapse_to_binary=True):
        img_cands=[os.path.join(split_dir,"images"), split_dir]
        self.img_dir=None
        for d in img_cands:
            if os.path.isdir(d) and any(is_img(f) for f in os.listdir(d)):
                self.img_dir=d; break
        if self.img_dir is None: raise RuntimeError(f"No images in {split_dir}")

        mask_cands=["masks","labels","annotations","masks_png","labels_png","mask","Labels","Masks"]
        self.mask_dirs=[os.path.join(split_dir,c) for c in mask_cands if os.path.isdir(os.path.join(split_dir,c))]
        if not self.mask_dirs:
            self.mask_dirs=[]
            for r,_,files in os.walk(split_dir):
                if os.path.abspath(r)==os.path.abspath(self.img_dir): continue
                if any(f.lower().endswith(".png") for f in files): self.mask_dirs.append(r)
            if not self.mask_dirs: self.mask_dirs=[split_dir]

        mask_map={}
        for md in self.mask_dirs:
            for p in glob.glob(os.path.join(md,"*.png")):
                mask_map[stem_no_suffix(p)]=p

        self.items=[]
        for ip in sorted(glob.glob(os.path.join(self.img_dir,"*.*"))):
            if not is_img(ip): continue
            mp = mask_map.get(stem_no_suffix(ip))
            if mp and os.path.exists(mp): self.items.append((ip,mp))
        if not self.items: raise RuntimeError(f"No (image,mask) pairs in {split_dir}")

        self.processor=processor
        self.collapse_to_binary=collapse_to_binary

    def __len__(self): return len(self.items)
    def __getitem__(self, idx):
        ip, mp = self.items[idx]
        image = Image.open(ip).convert("RGB")
        m = np.array(Image.open(mp).convert("L"), dtype=np.uint8)
        if self.collapse_to_binary: m = (m>0).astype(np.uint8)  # 0/1
        enc = self.processor(images=image, segmentation_maps=m, return_tensors="pt")
        return {k:v.squeeze(0) for k,v in enc.items()}

def to_py(o):
    import numpy as np
    if isinstance(o,np.ndarray): return o.tolist()
    if isinstance(o,(np.floating,np.integer)): return o.item()
    return o

def main():
    ap=argparse.ArgumentParser()
    ap.add_argument("--data_dir", default="/workspace/ds_rf")
    ap.add_argument("--output_dir", default="/workspace/segformer-lane")
    ap.add_argument("--epochs", type=int, default=20)
    ap.add_argument("--batch", type=int, default=4)
    ap.add_argument("--ckpt", default="nvidia/segformer-b0-finetuned-ade-512-512")
    ap.add_argument("--binary", action="store_true")
    args=ap.parse_args()

    splits=find_splits(args.data_dir)
    if not splits: raise RuntimeError(f"No train/valid/test under {args.data_dir}")

    class_names=["background","lane"] if args.binary else ["background","lane","lane-dot","lane-mid","lane_crosswalk"]
    id2label={i:n for i,n in enumerate(class_names)}
    label2id={n:i for i,n in id2label.items()}
    num_labels=len(class_names)

    processor=SegformerImageProcessor.from_pretrained(args.ckpt)
    model=SegformerForSemanticSegmentation.from_pretrained(
        args.ckpt, num_labels=num_labels, id2label=id2label, label2id=label2id,
        ignore_mismatched_sizes=True
    )

    train_dir=splits.get("train")
    valid_dir=splits.get("valid") or splits.get("val") or train_dir
    train_ds=RFSegFolder(train_dir, processor, collapse_to_binary=args.binary)
    val_ds  =RFSegFolder(valid_dir,   processor, collapse_to_binary=args.binary)

    metric=evaluate.load("mean_iou")
    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        if isinstance(logits,tuple): logits=logits[0]
        lt=torch.from_numpy(logits); yt=torch.from_numpy(labels)
        up=torch.nn.functional.interpolate(lt, size=yt.shape[-2:], mode="bilinear", align_corners=False)
        preds=up.argmax(dim=1).cpu().numpy()
        res=metric.compute(predictions=preds, references=labels, num_labels=num_labels, ignore_index=255, reduce_labels=False)
        return {k:to_py(v) for k,v in res.items()}

    args_tr=TrainingArguments(
        output_dir=args.output_dir, learning_rate=5e-5,
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.batch,
        per_device_eval_batch_size=args.batch,
        evaluation_strategy="epoch", save_strategy="epoch",
        fp16=torch.cuda.is_available(), logging_steps=50,
        load_best_model_at_end=True, metric_for_best_model="mean_iou",
        greater_is_better=True, report_to="none", seed=42,
    )

    trainer=Trainer(model=model, args=args_tr, train_dataset=train_ds, eval_dataset=val_ds, compute_metrics=compute_metrics)
    trainer.train()

    best=os.path.join(args.output_dir,"best"); os.makedirs(best,exist_ok=True)
    trainer.save_model(best); processor.save_pretrained(best)
    print(f"✅ Saved to {best}")

if __name__=="__main__": main()
'''
open("/workspace/lane_seg/train_segformer.py","w").write(train_py)

# === infer_segformer_video.py 저장 ===
infer_py = r'''
import os, glob, argparse, cv2, numpy as np, torch
from PIL import Image
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

def load_model(model_dir, device):
    processor=SegformerImageProcessor.from_pretrained(model_dir)
    model=SegformerForSemanticSegmentation.from_pretrained(model_dir).to(device).eval()
    return processor, model

@torch.no_grad()
def predict_mask(proc, model, bgr):
    img=Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
    inputs=proc(images=img, return_tensors="pt").to(model.device)
    logits=model(**inputs).logits
    up=torch.nn.functional.interpolate(logits, size=img.size[::-1], mode="bilinear", align_corners=False)
    return up.argmax(dim=1)[0].cpu().numpy().astype(np.uint8)

def overlay(bgr, mask, alpha=0.5):
    color=np.zeros_like(bgr); color[mask==1]=(255,0,0)
    return cv2.addWeighted(bgr,1.0,color,alpha,0)

def write_video(frames, out_path, fps=15):
    h,w=frames[0].shape[:2]
    for fourcc_str in ["mp4v","avc1","XVID"]:
        vw=cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*fourcc_str), fps, (w,h))
        if vw.isOpened():
            for f in frames: vw.write(f)
            vw.release()
            if os.path.exists(out_path) and os.path.getsize(out_path)>0: return
    raise RuntimeError("Video write failed; try different codec/ext.")

def run_on_video(model_dir, input_path, output_path, alpha=0.5, fps=None, device="cuda"):
    device = device if (device=="cuda" and torch.cuda.is_available()) else "cpu"
    proc, model = load_model(model_dir, device)
    cap=cv2.VideoCapture(input_path); assert cap.isOpened(), f"cannot open {input_path}"
    frames=[]; src_fps=cap.get(cv2.CAP_PROP_FPS) or 15; use_fps=fps or src_fps
    while True:
        ok,frame=cap.read()
        if not ok: break
        m=predict_mask(proc, model, frame)
        frames.append(overlay(frame,m,alpha))
    cap.release(); write_video(frames,output_path,use_fps); print("✅ Saved:",output_path)

def run_on_images(model_dir, images_dir, output_path, alpha=0.5, fps=15, device="cuda"):
    device = device if (device=="cuda" and torch.cuda.is_available()) else "cpu"
    proc, model = load_model(model_dir, device)
    paths=sorted([p for p in glob.glob(os.path.join(images_dir,"*")) if os.path.isfile(p)])
    assert paths, f"No images in {images_dir}"
    frames=[]
    for p in paths:
        bgr=cv2.imread(p); 
        if bgr is None: continue
        m=predict_mask(proc, model, bgr)
        frames.append(overlay(bgr,m,alpha))
    write_video(frames,output_path,fps); print("✅ Saved:",output_path)

if __name__=="__main__":
    ap=argparse.ArgumentParser()
    ap.add_argument("--model_dir",required=True)
    ap.add_argument("--input"); ap.add_argument("--images_dir")
    ap.add_argument("--output",required=True)
    ap.add_argument("--alpha",type=float,default=0.5)
    ap.add_argument("--fps",type=int,default=None)
    ap.add_argument("--device",default="cuda")
    args=ap.parse_args()
    if (args.input is None)==(args.images_dir is None):
        raise SystemExit("Use exactly one of --input or --images_dir")
    if args.input:  run_on_video(args.model_dir,args.input,args.output,args.alpha,args.fps,args.device)
    else:           run_on_images(args.model_dir,args.images_dir,args.output,args.alpha,args.fps or 15,args.device)
'''
open("/workspace/lane_seg/infer_segformer_video.py","w").write(infer_py)
print("✅ scripts written to /workspace/lane_seg")

✅ scripts written to /workspace/lane_seg


In [15]:
# 키 이름 패치: evaluation_strategy → eval_strategy (Transformers 5.x)
!sed -i 's/evaluation_strategy/eval_strategy/g' /workspace/lane_seg/train_segformer.py

# 학습 실행
!python /workspace/lane_seg/train_segformer.py \
  --data_dir /workspace/ds_rf \
  --output_dir /workspace/segformer-lane \
  --epochs 20 --batch 4 --binary

  image_processor = cls(**image_processor_dict)
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b0-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([2]) in the model instantiated
- decode_head.classifier.weight: found shape torch.Size([150, 256, 1, 1]) in the checkpoint and torch.Size([2, 256, 1, 1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
{'loss': 0.4281, 'grad_norm': 3.0016708374023438, 'learning_rate': 4.9655898876404494e-05, 'epoch': 0.14}
{'loss': 0.294, 'grad_norm': 1.652809739112854, 'learning_rate': 4.930477528089888e-05, 'epoch': 0.28}
{'loss': 0.196, 'grad_norm': 1.5567960739135742, 'learning_rate': 4.895365168539326e-05, 'epoch': 0.42}
{'loss': 0.1499, 'grad_norm': 0.8386121392250061, 'le

In [22]:
#이 셀을 실행하기 전에, 동영상을 먼저 업로드해야 이름을 확인해줌.
import glob, os
print([os.path.basename(p) for p in glob.glob("/workspace/*.mp4")])

['KakaoTalk_20250707_100128756.mp4']


In [23]:
#내가 업로드한 input 동영상을 입력.
!python /workspace/lane_seg/infer_segformer_video.py --model_dir /workspace/segformer-lane/best --input /workspace/KakaoTalk_20250707_100128756.mp4 --output /workspace/out_lane_overlay.mp4 --alpha 0.5 --device cuda

✅ Saved: /workspace/out_lane_overlay.mp4
