In [None]:
# 데이터셋 다운로드 (Penn-Fudan)
import os, urllib.request, tarfile, numpy as np, torch, torchvision, matplotlib.pyplot as plt
from PIL import Image
from torchvision.transforms import functional as F

import zipfile

# Fetch the Penn-Fudan pedestrian archive once and unpack it under `root`.
root = '/content/pennfudan'                     # dataset root directory
os.makedirs(root, exist_ok=True)                # create it if it does not exist yet
url = 'https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip'  # official link
zip_path = os.path.join(root, 'PennFudanPed.zip')

if not os.path.exists(zip_path):                # download only when the archive is missing
    # Download under a temporary name and rename on success. Writing straight
    # to `zip_path` would leave a truncated file behind if the transfer were
    # interrupted, and the existence check above would then skip the download
    # on every later run and fail while opening the broken archive.
    partial_path = zip_path + '.part'
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, zip_path)

with zipfile.ZipFile(zip_path, 'r') as zf:      # unpack next to the archive
    zf.extractall(root)


In [None]:
# 데이터셋 클래스 정의
class PennFudanDataset(torch.utils.data.Dataset):
    """Penn-Fudan pedestrian images paired with detection targets.

    Images are read from ``<root>/PennFudanPed/PNGImages`` and instance masks
    from ``<root>/PennFudanPed/PedMasks``. Both directory listings are sorted
    and paired by position, so the two folders are expected to contain
    matching files in the same order.

    Args:
        root: Directory that contains the extracted ``PennFudanPed`` folder.
        transforms: Optional callable applied to the RGB PIL image only; the
            target is returned unchanged.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PennFudanPed/PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PennFudanPed/PedMasks"))))

    @staticmethod
    def _target_from_mask(mask, idx):
        """Build the detection target dict from a 2-D instance-id mask.

        Args:
            mask: 2-D array in which 0 is background and every other value
                identifies one object instance.
            idx: Dataset index, stored as ``image_id``.

        Returns:
            Dict with ``boxes`` (N, 4) float32 as ``[xmin, ymin, xmax, ymax]``
            in inclusive pixel coordinates, ``labels`` (N,) int64 all equal to
            1, ``masks`` (N, H, W) uint8, ``image_id`` (1,), ``area`` (N,) and
            ``iscrowd`` (N,) int64 zeros.
        """
        mask = np.asarray(mask)
        obj_ids = np.unique(mask)
        # Drop the background id explicitly. Slicing off the first unique
        # value would discard a real instance whenever the mask happens to
        # contain no background pixel at all.
        obj_ids = obj_ids[obj_ids != 0]

        boxes = []
        for oid in obj_ids:
            rows, cols = np.where(mask == oid)         # pixel coordinates of this instance
            boxes.append([cols.min(), rows.min(), cols.max(), rows.max()])
        # reshape(-1, 4) keeps the (0, 4) shape when there are no instances,
        # so the column indexing below stays valid.
        boxes = torch.as_tensor(np.array(boxes, dtype=np.float32).reshape(-1, 4))

        labels = torch.ones((len(obj_ids),), dtype=torch.int64)  # single class: person = 1
        # Broadcasting (H, W) against (N, 1, 1) yields one binary mask per instance.
        masks = torch.as_tensor(mask == obj_ids[:, None, None], dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(obj_ids),), dtype=torch.int64)

        return {"boxes": boxes, "labels": labels, "masks": masks,
                "image_id": image_id, "area": area, "iscrowd": iscrowd}

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``."""
        img_path = os.path.join(self.root, "PennFudanPed/PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PennFudanPed/PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")      # RGB image
        # The mask is deliberately not converted: each instance has its own id.
        mask = np.array(Image.open(mask_path))

        target = self._target_from_mask(mask, idx)
        if self.transforms is not None:
            img = self.transforms(img)
        return img, target

    def __len__(self):
        """Number of images found in the PNGImages directory."""
        return len(self.imgs)

In [None]:
# 모델: Faster R-CNN (사전학습) 미세튜닝
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
# Start from torchvision's default pretrained Faster R-CNN weights.
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn(weights=weights)

# Replace the box-classification head so it predicts 2 classes
# (background + person) instead of the pretrained label set.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features, 2
)

# Prefer the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    model = model.to('cuda')
else:
    model = model.to('cpu')

# Images only need conversion to float tensors; the detection model does its
# own normalisation and resizing internally.
tfm = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

SPLIT_SEED = 0        # fixes which images are held out, across kernel restarts
NUM_TEST = 20         # number of images reserved for testing

dataset = PennFudanDataset(root, transforms=tfm)
# A dedicated generator makes the split reproducible without reseeding the
# global RNG that training-time shuffling relies on.
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(len(dataset), generator=split_rng).tolist()
train_dataset = torch.utils.data.Subset(dataset, indices[:-NUM_TEST])
test_dataset  = torch.utils.data.Subset(dataset, indices[-NUM_TEST:])


def collate_fn(batch):
    """Turn a list of ``(image, target)`` pairs into ``(images, targets)`` tuples.

    Images and targets are kept as tuples rather than stacked tensors, which
    leaves each sample's own image size and box count intact.
    """
    return tuple(zip(*batch))


train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)
test_loader  = torch.utils.data.DataLoader(test_dataset,  batch_size=1, shuffle=False, collate_fn=collate_fn)

In [None]:
#  학습 루프
import torch.optim as optim
# Optimise only the parameters that are still trainable.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.train()
for ep in range(2):   # two epochs, enough for a demo
    epoch_loss_sum = 0.0
    num_batches = 0
    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)           # dict of detection losses in train mode
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        epoch_loss_sum += losses.item()
        num_batches += 1
    if num_batches == 0:
        # Nothing was trained; `losses` would be unbound (or stale) here.
        print(f"Epoch {ep+1} done. no training batches")
    else:
        # The last-batch value alone is noisy, so the epoch mean is shown too.
        print(f"Epoch {ep+1} done. total_loss={losses.item():.3f} "
              f"(epoch mean={epoch_loss_sum / num_batches:.3f})")

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch

# Run inference on one held-out sample. Nothing earlier in the notebook
# defines `pred` or `img`, so they are produced here to keep the cell
# runnable after Restart & Run All.
model.eval()
img, _ = test_dataset[0]
model_device = next(model.parameters()).device    # wherever the model currently lives
with torch.no_grad():
    pred = model([img.to(model_device)])[0]

boxes = pred['boxes'].cpu().numpy()
scores = pred['scores'].cpu().numpy()

# Keep only confident detections.
thr = 0.6
keep = scores >= thr
boxes, scores = boxes[keep], scores[keep]

# Convert the CHW float tensor to an HWC uint8 image. Moving to the CPU first
# and scaling by a scalar avoids a device mismatch when `img` is on the GPU;
# rounding avoids values such as 254.99998 being truncated to 254.
vis_tensor_cpu = img.detach().cpu().mul(255).round().clamp(0, 255).byte().permute(1, 2, 0)
# .copy() produces a writable, C-contiguous array, which OpenCV drawing needs.
vis = vis_tensor_cpu.numpy().copy()

# `vis` is shown with plt.imshow as RGB, so (0, 0, 255) renders as blue.
for (x1, y1, x2, y2), s in zip(boxes, scores):
    cv2.rectangle(vis, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
    # Clamp the label baseline so scores of boxes touching the top edge stay visible.
    text_y = max(int(y1) - 5, 15)
    cv2.putText(vis, f"{s:.2f}", (int(x1), text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

plt.figure(figsize=(6, 8))
plt.imshow(vis)
plt.axis('off')
plt.title('Penn-Fudan(FT)')
plt.show()