In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from os import *
from pathlib import *
import numpy as np
from PIL import Image

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
__file__ = Path(getcwd())

In [4]:
root = path.join(__file__.parent, 'data')
images_path = path.join(root, "PNGImages")
masks_path = path.join(root, "PedMasks")
images = listdir(images_path)
masks = listdir(masks_path)

def get(idx):
    image_path = path.join(images_path, images[idx])
    mask_path = path.join(masks_path, masks[idx])

    image = Image.open(image_path).convert("RGB")
    mask = np.array(Image.open(mask_path))

    obj_ids = np.unique(mask)

    return mask, obj_ids

mask, ids = get(0)
mask = mask == mask[:, None, None]
mask.shape

(536, 1, 536, 559)

In [5]:
class PennFudanDataset(data.Dataset):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms

        self.images_path = path.join(self.root, "PNGImages")
        self.images = listdir(self.images_path)

        self.masks_path = path.join(self.root, "PedMasks")
        self.masks = listdir(self.masks_path)

    def __getitem__(self, idx):
        img_path = path.join(self.images_path, self.images[idx])
        mask_path = path.join(self.masks_path, self.masks[idx])

        img = Image.open(img_path).convert("RGB")
        mask = np.array(Image.open(mask_path))

        obj_ids = np.unique(mask)[1:]   # 0번째 품번 값은 배경의 그레이스케일 값이므로 생략

        mask = (mask == mask[:, None, None])

        boxes = []
        for i in range(len(obj_ids)):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])

            boxes.append([xmin, ymin, xmax, ymax])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.ones((len(obj_ids), ), dtype=torch.int64)
        masks = torch.as_tensor(mask, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(obj_ids), ), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target
    
    def __len__(self):
        return len(self.images)
    

In [7]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\rhseung/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:21<00:00, 7.74MB/s] 


In [8]:
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

backbone = torchvision.models.mobilenet_v2(weights="DEFAULT").features
backbone.out_channels = 1280

anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512), ),
    aspect_ratios=((0.5, 1.0, 2.0), )
)

roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=["0"],
    output_size=7,
    sampling_ratio=2
)

model = FasterRCNN(
    backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler
)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to C:\Users\rhseung/.cache\torch\hub\checkpoints\mobilenet_v2-7ebf99e0.pth
100%|██████████| 13.6M/13.6M [00:02<00:00, 6.35MB/s]


In [9]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

def get_model_instance_segmentation(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)

    return model

In [11]:
import transforms as T

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor()) 
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))

    return T.Compose(transforms)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\rhseung\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3505, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\rhseung\AppData\Local\Temp\ipykernel_11792\1321818385.py", line 1, in <module>
    import transforms as T
  File "c:\Users\rhseung\AppData\Local\Programs\Python\Python311\Lib\site-packages\transforms\__init__.py", line 1, in <module>
    from .safe_html import safe_html, bodyfinder
  File "c:\Users\rhseung\AppData\Local\Programs\Python\Python311\Lib\site-packages\transforms\safe_html.py", line 1, in <module>
    from sgmllib import SGMLParser, SGMLParseError
ModuleNotFoundError: No module named 'sgmllib'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\rhseung\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 2102, in showtraceback
    stb = self