In [4]:
!pip install segmentation_models_pytorch

Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.3.3-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting pretrainedmodels==0.7.4 (from segmentation_models_pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting efficientnet-pytorch==0.7.1 (from segmentation_models_pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting timm==0.9.2 (from segmentation_models_pytorch)
  Downloading timm-0.9.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Collecting munch (from pretrainedmodels==0.7.4->segme

In [14]:
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
import os
import numpy as np
import random
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
from torchvision.models.segmentation import fcn_resnet101
from torchvision.models.segmentation import deeplabv3_resnet101
from torchvision.models.segmentation import FCN_ResNet101_Weights
import segmentation_models_pytorch as smp

#증강 안했을 때 # 출력형식 다른 나머지

def create_model(model_name, num_classes):
    if model_name == 'unet':
        model = smp.Unet(encoder_name="resnet34", classes=num_classes, encoder_weights="imagenet")
    elif model_name == 'pspnet':
        model = smp.PSPNet(encoder_name="resnet34", classes=num_classes, encoder_weights="imagenet")
    elif model_name == 'deeplabv3plus':
        model = smp.DeepLabV3Plus(encoder_name="resnet34", classes=num_classes, encoder_weights="imagenet")
    elif model_name == 'linknet':
        model = smp.Linknet(encoder_name="resnet34", classes=num_classes, encoder_weights="imagenet")
    else:
        print("Unsupported model: {}".format(model_name))
        return None
    return model

class SegmentationDataset(Dataset):
    def __init__(self, image_dir, label_dir, image_list, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        self.images = image_list

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        label_path = os.path.join(self.label_dir, self.images[idx].replace('.jpg', '_mask.png'))
        image = Image.open(img_path).convert('RGB')
        label = Image.open(label_path).convert('L')
        if self.transform:
            image = self.transform(image)
            label = self.transform(label) * 255
        return image, label.squeeze().long()

def split_dataset(image_dir, test_ratio=0.2):
    images = os.listdir(image_dir)
    random.shuffle(images)
    split_idx = int(len(images) * (1 - test_ratio))
    return images[:split_idx], images[split_idx:]

test_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])
train_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

image_dir = '/content/drive/MyDrive/Colab Notebooks/segmentation/images'
label_dir = '/content/drive/MyDrive/Colab Notebooks/segmentation/labels'
train_images, test_images = split_dataset(image_dir)
train_dataset = SegmentationDataset(image_dir=image_dir, label_dir=label_dir, image_list=train_images, transform=train_transforms)
test_dataset = SegmentationDataset(image_dir, label_dir, test_images, transform=test_transforms)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_names = ["unet", "pspnet", "deeplabv3plus", "linknet"]
num_classes = 4

for model_name in model_names:
    model = create_model(model_name, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    num_epochs = 25

    for epoch in range(num_epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f'{model_name} - Epoch {epoch+1}, Loss: {loss.item()}')

    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            labels = labels.cpu().numpy()
            all_preds.extend(preds.flatten())
            all_labels.extend(labels.flatten())
    accuracy = accuracy_score(all_labels, all_preds)
    print(f'{model_name} - Test Accuracy: {accuracy}')

unet - Epoch 1, Loss: 0.4258987009525299
unet - Epoch 2, Loss: 0.3520199954509735
unet - Epoch 3, Loss: 0.3343725800514221
unet - Epoch 4, Loss: 0.7797642946243286
unet - Epoch 5, Loss: 0.1825096309185028
unet - Epoch 6, Loss: 0.14660552144050598
unet - Epoch 7, Loss: 0.15887451171875
unet - Epoch 8, Loss: 0.1435546576976776
unet - Epoch 9, Loss: 0.17108339071273804
unet - Epoch 10, Loss: 0.19695845246315002
unet - Epoch 11, Loss: 0.11237483471632004
unet - Epoch 12, Loss: 0.16465617716312408
unet - Epoch 13, Loss: 0.08306649327278137
unet - Epoch 14, Loss: 0.08586934208869934
unet - Epoch 15, Loss: 0.08522182703018188
unet - Epoch 16, Loss: 0.11206576228141785
unet - Epoch 17, Loss: 0.08915679156780243
unet - Epoch 18, Loss: 0.08836173266172409
unet - Epoch 19, Loss: 0.07134784758090973
unet - Epoch 20, Loss: 0.05095013603568077
unet - Epoch 21, Loss: 0.07222555577754974
unet - Epoch 22, Loss: 0.056603118777275085
unet - Epoch 23, Loss: 0.28968507051467896
unet - Epoch 24, Loss: 0.118

In [None]:
import torchvision.transforms as T
from torchvision.models.detection import maskrcnn_resnet50_fpn
from PIL import Image, ImageDraw
import numpy as np
import torch

# 사전 훈련된 Mask R-CNN 모델 불러오기
model = maskrcnn_resnet50_fpn(pretrained=True)
model.eval()

# 클래스 라벨
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

# 객체 감지 및 분할 함수
def detect_objects(image_path):
    # 이미지 불러오기
    img = Image.open(image_path)

    # 전처리
    transform = T.Compose([T.ToTensor()])
    img_tensor = transform(img)

    # 객체 감지 및 분할
    with torch.no_grad():
        prediction = model([img_tensor])[0]

    # 결과 추출
    masks = prediction['masks']
    labels = prediction['labels']
    scores = prediction['scores']

    # 추론 결과를 필터링하여 반환
    detections = []
    for mask, label, score in zip(masks, labels, scores):
        # 신뢰도가 일정 수준 이상인 경우에만 결과에 추가
        if score >= 0.5:
            mask = mask[0]  # 배치 차원 제거
            detections.append({'mask': mask, 'label': COCO_INSTANCE_CATEGORY_NAMES[label.item()], 'score': score.item()})

    return detections
# 객체 감지 및 분할 함수
def detect_objects(image_path):
    # 이미지 불러오기
    img = Image.open(image_path)

    # 전처리
    transform = T.Compose([T.ToTensor()])
    img_tensor = transform(img)

    # 객체 감지 및 분할
    with torch.no_grad():
        prediction = model([img_tensor])[0]

    # 결과 추출
    masks = prediction['masks']
    labels = prediction['labels']
    scores = prediction['scores']
    boxes = prediction['boxes']

    # 추론 결과를 필터링하여 반환
    detections = []
    for mask, label, score, box in zip(masks, labels, scores, boxes):
        # 신뢰도가 일정 수준 이상인 경우에만 결과에 추가
        if score >= 0.5:
            mask = mask[0]  # 배치 차원 제거
            detections.append({'mask': mask, 'label': COCO_INSTANCE_CATEGORY_NAMES[label.item()], 'score': score.item(), 'box': box.tolist()})

    return detections
# 객체 감지 및 분할 함수
def detect_objects(image_path):
    # 이미지 불러오기
    img = Image.open(image_path)

    # 전처리
    transform = T.Compose([T.ToTensor()])
    img_tensor = transform(img)

    # 객체 감지 및 분할
    with torch.no_grad():
        prediction = model([img_tensor])[0]

    # 결과 추출
    masks = prediction['masks']
    labels = prediction['labels']
    scores = prediction['scores']
    boxes = prediction['boxes']

    # 추론 결과를 필터링하여 반환 (사람만)
    detections = []
    for mask, label, score, box in zip(masks, labels, scores, boxes):
        # 신뢰도가 일정 수준 이상이고 라벨이 사람인 경우에만 결과에 추가
        if score >= 0.5 and label == 1:  # 1은 사람에 해당하는 라벨
            mask = mask[0]  # 배치 차원 제거
            detections.append({'mask': mask, 'label': COCO_INSTANCE_CATEGORY_NAMES[label.item()], 'score': score.item(), 'box': box.tolist()})

    return detections
def convert_box_to_center(box, image_width, image_height):
    # 박스 좌표를 중심점 좌표와 너비, 높이로 변환
    x_center = ((box[0] + box[2]) / 2) / image_width
    y_center = ((box[1] + box[3]) / 2) / image_height
    width = (box[2] - box[0]) / image_width
    height = (box[3] - box[1]) / image_height
    return x_center, y_center, width, height

# 객체 감지 및 분할 결과 텍스트로 출력 함수 (사람만)
def print_detection_results(detections, image_width, image_height):
    for i, detection in enumerate(detections, start=1):
        label = detection['label']
        score = detection['score']
        box = detection['box']

        # 중심점 좌표와 너비, 높이 계산
        x_center, y_center, width, height = convert_box_to_center(box, image_width, image_height)

        print(f"{x_center:f} {y_center:f} {width:f} {height:f}")

# 이미지 파일 경로
image_path = "/content/drive/MyDrive/Colab Notebooks/detection/images/240116133524-0043.jpg"

# 이미지 불러오기
img = Image.open(image_path)
image_width, image_height = img.size

# 객체 감지 및 분할 수행
detections = detect_objects(image_path)

# 객체 감지 및 분할 결과 텍스트로 출력 (사람만)
print_detection_results(detections, image_width, image_height)


0.448560 0.475042 0.016446 0.064658
0.319735 0.490922 0.013721 0.063306
0.655321 0.286448 0.010210 0.047385
0.133461 0.323387 0.009756 0.042698
0.304779 0.257048 0.009210 0.047592
0.427931 0.150444 0.007344 0.032236
0.159207 0.493104 0.005928 0.032146
0.192017 0.329184 0.012821 0.037121


In [15]:
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
import os
import numpy as np
import random
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
from torchvision.models.segmentation import fcn_resnet101
from torchvision.models.segmentation import deeplabv3_resnet101
from torchvision.models.segmentation import FCN_ResNet101_Weights
import segmentation_models_pytorch as smp

def create_model(model_name, num_classes):
    if model_name == 'unet':
        model = smp.Unet(encoder_name="resnet34", classes=num_classes, encoder_weights="imagenet")
    elif model_name == 'pspnet':
        model = smp.PSPNet(encoder_name="resnet34", classes=num_classes, encoder_weights="imagenet")
    elif model_name == 'deeplabv3plus':
        model = smp.DeepLabV3Plus(encoder_name="resnet34", classes=num_classes, encoder_weights="imagenet")
    elif model_name == 'linknet':
        model = smp.Linknet(encoder_name="resnet34", classes=num_classes, encoder_weights="imagenet")
    else:
        print("Unsupported model: {}".format(model_name))
        return None
    return model

class SegmentationDataset(Dataset):
    def __init__(self, image_dir, label_dir, image_list, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        self.images = image_list

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        label_path = os.path.join(self.label_dir, self.images[idx].replace('.jpg', '_mask.png'))
        image = Image.open(img_path).convert('RGB')
        label = Image.open(label_path).convert('L')
        if self.transform:
            image = self.transform(image)
            label = self.transform(label) * 255
        return image, label.squeeze().long()

def split_dataset(image_dir, test_ratio=0.2):
    images = os.listdir(image_dir)
    random.shuffle(images)
    split_idx = int(len(images) * (1 - test_ratio))
    return images[:split_idx], images[split_idx:]

test_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])
train_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

image_dir = '/content/drive/MyDrive/Colab Notebooks/segmentation/images'
label_dir = '/content/drive/MyDrive/Colab Notebooks/segmentation/labels'
train_images, test_images = split_dataset(image_dir)
train_dataset = SegmentationDataset(image_dir=image_dir, label_dir=label_dir, image_list=train_images, transform=train_transforms)
test_dataset = SegmentationDataset(image_dir, label_dir, test_images, transform=test_transforms)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_names = ["unet", "pspnet", "deeplabv3plus", "linknet"]
num_classes = 4

for model_name in model_names:
    model = create_model(model_name, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    prev_loss = float('inf')
    patience = 5
    no_improvement_count = 0

    for epoch in range(num_epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f'{model_name} - Epoch {epoch+1}, Loss: {loss.item()}')

        # 검증 데이터셋을 사용하여 손실을 계산하고, 조기 종료를 확인
        model.eval()
        with torch.no_grad():
            val_loss = 0
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)
            val_loss /= len(test_loader.dataset)

        # 손실이 이전 손실보다 증가했는지 확인하여 조기 종료 조건을 적용
        if val_loss >= prev_loss:
            no_improvement_count += 1
            if no_improvement_count >= patience:
                print(f'Early stopping at epoch {epoch+1} due to no improvement in validation loss.')
                break
        else:
            no_improvement_count = 0
            prev_loss = val_loss

    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            labels = labels.cpu().numpy()
            all_preds.extend(preds.flatten())
            all_labels.extend(labels.flatten())
    accuracy = accuracy_score(all_labels, all_preds)
    print(f'{model_name} - Test Accuracy: {accuracy}')


unet - Epoch 1, Loss: 0.5311014652252197
unet - Epoch 2, Loss: 0.2736879587173462
unet - Epoch 3, Loss: 0.675195574760437
unet - Epoch 4, Loss: 0.2520984411239624
unet - Epoch 5, Loss: 0.18400780856609344
unet - Epoch 6, Loss: 0.222054123878479
unet - Epoch 7, Loss: 0.12370148301124573
unet - Epoch 8, Loss: 0.08995825797319412
unet - Epoch 9, Loss: 0.12083162367343903
unet - Epoch 10, Loss: 0.1239992007613182
unet - Epoch 11, Loss: 0.14760400354862213
unet - Epoch 12, Loss: 0.15693125128746033
unet - Epoch 13, Loss: 0.13098283112049103
unet - Epoch 14, Loss: 0.09710965305566788
unet - Epoch 15, Loss: 0.1107923835515976
unet - Epoch 16, Loss: 0.071767657995224
unet - Epoch 17, Loss: 0.08650293201208115
unet - Epoch 18, Loss: 0.06359551101922989
unet - Epoch 19, Loss: 0.0715545117855072
unet - Epoch 20, Loss: 0.08093225210905075
unet - Epoch 21, Loss: 0.06656844913959503
unet - Epoch 22, Loss: 0.06671416759490967
unet - Epoch 23, Loss: 0.3909984529018402
unet - Epoch 24, Loss: 0.03744671

KeyboardInterrupt: 

In [None]:
from google.colab import drive
drive.mount('/content/drive')