In [1]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.models.segmentation import deeplabv3_resnet50


INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.12 (you have 1.4.10). Upgrade using: pip install --upgrade albumentations


# 2.데이터 로더 구현

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        # ... (이미지, 마스크 경로 리스트 생성)

    def __getitem__(self, index):
        image_path = self.images_dir[index]
        mask_path = self.masks_dir[index]
        # ... (이미지, 마스크 로딩 및 변환)

    def __len__(self):
        return len(self.images_dir)

# 데이터 로더 생성
train_dataset = CustomDataset(train_images_dir, train_masks_dir, transform=train_transforms)
val_dataset = CustomDataset(val_images_dir, val_masks_dir, transform=val_transforms)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)


# 3. 데이터 증강 파이프라인 구축

In [None]:
# 예시: 도로 클래스에 대한 증강
road_transforms = A.Compose([
    A.RandomBrightnessContrast(p=0.2),
    A.GaussianBlur(p=0.2),
    A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.5),
    ToTensorV2()
])


# 4. 모델 정의 및 학습

In [None]:
# DeepLabv3+ 모델 로딩
model = deeplabv3_resnet50(num_classes=25, aux_loss=True)  # 클래스 수에 맞게 조정

# 손실 함수, optimizer 설정
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 학습 루프
for epoch in range(num_epochs):
    for images, masks in train_loader:
        # ... (forward, backward, optimize)
        print("ing")


# 5. 평가

In [None]:
# IoU 계산 함수 정의
def calculate_iou(pred, target):
    # ... (IoU 계산 로직)

# 모델 평가
with torch.no_grad():
    for images, masks in val_loader:
        outputs = model(images)['out']
        preds = torch.argmax(outputs, dim=1)
        iou = calculate_iou(preds, masks)
        print(f"Epoch {epoch+1}/{num_epochs}, IoU: {iou:.4f}")
        # ... (예측, IoU 계산)
        


# deeplabv3+ code

In [5]:
import torch
import torchvision
from torchvision.models.segmentation import deeplabv3_resnet50
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np

import json
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2


def create_mask(label_data, image_shape):
    # 빈 마스크 생성
    mask = np.zeros(image_shape[:2], dtype=np.uint8)

    for obj in label_data['Annotation']:
        # 다각형 좌표 추출
        polygon = np.array(obj['data'][0], dtype=np.int32)
        polygon = polygon.reshape((-1, 1, 2))

        # 마스크에 다각형 그리기
        cv2.fillPoly(mask, [polygon], color=1)

    return mask

# 데이터셋 클래스 (CustomDataset)는 위에서 설명한 대로 구현
# 데이터셋 클래스 정의
class CustomDataset(Dataset):
    def __init__(self, image_dir, label_dir, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        #  이미지와 레이블 파일 목록 생성
        self.image_list = os.listdir(image_dir)

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_list[idx])
        label_path = os.path.join(self.label_dir, self.image_list[idx].replace('.jpg', '.json'))

        # 이미지 읽기
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # 레이블 읽기
        with open(label_path, 'r') as f:
            label_data = json.load(f)
        # 레이블 데이터를 mask 형태로 변환 (여기서 구체적인 로직 구현 필요)
        mask = create_mask(label_data, image.shape)
        # 마스크를 LongTensor로 변환
        mask = torch.from_numpy(mask).long()

        if self.transform:
            augmented = self.transform(image=image) #, mask=mask
            image = augmented['image']
            #mask = augmented['mask']

        return image, mask


# 데이터 증강 설정
transform = A.Compose([
    A.Resize(256, 256),
    A.RandomCrop(224, 224),
    A.HorizontalFlip(p=0.5),
    #ToTensorV2()
])

train_image_dir = "/home/mira/Desktop/KistAIRobot/david/autonomous-driving/project/2DSS/training/images"
train_label_dir = "/home/mira/Desktop/KistAIRobot/david/autonomous-driving/project/2DSS/training/labels"
# 데이터 로더 생성
train_dataset = CustomDataset(train_image_dir, train_label_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)

# 모델 정의
model = deeplabv3_resnet50(num_classes=25, aux_loss=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 손실 함수 및 optimizer 설정
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# 학습
num_epochs = 10
for epoch in range(num_epochs):
    for i, (images, masks) in enumerate(train_loader):
        images = images.to(device)
        masks = masks.to(device)

        # 모델을 학습 모드로 설정
        model.train()

        # 예측
        outputs = model(images)

        # 손실 계산
        loss = criterion(outputs['out'], masks)

        # 역전파
        optimizer.zero_grad()
        loss.backward()

        # 파라미터 업데이트
        optimizer.step()

        if (i+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

# 모델 저장
torch.save(model.state_dict(), "deeplab_model.pth")


RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[4, 224, 224, 3] to have 3 channels, but got 224 channels instead

In [14]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image, ImageDraw

import logging


# DeepLabv3+ 모델 정의 (간단한 버전)
class DeepLabV3Plus(nn.Module):
    def __init__(self, num_classes):
        super(DeepLabV3Plus, self).__init__()
        # 실제 구현에서는 더 복잡한 아키텍처가 필요합니다.
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.aspp = nn.Sequential(
            nn.Conv2d(64, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.decoder = nn.Sequential(
            nn.Conv2d(256, num_classes, kernel_size=1)
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.aspp(x)
        x = self.decoder(x)
        x = F.interpolate(x, size=(256, 256), mode='bilinear', align_corners=False)
        return x

# 커스텀 데이터셋 클래스
class RoadSegmentationDataset(Dataset):
    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        label_path = os.path.join(self.label_dir, img_name.replace('.jpg', '.json'))

        try:
            image = Image.open(img_path).convert('RGB')
        except OSError:
            print(f"Error opening image: {img_path}")
            # 손상된 이미지 처리 (예: 건너뛰기, 로그 기록 등)
            # 로깅 설정
            logging.basicConfig(filename='error.log', level=logging.ERROR)

            # 오류 발생 시 로그 기록
            logging.error(f"Error loading image: {img_path}, {e}")
        
        with open(label_path, 'r') as f:
            label_data = json.load(f)

        # 마스크 생성
        mask = self.create_mask(label_data, image.size)

        if self.transform:
            image = self.transform(image)
            mask = torch.from_numpy(mask).long()
            mask = F.interpolate(mask.unsqueeze(0).float(), size=(256, 256), mode='nearest').squeeze(0).long()

        return image, mask

    def create_mask(self, label_data, image_size):
        mask = np.zeros(image_size[::-1], dtype=np.uint8)
        annotations = sorted(label_data['Annotation'], key=lambda x: self.class_priority(x['class_name']))
        
        for annotation in annotations:
            class_name = annotation['class_name']
            class_id = self.get_class_id(class_name)
            polygon = np.array(annotation['data'][0]).reshape(-1, 2)
            
            
            img = Image.new('L', image_size, 0)
            ImageDraw.Draw(img).polygon(polygon.flatten().tolist(), outline=1, fill=1)
            mask[np.array(img) == 1] = class_id

        return mask

    def class_priority(self, class_name):
        # 클래스 우선순위 정의 (높은 숫자가 높은 우선순위)
        priorities = {
            'road': 10, 'vehicle': 9, 'pedestrian': 8, 'traffic light': 7,
            'traffic sign': 6, 'lane': 5, 'crosswalks': 4, 'sidewalk': 3,
            'vegetation': 2, 'sky': 1
        }
        return priorities.get(class_name, 0)

    def get_class_id(self, class_name):
        # 클래스 이름을 ID로 매핑
        class_map = {
            'road': 1, 'sidewalk': 2, 'crosswalks': 3, 'lane': 4,
            'traffic sign': 5, 'traffic light': 6, 'pole/structural object': 7,
            'building': 8, 'vehicle': 9, 'pedestrian': 10, 'vegetation': 11, 'sky': 12
        }
        return class_map.get(class_name, 0)  # 0은 배경 또는 알 수 없는 클래스

# 데이터 증강 및 변환
def get_transform(train):
    transforms_list = [
        transforms.Resize((256, 256)),  # 512x512에서 256x256으로 변경
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]
    if train:
        transforms_list.insert(0, transforms.RandomHorizontalFlip())
        transforms_list.insert(1, transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1))
    return transforms.Compose(transforms_list)

# 데이터 로더 설정
def get_dataloader(image_dir, label_dir, batch_size, train=True):
    dataset = RoadSegmentationDataset(image_dir, label_dir, transform=get_transform(train))
    return DataLoader(dataset, batch_size=batch_size, shuffle=train, num_workers=4)

# 훈련 함수
def train_model(model, train_loader, criterion, optimizer, device, num_epochs=10):
    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, masks in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images = images.to(device)
            masks = masks.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
    print("Training complete")

# 메인 실행 코드
if __name__ == "__main__":
    # 경로 설정
    train_image_dir = "training/images"
    train_label_dir = "training/labels"

    # 하이퍼파라미터 설정
    batch_size = 8
    num_classes = 13  # 배경 포함
    learning_rate = 0.001
    num_epochs = 10

    # 데이터 로더 생성
    train_loader = get_dataloader(train_image_dir, train_label_dir, batch_size)

    # 모델, 손실 함수, 옵티마이저 설정
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DeepLabV3Plus(num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 모델 훈련
    train_model(model, train_loader, criterion, optimizer, device, num_epochs)

    # 모델 저장
    torch.save(model.state_dict(), "deeplabv3plus_road_segmentation.pth")

TypeError: RoadSegmentationDataset() takes no arguments

In [12]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image, ImageDraw

# DeepLabv3+ 모델 정의 (간단한 버전)
class DeepLabV3Plus(nn.Module):
    def __init__(self, num_classes):
        super(DeepLabV3Plus, self).__init__()
        # 실제 구현에서는 더 복잡한 아키텍처가 필요합니다.
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.aspp = nn.Sequential(
            nn.Conv2d(64, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.decoder = nn.Sequential(
            nn.Conv2d(256, num_classes, kernel_size=1)
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.aspp(x)
        x = self.decoder(x)
        x = F.interpolate(x, size=(256, 256), mode='bilinear', align_corners=False)
        return x

# 커스텀 데이터셋 클래스
class RoadSegmentationDataset(Dataset):
    def __init__(self, image_dir, label_dir, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        self.images = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]

    def __len__(self):
        return len(self.images)


    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        label_path = os.path.join(self.label_dir, img_name.replace('.jpg', '.json'))

        image = Image.open(img_path).convert('RGB')
        
        with open(label_path, 'r') as f:
            label_data = json.load(f)

        # 마스크 생성
        mask = self.create_mask(label_data, image.size)

        # 이미지와 마스크를 동시에 리사이즈
        image = image.resize((256, 256), Image.BILINEAR)
        mask = Image.fromarray(mask).resize((256, 256), Image.NEAREST)
        
        if self.transform:
            image = self.transform(image)
        
        # 마스크를 텐서로 변환하고 차원 추가
        mask = torch.from_numpy(np.array(mask)).long().unsqueeze(0)

        return image, mask


    def create_mask(self, label_data, image_size):
        mask = np.zeros(image_size[::-1], dtype=np.uint8)
        annotations = sorted(label_data['Annotation'], key=lambda x: self.class_priority(x['class_name']))
        
        for annotation in annotations:
            class_name = annotation['class_name']
            class_id = self.get_class_id(class_name)
            polygon = np.array(annotation['data'][0]).reshape(-1, 2)
            
            img = Image.new('L', image_size, 0)
            ImageDraw.Draw(img).polygon(polygon.flatten().tolist(), outline=1, fill=1)
            mask[np.array(img) == 1] = class_id

        return mask

    def class_priority(self, class_name):
        # 클래스 우선순위 정의 (높은 숫자가 높은 우선순위)
        priorities = {
            'road': 10, 'vehicle': 9, 'pedestrian': 8, 'traffic light': 7,
            'traffic sign': 6, 'lane': 5, 'crosswalks': 4, 'sidewalk': 3,
            'vegetation': 2, 'sky': 1
        }
        return priorities.get(class_name, 0)

    def get_class_id(self, class_name):
        # 클래스 이름을 ID로 매핑
        class_map = {
            'road': 1, 'sidewalk': 2, 'crosswalks': 3, 'lane': 4,
            'traffic sign': 5, 'traffic light': 6, 'pole/structural object': 7,
            'building': 8, 'vehicle': 9, 'pedestrian': 10, 'vegetation': 11, 'sky': 12
        }
        return class_map.get(class_name, 0)  # 0은 배경 또는 알 수 없는 클래스

# 데이터 증강 및 변환
def get_transform(train):
    transforms_list = [
        transforms.Resize((256, 256)),  # 512x512에서 256x256으로 변경
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]
    if train:
        transforms_list.insert(0, transforms.RandomHorizontalFlip())
        transforms_list.insert(1, transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1))
    return transforms.Compose(transforms_list)

# 데이터 로더 설정
def get_dataloader(image_dir, label_dir, batch_size, train=True):
    dataset = RoadSegmentationDataset(image_dir, label_dir, transform=get_transform(train))
    return DataLoader(dataset, batch_size=batch_size, shuffle=train, num_workers=4)

# 훈련 함수
def train_model(model, train_loader, criterion, optimizer, device, num_epochs=10):
    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, masks in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images = images.to(device)
            masks = masks.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks.squeeze(1))  # squeeze mask's channel dimension
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
    print("Training complete")

# 메인 실행 코드
if __name__ == "__main__":
    # 경로 설정
    train_image_dir = "training/images"
    train_label_dir = "training/labels"

    # 하이퍼파라미터 설정
    batch_size = 8
    num_classes = 13  # 배경 포함
    learning_rate = 0.001
    num_epochs = 10

    # 데이터 로더 생성
    train_loader = get_dataloader(train_image_dir, train_label_dir, batch_size)

    # 모델, 손실 함수, 옵티마이저 설정
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DeepLabV3Plus(num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 모델 훈련
    train_model(model, train_loader, criterion, optimizer, device, num_epochs)

    # 모델 저장
    torch.save(model.state_dict(), "deeplabv3plus_road_segmentation.pth")

Epoch 1/10:  12%|█▏        | 47/396 [01:19<09:50,  1.69s/it]


OSError: Caught OSError in DataLoader worker process 3.
Original Traceback (most recent call last):
  File "/home/mira/anaconda3/envs/kistAI/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/mira/anaconda3/envs/kistAI/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/tmp/ipykernel_233851/2695739857.py", line 59, in __getitem__
    image = Image.open(img_path).convert('RGB')
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mira/anaconda3/envs/kistAI/lib/python3.12/site-packages/PIL/Image.py", line 995, in convert
    self.load()
  File "/home/mira/anaconda3/envs/kistAI/lib/python3.12/site-packages/PIL/ImageFile.py", line 290, in load
    raise OSError(msg)
OSError: image file is truncated (37 bytes not processed)


# UNet code

In [16]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# 데이터 경로 설정
data_dir = '/home/mira/Desktop/KistAIRobot/david/autonomous-driving/project/2DSS/training'  # 데이터셋 경로
train_dir = os.path.join(data_dir, 'images')
val_dir = os.path.join(data_dir, 'labels')

# 이미지와 마스크 로딩 함수
def load_data(images_dir, masks_dir):
    images = []
    masks = []
    for filename in os.listdir(images_dir):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            img = cv2.imread(os.path.join(images_dir, filename))
            img = cv2.resize(img, (256, 256))  # 이미지 리사이즈
            images.append(img)

            mask = cv2.imread(os.path.join(masks_dir, filename), cv2.IMREAD_GRAYSCALE)
            mask = cv2.resize(mask, (256, 256))  # 마스크 리사이즈
            masks.append(mask)
    
    return np.array(images), np.array(masks)

# 데이터 로드
X_train, y_train = load_data(train_dir, os.path.join(data_dir, 'masks'))
X_val, y_val = load_data(val_dir, os.path.join(data_dir, 'masks'))

# 정규화
X_train = X_train / 255.0
X_val = X_val / 255.0

# 원-핫 인코딩
num_classes = 3  # 클래스 수 (배경, 도로, 장애물 등)
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes)

# 데이터 증강
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# 모델 정의 (U-Net 예시)
def create_model(input_shape):
    inputs = tf.keras.layers.Input(input_shape)
    conv1 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(conv3)

    # Bottleneck
    conv4 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)

    # 업샘플링
    up5 = tf.keras.layers.UpSampling2D(size=(2, 2))(conv4)
    merge5 = tf.keras.layers.concatenate([up5, conv3], axis=3)
    conv5 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(merge5)
    conv5 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv5)

    up6 = tf.keras.layers.UpSampling2D(size=(2, 2))(conv5)
    merge6 = tf.keras.layers.concatenate([up6, conv2], axis=3)
    conv6 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(merge6)
    conv6 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv6)

    up7 = tf.keras.layers.UpSampling2D(size=(2, 2))(conv6)
    merge7 = tf.keras.layers.concatenate([up7, conv1], axis=3)
    conv7 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(merge7)
    conv7 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv7)

    outputs = tf.keras.layers.Conv2D(num_classes, (1, 1), activation='softmax')(conv7)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

# 모델 생성
model = create_model((256, 256, 3))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 모델 학습
model.fit(datagen.flow(X_train, y_train, batch_size=32),
          validation_data=(X_val, y_val),
          steps_per_epoch=len(X_train) // 32,
          epochs=50)

# 모델 저장
model.save('autonomous_driving_segmentation_model.h5')


2024-08-02 17:59:35.210781: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-02 17:59:35.230967: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-02 17:59:35.237055: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-02 17:59:35.251827: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Exception ignored in: <function _MultiProcessingDataL

error: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


In [1]:
import os
import json
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# 데이터 경로 설정
TRAINING_IMAGES_PATH = 'training/images/'
TRAINING_LABELS_PATH = 'training/labels/'

# 이미지 크기 설정
IMG_HEIGHT, IMG_WIDTH = 256, 256

# JSON 파일에서 라벨 읽기
def load_annotations(label_path):
    with open(label_path) as f:
        return json.load(f)

# 사용자 정의 데이터셋 클래스
class CustomDataset(Dataset):
    def __init__(self, images_path, labels_path, transform=None):
        self.images_path = images_path
        self.labels_path = labels_path
        self.transform = transform
        self.images = []
        self.labels = []

        # 라벨 파일 읽기
        for label_file in os.listdir(labels_path):
            if label_file.endswith('.json'):
                annotation = load_annotations(os.path.join(labels_path, label_file))
                image_name = annotation['image_name']
                image_path = os.path.join(images_path, image_name)
                self.images.append(image_path)

                # 라벨 처리: 우선순위가 높은 것만 선택
                for ann in annotation['Annotation']:
                    if ann['class_name'] == 'road':
                        self.labels.append(1)  # "road" 클래스에 대한 라벨
                    else:
                        self.labels.append(0)  # 나머지 클래스는 0으로 설정

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = cv2.imread(self.images[idx])
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))  # 크기 조정
        image = image / 255.0  # 정규화
        image = np.transpose(image, (2, 0, 1))  # (H, W, C) -> (C, H, W)

        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# 데이터 증강 설정
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor()
])

# 데이터셋 및 데이터로더 생성
dataset = CustomDataset(TRAINING_IMAGES_PATH, TRAINING_LABELS_PATH, transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# 모델 정의 (예시)
class SimpleCNN(torch.nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = torch.nn.Linear(32 * (IMG_HEIGHT // 2) * (IMG_WIDTH // 2), 64)
        self.fc2 = torch.nn.Linear(64, 1)  # 이진 분류를 위한 출력층

    def forward(self, x):
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = x.view(-1, 32 * (IMG_HEIGHT // 2) * (IMG_WIDTH // 2))
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# 모델 초기화 및 손실 함수, 옵티마이저 설정
model = SimpleCNN()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 모델 훈련
num_epochs = 10
for epoch in range(num_epochs):
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs.view(-1), labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


[ WARN:0@18.849] global loadsave.cpp:241 findDecoder imread_('training/images/N_SFG_230719_053_FC_042.jpg'): can't open/read file: check file path/integrity


error: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


In [2]:
import os
import json
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# 데이터 경로 설정
TRAINING_IMAGES_PATH = 'training/images/'
TRAINING_LABELS_PATH = 'training/labels/'

# 이미지 크기 설정
IMG_HEIGHT, IMG_WIDTH = 256, 256

# JSON 파일에서 라벨 읽기
def load_annotations(label_path):
    with open(label_path) as f:
        return json.load(f)

# 사용자 정의 데이터셋 클래스
class CustomDataset(Dataset):
    def __init__(self, images_path, labels_path, transform=None):
        self.images_path = images_path
        self.labels_path = labels_path
        self.transform = transform
        self.images = []
        self.labels = []

        # 라벨 파일 읽기
        for label_file in os.listdir(labels_path):
            if label_file.endswith('.json'):
                annotation = load_annotations(os.path.join(labels_path, label_file))
                image_name = annotation['image_name']
                image_path = os.path.join(images_path, image_name)
                self.images.append(image_path)

                # 라벨 처리: 우선순위가 높은 것만 선택
                for ann in annotation['Annotation']:
                    if ann['class_name'] == 'road':
                        self.labels.append(1)  # "road" 클래스에 대한 라벨
                    else:
                        self.labels.append(0)  # 나머지 클래스는 0으로 설정

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = cv2.imread(self.images[idx])
        if image is None:
            raise ValueError(f"Image not found or couldn't be loaded: {self.images[idx]}")
        
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))  # 크기 조정
        image = image / 255.0  # 정규화
        image = np.transpose(image, (2, 0, 1))  # (H, W, C) -> (C, H, W)

        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# 데이터 증강 설정
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor()
])

# 데이터셋 및 데이터로더 생성
dataset = CustomDataset(TRAINING_IMAGES_PATH, TRAINING_LABELS_PATH, transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# 모델 정의 (예시)
class SimpleCNN(torch.nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = torch.nn.Linear(32 * (IMG_HEIGHT // 2) * (IMG_WIDTH // 2), 64)
        self.fc2 = torch.nn.Linear(64, 1)  # 이진 분류를 위한 출력층

    def forward(self, x):
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = x.view(-1, 32 * (IMG_HEIGHT // 2) * (IMG_WIDTH // 2))
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# 모델 초기화 및 손실 함수, 옵티마이저 설정
model = SimpleCNN()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 모델 훈련
num_epochs = 10
for epoch in range(num_epochs):
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs.view(-1), labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


ValueError: pic should not have > 4 channels. Got 256 channels.

In [3]:
import os
import json
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# 데이터 경로 설정
TRAINING_IMAGES_PATH = 'training/images/'
TRAINING_LABELS_PATH = 'training/labels/'

# 이미지 크기 설정
IMG_HEIGHT, IMG_WIDTH = 256, 256

# JSON 파일에서 라벨 읽기
def load_annotations(label_path):
    with open(label_path) as f:
        return json.load(f)

# 사용자 정의 데이터셋 클래스
class CustomDataset(Dataset):
    def __init__(self, images_path, labels_path, transform=None):
        self.images_path = images_path
        self.labels_path = labels_path
        self.transform = transform
        self.images = []
        self.labels = []

        # 라벨 파일 읽기
        for label_file in os.listdir(labels_path):
            if label_file.endswith('.json'):
                annotation = load_annotations(os.path.join(labels_path, label_file))
                image_name = annotation['image_name']
                image_path = os.path.join(images_path, image_name)
                self.images.append(image_path)

                # 라벨 처리: 우선순위가 높은 것만 선택
                for ann in annotation['Annotation']:
                    if ann['class_name'] == 'road':
                        self.labels.append(1)  # "road" 클래스에 대한 라벨
                    else:
                        self.labels.append(0)  # 나머지 클래스는 0으로 설정

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = cv2.imread(self.images[idx], cv2.IMREAD_COLOR)  # RGB로 불러오기
        if image is None:
            raise ValueError(f"Image not found or couldn't be loaded: {self.images[idx]}")
        
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))  # 크기 조정
        
        # 채널 수 확인 및 처리
        if image.shape[2] > 3:
            image = image[..., :3]  # 첫 3개 채널만 유지
        
        image = image / 255.0  # 정규화
        image = np.transpose(image, (2, 0, 1))  # (H, W, C) -> (C, H, W)

        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# 데이터 증강 설정
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor()
])

# 데이터셋 및 데이터로더 생성
dataset = CustomDataset(TRAINING_IMAGES_PATH, TRAINING_LABELS_PATH, transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# 모델 정의 (예시)
class SimpleCNN(torch.nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = torch.nn.Linear(32 * (IMG_HEIGHT // 2) * (IMG_WIDTH // 2), 64)
        self.fc2 = torch.nn.Linear(64, 1)  # 이진 분류를 위한 출력층

    def forward(self, x):
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = x.view(-1, 32 * (IMG_HEIGHT // 2) * (IMG_WIDTH // 2))
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# 모델 초기화 및 손실 함수, 옵티마이저 설정
model = SimpleCNN()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 모델 훈련
num_epochs = 10
for epoch in range(num_epochs):
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs.view(-1), labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


[ WARN:0@545.791] global loadsave.cpp:241 findDecoder imread_('training/images/S_DRH_230704_007_FC_028.jpg'): can't open/read file: check file path/integrity


ValueError: Image not found or couldn't be loaded: training/images/S_DRH_230704_007_FC_028.jpg

In [4]:
import os
import json
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.models.segmentation import deeplabv3_resnet50
import albumentations as A
from albumentations.pytorch import ToTensorV2





# 데이터셋 클래스 정의
class AutonomousDrivingDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images_dir = os.path.join(root_dir, 'images')
        self.labels_dir = os.path.join(root_dir, 'labels')
        self.image_files = sorted(os.listdir(self.images_dir))

    def __len__(self):
        return len(self.image_files)

    #def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        label_path = os.path.join(self.labels_dir, img_name.replace('.jpg', '.json'))

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        with open(label_path, 'r') as f:
            label_data = json.load(f)

        # 우선순위에 따라 애노테이션 처리
        annotations = sorted(label_data['Annotation'], key=lambda x: label_data['Annotation'].index(x), reverse=True)
        mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.int64)

        class_mapping = {
            'road': 1, 'sidewalk': 2, 'road roughness': 3, 'road boundaries': 4, 'crosswalks': 5,
            'lane': 6, 'road color guide': 7, 'road marking': 8, 'parking': 9, 'traffic sign': 10,
            'traffic light': 11, 'pole/structural object': 12, 'building': 13, 'tunnel': 14,
            'bridge': 15, 'pedestrian': 16, 'vehicle': 17, 'bicycle': 18, 'motorcycle': 19,
            'personal mobility': 20, 'dynamic': 21, 'vegetation': 22, 'sky': 23, 'static': 24
        }

        for annotation in annotations:
            points = np.array(annotation['data'][0]).reshape((-1, 2))
            class_name = annotation['class_name']
            class_id = class_mapping.get(class_name, 0)  # 0은 배경 클래스
            cv2.fillPoly(mask, [points.astype(np.int32)], class_id)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask
    #def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        label_path = os.path.join(self.labels_dir, img_name.replace('.jpg', '.json'))

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        with open(label_path, 'r') as f:
            label_data = json.load(f)

        # 우선순위에 따라 애노테이션 처리
        annotations = sorted(label_data['Annotation'], key=lambda x: label_data['Annotation'].index(x), reverse=True)
        mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)  # 여기를 수정

        class_mapping = {
            'road': 1, 'sidewalk': 2, 'road roughness': 3, 'road boundaries': 4, 'crosswalks': 5,
            'lane': 6, 'road color guide': 7, 'road marking': 8, 'parking': 9, 'traffic sign': 10,
            'traffic light': 11, 'pole/structural object': 12, 'building': 13, 'tunnel': 14,
            'bridge': 15, 'pedestrian': 16, 'vehicle': 17, 'bicycle': 18, 'motorcycle': 19,
            'personal mobility': 20, 'dynamic': 21, 'vegetation': 22, 'sky': 23, 'static': 24
        }

        for annotation in annotations:
            points = np.array(annotation['data'][0]).reshape((-1, 1, 2))  # 여기를 수정
            class_name = annotation['class_name']
            class_id = class_mapping.get(class_name, 0)  # 0은 배경 클래스
            cv2.fillPoly(mask, [points.astype(np.int32)], class_id)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask
    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        label_path = os.path.join(self.labels_dir, img_name.replace('.jpg', '.json'))

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        with open(label_path, 'r') as f:
            label_data = json.load(f)

        # 우선순위에 따라 애노테이션 처리
        annotations = sorted(label_data['Annotation'], key=lambda x: label_data['Annotation'].index(x), reverse=True)
        mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.int64)  # 여기를 수정

        class_mapping = {
            'road': 1, 'sidewalk': 2, 'road roughness': 3, 'road boundaries': 4, 'crosswalks': 5,
            'lane': 6, 'road color guide': 7, 'road marking': 8, 'parking': 9, 'traffic sign': 10,
            'traffic light': 11, 'pole/structural object': 12, 'building': 13, 'tunnel': 14,
            'bridge': 15, 'pedestrian': 16, 'vehicle': 17, 'bicycle': 18, 'motorcycle': 19,
            'personal mobility': 20, 'dynamic': 21, 'vegetation': 22, 'sky': 23, 'static': 24
        }

        for annotation in annotations:
            points = np.array(annotation['data'][0]).reshape((-1, 1, 2))
            class_name = annotation['class_name']
            class_id = class_mapping.get(class_name, 0)  # 0은 배경 클래스
            cv2.fillPoly(mask, [points.astype(np.int32)], class_id)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask.long()  # 여기를 수정
# 데이터 증강 정의

train_transform = A.Compose([
    A.RandomRotate90(),
    A.Flip(),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    A.OneOf([
        A.RandomFog(fog_coef_lower=0.3, fog_coef_upper=0.8, alpha_coef=0.1, p=0.5),
        A.RandomRain(slant_lower=-10, slant_upper=10, drop_length=20, drop_width=1, drop_color=(200, 200, 200), p=0.5),
        A.RandomShadow(num_shadows_lower=1, num_shadows_upper=3, shadow_dimension=5, shadow_roi=(0, 0.5, 1, 1), p=0.5),
    ], p=0.3),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
    A.Resize(512, 512),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
], additional_targets={'mask': 'mask'})

# 데이터셋 및 데이터로더 생성
train_dataset = AutonomousDrivingDataset(root_dir='training', transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)

# 모델 정의
model = deeplabv3_resnet50(weights=None, num_classes=25)  # 24개 클래스 + 배경

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 학습 함수
def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    for images, masks in dataloader:
        images, masks = images.to(device), masks.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)['out']
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

# 학습 실행
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 50
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}')

# 모델 저장
torch.save(model.state_dict(), 'autonomous_driving_segmentation_model.pth')

error: Caught error in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/mira/anaconda3/envs/kistAI/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/mira/anaconda3/envs/kistAI/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/tmp/ipykernel_45618/3191233387.py", line 127, in __getitem__
    cv2.fillPoly(mask, [points.astype(np.int32)], class_id)
cv2.error: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'fillPoly'
> Overload resolution failed:
>  - Layout of the output array img is incompatible with cv::Mat
>  - Expected Ptr<cv::UMat> for argument 'img'



train_transform = A.Compose([
    A.RandomRotate90(),
    A.Flip(),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    A.OneOf([
        A.RandomFog(fog_coef_lower=0.3, fog_coef_upper=0.8, alpha_coef=0.1, p=0.5),
        A.RandomRain(slant_lower=-10, slant_upper=10, drop_length=20, drop_width=1, drop_color=(200, 200, 200), p=0.5),
        A.RandomShadow(num_shadows_lower=1, num_shadows_upper=3, shadow_dimension=5, shadow_roi=(0, 0.5, 1, 1), p=0.5),
    ], p=0.3),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
    A.Resize(512, 512),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [1]:
import os
import json
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.models.segmentation import deeplabv3_resnet50
import albumentations as A
from albumentations.pytorch import ToTensorV2

# 데이터셋 클래스 정의
class AutonomousDrivingDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images_dir = os.path.join(root_dir, 'images')
        self.labels_dir = os.path.join(root_dir, 'labels')
        self.image_files = sorted(os.listdir(self.images_dir))

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        label_path = os.path.join(self.labels_dir, img_name.replace('.jpg', '.json'))

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        with open(label_path, 'r') as f:
            label_data = json.load(f)

        annotations = sorted(label_data['Annotation'], key=lambda x: label_data['Annotation'].index(x), reverse=True)
        mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)  # dtype을 np.uint8로 변경

        class_mapping = {
            'road': 1, 'sidewalk': 2, 'road roughness': 3, 'road boundaries': 4, 'crosswalks': 5,
            'lane': 6, 'road color guide': 7, 'road marking': 8, 'parking': 9, 'traffic sign': 10,
            'traffic light': 11, 'pole/structural object': 12, 'building': 13, 'tunnel': 14,
            'bridge': 15, 'pedestrian': 16, 'vehicle': 17, 'bicycle': 18, 'motorcycle': 19,
            'personal mobility': 20, 'dynamic': 21, 'vegetation': 22, 'sky': 23, 'static': 24
        }

        for annotation in annotations:
            points = np.array(annotation['data'][0]).reshape((-1, 1, 2)).astype(np.int32)  # points를 np.int32로 변환
            class_name = annotation['class_name']
            class_id = class_mapping.get(class_name, 0)
            cv2.fillPoly(mask, [points], class_id)  # fillPoly 호출

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask.long()

# 데이터 증강 정의
train_transform = A.Compose([
    A.RandomRotate90(),
    A.Flip(),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    A.OneOf([
        A.RandomFog(fog_coef_lower=0.3, fog_coef_upper=0.8, alpha_coef=0.1, p=0.5),
        A.RandomRain(slant_lower=-10, slant_upper=10, drop_length=20, drop_width=1, drop_color=(200, 200, 200), p=0.5),
        A.RandomShadow(num_shadows_lower=1, num_shadows_upper=3, shadow_dimension=5, shadow_roi=(0, 0.5, 1, 1), p=0.5),
    ], p=0.3),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
    A.Resize(512, 512),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# 데이터셋 및 데이터로더 생성
train_dataset = AutonomousDrivingDataset(root_dir='training', transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)

# 모델 정의
model = deeplabv3_resnet50(weights=None, num_classes=25)  # 24개 클래스 + 배경

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 학습 함수
def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    for images, masks in dataloader:
        images, masks = images.to(device), masks.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)['out']
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

# 학습 실행
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)


num_epochs = 50
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}')

# 모델 저장
torch.save(model.state_dict(), 'autonomous_driving_segmentation_model.pth')


Premature end of JPEG file


Epoch 1/50, Loss: 0.5522


In [2]:
import os
import json
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.models.segmentation import deeplabv3_resnet50
import albumentations as A
from albumentations.pytorch import ToTensorV2

# 데이터셋 클래스 정의
class AutonomousDrivingDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images_dir = os.path.join(root_dir, 'images')
        self.labels_dir = os.path.join(root_dir, 'labels')
        self.image_files = sorted(os.listdir(self.images_dir))

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        label_path = os.path.join(self.labels_dir, img_name.replace('.jpg', '.json'))

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        with open(label_path, 'r') as f:
            label_data = json.load(f)

        # 우선순위에 따라 애노테이션 처리
        annotations = sorted(label_data['Annotation'], key=lambda x: label_data['Annotation'].index(x), reverse=True)
        mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.int64)

        class_mapping = {
            'road': 1, 'sidewalk': 2, 'road roughness': 3, 'road boundaries': 4, 'crosswalks': 5,
            'lane': 6, 'road color guide': 7, 'road marking': 8, 'parking': 9, 'traffic sign': 10,
            'traffic light': 11, 'pole/structural object': 12, 'building': 13, 'tunnel': 14,
            'bridge': 15, 'pedestrian': 16, 'vehicle': 17, 'bicycle': 18, 'motorcycle': 19,
            'personal mobility': 20, 'dynamic': 21, 'vegetation': 22, 'sky': 23, 'static': 24
        }

        for annotation in annotations:
            points = np.array(annotation['data'][0]).reshape((-1, 2))
            class_name = annotation['class_name']
            class_id = class_mapping.get(class_name, 0)  # 0은 배경 클래스
            cv2.fillPoly(mask, [points.astype(np.int32)], class_id)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

# 데이터 증강 정의
train_transform = A.Compose([
    A.RandomRotate90(),
    A.Flip(),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    A.OneOf([
        A.RandomFog(fog_coef_lower=0.3, fog_coef_upper=0.8, alpha_coef=0.1, p=0.5),
        A.RandomRain(slant_lower=-10, slant_upper=10, drop_length=20, drop_width=1, drop_color=(200, 200, 200), p=0.5),
        A.RandomShadow(num_shadows_lower=1, num_shadows_upper=3, shadow_dimension=5, shadow_roi=(0, 0.5, 1, 1), p=0.5),
    ], p=0.3),
    A.OneOf([
        A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
        A.GaussianBlur(blur_limit=(3, 7), p=0.5),
    ], p=0.3),
    A.Resize(512, 512),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# 데이터셋 및 데이터로더 생성
train_dataset = AutonomousDrivingDataset(root_dir='training', transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)

# 모델 정의
model = deeplabv3_resnet50(pretrained=False, num_classes=25)  # 24개 클래스 + 배경

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 학습 함수
def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    for images, masks in dataloader:
        images, masks = images.to(device), masks.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)['out']
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

# 학습 실행
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 50
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}')

# 모델 저장
torch.save(model.state_dict(), 'autonomous_driving_segmentation_model.pth')




error: Caught error in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/mira/anaconda3/envs/kistAI/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/mira/anaconda3/envs/kistAI/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/tmp/ipykernel_6637/4215128449.py", line 53, in __getitem__
    cv2.fillPoly(mask, [points.astype(np.int32)], class_id)
cv2.error: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'fillPoly'
> Overload resolution failed:
>  - Layout of the output array img is incompatible with cv::Mat
>  - Expected Ptr<cv::UMat> for argument 'img'

