Reference: https://www.kaggle.com/code/ekaterinadranitsyna/segformer-water-segmentation-pytorch/notebook

In [None]:
# !pip install transformers datasets
# !pip install --upgrade sympy
# !pip install --upgrade datasets
# !pip install evaluate
# !pip install transformers
# !pip install torchmetrics

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [2]:
import os
from datasets import Dataset
from transformers import TrainingArguments, Trainer
import torch
from PIL import Image
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

processor = SegformerImageProcessor()

# GPU 사용 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# 데이터 경로 설정
train_images_dir = "/WD/content/car_damage_data_v/train/image"
train_masks_dir = "/WD/content/car_damage_data_v/masks/train"

val_images_dir = "/WD/content/car_damage_data_v/valid/image"
val_masks_dir = "/WD/content/car_damage_data_v/masks/valid"

# 배경 클래스 포함
label_mapping = {'Background': 0, 'Damaged': 1}

# id2label 및 label2id 생성
id2label = {v: k for k, v in label_mapping.items()}  # ID -> Label
label2id = {k: v for k, v in label_mapping.items()}  # Label -> ID

# 출력 확인
print("id2label:", id2label)
print("label2id:", label2id)

# 라벨 개수
num_labels = len(id2label)
print("Number of labels:", num_labels)

  from .autonotebook import tqdm as notebook_tqdm


cuda
id2label: {0: 'Background', 1: 'Damaged'}
label2id: {'Background': 0, 'Damaged': 1}
Number of labels: 2


In [27]:
# Model

model_name = "nvidia/mit-b3"
processor = SegformerImageProcessor.from_pretrained(model_name)
model = SegformerForSemanticSegmentation.from_pretrained(
    model_name,
    num_labels = 2,
    id2label=id2label,
    label2id=label2id

)

model.to(device)  # 모델을 GPU로 이동

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b3 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SegformerForSemanticSegmentation(
  (segformer): SegformerModel(
    (encoder): SegformerEncoder(
      (patch_embeddings): ModuleList(
        (0): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
          (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
        (1): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        )
        (2): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((320,), eps=1e-05, elementwise_affine=True)
        )
        (3): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)

In [4]:
# Load Dataset & Preprocessing

def load_data(images_dir, masks_dir):
    images = sorted(os.listdir(images_dir))
    masks = sorted(os.listdir(masks_dir))

    data = []
    for img_name, mask_name in zip(images, masks):
        image_path = os.path.join(images_dir, img_name)
        mask_path = os.path.join(masks_dir, mask_name)
        data.append({"image": image_path, "mask": mask_path})
    return data

train_data = load_data(train_images_dir, train_masks_dir)
val_data = load_data(val_images_dir, val_masks_dir)

# 데이터셋 준비
def preprocess(example):
    image = Image.open(example["image"]).convert("RGB").resize((512, 512))  # 512x512로 축소
    mask = Image.open(example["mask"]).resize((512, 512))
    encoding = processor(image, mask, return_tensors="pt")
    encoding = {k: v.squeeze(0).to(device) for k, v in encoding.items()}
    return encoding


train_dataset = Dataset.from_list(train_data).map(preprocess)
val_dataset = Dataset.from_list(val_data).map(preprocess)

Map: 100%|██████████| 7062/7062 [04:48<00:00, 24.44 examples/s]  
Map: 100%|██████████| 1513/1513 [00:56<00:00, 26.92 examples/s]


In [5]:
from torch.utils.data import DataLoader

batch_size = 4

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# 데이터 처리
train_dataset.set_format(type='torch', columns=['pixel_values', 'labels'])
val_dataset.set_format(type='torch', columns=['pixel_values', 'labels'])

In [6]:
# Evaluation Metrics
import torch
from torch import nn
import evaluate

metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
  with torch.no_grad():
    logits, labels = eval_pred
    logits_tensor = torch.from_numpy(logits)
    # scale the logits to the size of the label
    logits_tensor = nn.functional.interpolate(
        logits_tensor,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
    ).argmax(dim=1)

    pred_labels = logits_tensor.detach().cpu().numpy()
    metrics = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=len(id2label),
        ignore_index = None,
        reduce_labels=processor.do_reduce_labels,
    )

    # add per category metrics as individual key-value pairs
    per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
    per_category_iou = metrics.pop("per_category_iou").tolist()

    metrics.update({f"accuracy_{id2label[i]}": v for i, v in enumerate(per_category_accuracy)})
    metrics.update({f"iou_{id2label[i]}": v for i, v in enumerate(per_category_iou)})

    return metrics

### new loss

In [13]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0):
        """
        Focal Loss를 초기화합니다.
        :param alpha: 클래스 가중치 (default = 0.25)
        :param gamma: 포커스 파라미터 (default = 2.0)
        """
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ce_loss = nn.CrossEntropyLoss(reduction='none')

    def forward(self, outputs, targets):
        """
        Focal Loss 계산
        :param outputs: 모델 출력 logits (batch_size, num_classes, height, width)
        :param targets: 타겟 라벨 (batch_size, height, width)
        """
        # CrossEntropyLoss 계산
        ce_loss = self.ce_loss(outputs, targets)

        # Probabilities
        pt = torch.exp(-ce_loss)  # 예측된 확률값
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import Trainer, TrainingArguments, SegformerForSemanticSegmentation
from datasets import load_dataset

# IoU Loss 정의
class IoULoss(nn.Module):
    def forward(self, outputs, targets):
        smooth = 1.0
        outputs = torch.sigmoid(outputs)  # Sigmoid 활성화 함수
        intersection = (outputs * targets).sum()
        total = (outputs + targets).sum()
        union = total - intersection
        iou = (intersection + smooth) / (union + smooth)
        return 1 - iou  # IoU를 Loss로 변환

# Dice Loss 정의
class DiceLoss(nn.Module):
    def forward(self, outputs, targets):
        smooth = 1.0
        outputs = torch.sigmoid(outputs)  # Sigmoid 활성화 함수
        intersection = (outputs * targets).sum()
        dice = (2. * intersection + smooth) / (outputs.sum() + targets.sum() + smooth)
        return 1 - dice

# IoU Loss와 Dice Loss를 조합
class CombinedLoss(nn.Module):
    def __init__(self, weight_iou=0.7, weight_dice=0.3):
        super().__init__()
        self.iou_loss = IoULoss()
        self.dice_loss = DiceLoss()
        self.weight_iou = weight_iou
        self.weight_dice = weight_dice

    def forward(self, outputs, targets):
        iou_loss = self.iou_loss(outputs, targets)
        dice_loss = self.dice_loss(outputs, targets)
        return self.weight_iou * iou_loss + self.weight_dice * dice_loss
    
class CombinedLoss_three(nn.Module):
    def __init__(self, weight_ce=0.5, weight_iou=0.3, weight_dice=0.2):
        super().__init__()
        self.ce_loss = nn.CrossEntropyLoss()
        self.iou_loss = IoULoss()
        self.dice_loss = DiceLoss()
        self.weight_ce = weight_ce
        self.weight_iou = weight_iou
        self.weight_dice = weight_dice

    def forward(self, outputs, targets):
        # CrossEntropyLoss 계산
        ce_loss = self.ce_loss(outputs, targets)
        # IoU와 Dice Loss 계산 (타겟 차원 맞춤 필요)
        targets_one_hot = F.one_hot(targets, num_classes=outputs.shape[1]).permute(0, 3, 1, 2).float()
        iou_loss = self.iou_loss(outputs, targets_one_hot)
        dice_loss = self.dice_loss(outputs, targets_one_hot)
        # 손실 조합
        return self.weight_ce * ce_loss + self.weight_iou * iou_loss + self.weight_dice * dice_loss


# CombinedLoss 정의
loss_fn = CombinedLoss(weight_iou=0.7, weight_dice=0.3)
loss_three = CombinedLoss_three(weight_ce=0.5, weight_iou=0.3, weight_dice=0.2)


### train

In [9]:
# 실행은 되지만 iou값이 엄청 안좋음
# 
import torch
from tqdm import tqdm
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
import evaluate
import logging

# 로그 설정
log_file = "training_log.txt"
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# Hyperparameters
num_epochs = 3
best_val_iou = 0.0  # Best Validation IoU 초기값
best_model_path = "./best_model_new_loss_n_func.pth"
learning_rate = 5e-5
batch_size = 4  # 배치 크기 설정

# Optimizer와 Scheduler 초기화
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)  # 5 에포크마다 lr 감소

# Loss 정의
segmentation_loss = CombinedLoss(weight_iou=0.9, weight_dice=0.1)

# 학습 루프
for epoch in range(num_epochs):
    logging.info(f"Epoch {epoch + 1}/{num_epochs} started")
    print(f"\nEpoch {epoch + 1}/{num_epochs}")
    train_loss = 0
    train_iou = 0

    # Training 단계
    model.train()
    train_progress_bar = tqdm(train_dataloader, desc="Training", leave=True)

    for batch_idx, batch in enumerate(train_progress_bar):
        optimizer.zero_grad()

        # 데이터 준비
        inputs = batch["pixel_values"].to(device)
        targets = batch["labels"].to(device)

        # 모델 출력
        outputs = model(pixel_values=inputs)
        logits = outputs.logits  # (batch_size, num_classes, height, width)

        # 레이블 크기 조정
        output_size = logits.shape[-2:]  # (height, width)
        resized_targets = F.interpolate(
            targets.unsqueeze(1).float(),
            size=output_size,
            mode="nearest"
        ).squeeze(1).long()  # Resized targets to (batch_size, height, width)

        # 1. outputs에 sigmoid 적용 및 특정 클래스 이진화
        probabilities = torch.sigmoid(logits)  # (batch_size, num_classes, height, width)
        binary_outputs = probabilities[:, 1, :, :]  # 클래스 1의 채널 선택 (batch_size, height, width)

        # 2. targets 이진화 (클래스 1인 픽셀만 1로 변환)
        binary_targets = (resized_targets == 1).float()  # (batch_size, height, width)

        # 손실 계산
        loss = segmentation_loss(binary_outputs, binary_targets)  # CombinedLoss 사용
        loss.backward()
        optimizer.step()

        # IoU 계산
        preds = (binary_outputs > 0.5).float()  # Thresholding
        iou = metric.compute(
            predictions=preds.detach().cpu().numpy(),
            references=binary_targets.detach().cpu().numpy(),
            num_labels=2,
            ignore_index=None
        )["mean_iou"]

        # 손실 및 IoU 누적
        train_loss += loss.item()
        train_iou += iou

        # 로그 파일 기록 (배치 단위)
        logging.info(f"Epoch {epoch + 1}/{num_epochs}, Batch {batch_idx + 1}, Training Loss: {loss.item():.4f}, Training mIoU: {iou:.4f}")

        train_progress_bar.set_postfix({"Batch Loss": loss.item(), "Batch mIoU": iou})

    avg_train_loss = train_loss / len(train_dataloader)
    avg_train_iou = train_iou / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {avg_train_loss:.4f}, Average Training mIoU: {avg_train_iou:.4f}")

    # 로그 파일 기록 (Training 에포크 단위)
    logging.info(f"Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {avg_train_loss:.4f}, Average Training mIoU: {avg_train_iou:.4f}")

    # Validation 단계
    model.eval()
    val_loss = 0
    val_iou = 0
    val_progress_bar = tqdm(val_dataloader, desc="Validation", leave=True)

    with torch.no_grad():
        for batch_idx, batch in enumerate(val_progress_bar):
            # 데이터 준비
            inputs = batch["pixel_values"].to(device)
            targets = batch["labels"].to(device)

            # 모델 출력
            outputs = model(pixel_values=inputs)
            logits = outputs.logits

            # 레이블 크기 조정
            resized_targets = F.interpolate(
                targets.unsqueeze(1).float(),
                size=logits.shape[-2:],
                mode="nearest"
            ).squeeze(1).long()

            # 1. outputs에 sigmoid 적용 및 특정 클래스 이진화
            probabilities = torch.sigmoid(logits)
            binary_outputs = probabilities[:, 1, :, :]  # 클래스 1의 채널 선택

            # 2. targets 이진화
            binary_targets = (resized_targets == 1).float()

            # 손실 계산
            loss = segmentation_loss(binary_outputs, binary_targets)

            # IoU 계산
            preds = (binary_outputs > 0.5).float()
            iou = metric.compute(
                predictions=preds.detach().cpu().numpy(),
                references=binary_targets.detach().cpu().numpy(),
                num_labels=2,
                ignore_index=None
            )["mean_iou"]

            # 손실 및 IoU 누적
            val_loss += loss.item()
            val_iou += iou

            # 로그 파일 기록 (Validation 배치 단위)
            logging.info(f"Epoch {epoch + 1}/{num_epochs}, Batch {batch_idx + 1}, Validation Loss: {loss.item():.4f}, Validation mIoU: {iou:.4f}")

    avg_val_loss = val_loss / len(val_dataloader)
    avg_val_iou = val_iou / len(val_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Validation Loss: {avg_val_loss:.4f}, Average Validation mIoU: {avg_val_iou:.4f}")

    # 로그 파일 기록 (Validation 에포크 단위)
    logging.info(f"Epoch {epoch + 1}/{num_epochs}, Average Validation Loss: {avg_val_loss:.4f}, Average Validation mIoU: {avg_val_iou:.4f}")

    # 가장 좋은 IoU 모델 저장
    if avg_val_iou > best_val_iou:
        best_val_iou = avg_val_iou
        torch.save(model.state_dict(), best_model_path)
        print(f"New best model saved with mIoU: {best_val_iou:.4f}")
        logging.info(f"New best model saved with mIoU: {best_val_iou:.4f}")

    # Scheduler step
    scheduler.step()



Epoch 1/3


Training: 100%|██████████| 1766/1766 [06:57<00:00,  4.23it/s, Batch Loss=0.663, Batch mIoU=0.76] 


Epoch 1/3, Average Training Loss: 0.8768, Average Training mIoU: 0.5337


Validation: 100%|██████████| 379/379 [00:56<00:00,  6.74it/s]


Epoch 1/3, Average Validation Loss: 0.8786, Average Validation mIoU: 0.5788
New best model saved with mIoU: 0.5788

Epoch 2/3


Training: 100%|██████████| 1766/1766 [06:42<00:00,  4.38it/s, Batch Loss=0.875, Batch mIoU=0.718]


Epoch 2/3, Average Training Loss: 0.8736, Average Training mIoU: 0.5875


Validation: 100%|██████████| 379/379 [00:58<00:00,  6.43it/s]


Epoch 2/3, Average Validation Loss: 0.8761, Average Validation mIoU: 0.6177
New best model saved with mIoU: 0.6177

Epoch 3/3


Training: 100%|██████████| 1766/1766 [06:40<00:00,  4.41it/s, Batch Loss=0.957, Batch mIoU=0.505]


Epoch 3/3, Average Training Loss: 0.8717, Average Training mIoU: 0.6165


Validation: 100%|██████████| 379/379 [00:53<00:00,  7.10it/s]


Epoch 3/3, Average Validation Loss: 0.8768, Average Validation mIoU: 0.6257
New best model saved with mIoU: 0.6257


In [7]:
# Test dataset
test_images_dir = '/WD/content/car_damage_data_v/test/image'
test_masks_dir = '/WD/content/car_damage_data_v/masks/test'
test_data = load_data(test_images_dir, test_masks_dir)
test_dataset = Dataset.from_list(test_data).map(preprocess)

Map:   0%|          | 0/1514 [00:00<?, ? examples/s]

Map: 100%|██████████| 1514/1514 [00:59<00:00, 25.53 examples/s]


In [8]:
from torch.utils.data import DataLoader

batch_size = 4

test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
test_dataset.set_format(type='torch', columns=['pixel_values', 'labels'])

In [12]:
# ce + iou 9:1
import torch
from tqdm import tqdm
import torch.nn.functional as F
import evaluate
import logging

# 로그 설정
log_file = "/WD/improve/log/training_log_ce_iou.txt"
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# Hyperparameters
num_epochs = 3
best_val_iou = 0.0  # Best Validation IoU 초기값
best_model_path = "/WD/improve/model/best_model_ce_iou.pth"
learning_rate = 5e-5
batch_size = 4  # 배치 크기 설정

# 손실 함수 정의
ce_loss_fn = torch.nn.CrossEntropyLoss()  # CrossEntropy Loss
iou_loss_fn = IoULoss()  # IoU Loss

# Optimizer와 Scheduler 초기화
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# 학습 루프
for epoch in range(num_epochs):
    logging.info(f"Epoch {epoch + 1}/{num_epochs} started")
    print(f"\nEpoch {epoch + 1}/{num_epochs}")
    train_loss = 0
    train_iou = 0

    # Training 단계
    model.train()
    train_progress_bar = tqdm(train_dataloader, desc="Training", leave=True)

    for batch_idx, batch in enumerate(train_progress_bar):
        optimizer.zero_grad()

        # 데이터 준비
        inputs = batch["pixel_values"].to(device)
        targets = batch["labels"].to(device)

        # 모델 출력
        outputs = model(pixel_values=inputs)
        logits = outputs.logits  # (batch_size, num_classes, height, width)

        # 레이블 크기 조정
        output_size = logits.shape[-2:]
        resized_targets = F.interpolate(
            targets.unsqueeze(1).float(),
            size=output_size,
            mode="nearest"
        ).squeeze(1).long()

        # CrossEntropy Loss 계산
        ce_loss = ce_loss_fn(logits, resized_targets)

        # IoU Loss 계산
        targets_one_hot = F.one_hot(resized_targets, num_classes=logits.shape[1]).permute(0, 3, 1, 2).float()
        iou_loss = iou_loss_fn(logits, targets_one_hot)

        # 손실 조합
        total_loss = 0.9 * ce_loss + 0.1 * iou_loss
        total_loss.backward()
        optimizer.step()

        # IoU 계산
        preds = logits.argmax(dim=1)
        iou = metric.compute(
            predictions=preds.detach().cpu().numpy(),
            references=resized_targets.detach().cpu().numpy(),
            num_labels=2,
            ignore_index=None
        )["mean_iou"]

        # 손실 및 IoU 누적
        train_loss += total_loss.item()
        train_iou += iou

        train_progress_bar.set_postfix({"Batch Loss": total_loss.item(), "Batch mIoU": iou})

    avg_train_loss = train_loss / len(train_dataloader)
    avg_train_iou = train_iou / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {avg_train_loss:.4f}, Average Training mIoU: {avg_train_iou:.4f}")

    logging.info(f"Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {avg_train_loss:.4f}, Average Training mIoU: {avg_train_iou:.4f}")

    # Validation 단계
    model.eval()
    val_loss = 0
    val_iou = 0
    val_progress_bar = tqdm(val_dataloader, desc="Validation", leave=True)

    with torch.no_grad():
        for batch_idx, batch in enumerate(val_progress_bar):
            # 데이터 준비
            inputs = batch["pixel_values"].to(device)
            targets = batch["labels"].to(device)

            # 모델 출력
            outputs = model(pixel_values=inputs)
            logits = outputs.logits

            # 레이블 크기 조정
            resized_targets = F.interpolate(
                targets.unsqueeze(1).float(),
                size=logits.shape[-2:],
                mode="nearest"
            ).squeeze(1).long()

            # CrossEntropy Loss 계산
            ce_loss = ce_loss_fn(logits, resized_targets)

            # IoU Loss 계산
            targets_one_hot = F.one_hot(resized_targets, num_classes=logits.shape[1]).permute(0, 3, 1, 2).float()
            iou_loss = iou_loss_fn(logits, targets_one_hot)

            # 손실 조합
            total_loss = 0.5 * ce_loss + 0.5 * iou_loss

            # IoU 계산
            preds = logits.argmax(dim=1)
            iou = metric.compute(
                predictions=preds.detach().cpu().numpy(),
                references=resized_targets.detach().cpu().numpy(),
                num_labels=2,
                ignore_index=None
            )["mean_iou"]

            # 손실 및 IoU 누적
            val_loss += total_loss.item()
            val_iou += iou

    avg_val_loss = val_loss / len(val_dataloader)
    avg_val_iou = val_iou / len(val_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Validation Loss: {avg_val_loss:.4f}, Average Validation mIoU: {avg_val_iou:.4f}")

    logging.info(f"Epoch {epoch + 1}/{num_epochs}, Average Validation Loss: {avg_val_loss:.4f}, Average Validation mIoU: {avg_val_iou:.4f}")

    # 가장 좋은 IoU 모델 저장
    if avg_val_iou > best_val_iou:
        best_val_iou = avg_val_iou
        torch.save(model.state_dict(), best_model_path)
        print(f"New best model saved with mIoU: {best_val_iou:.4f}")
        logging.info(f"New best model saved with mIoU: {best_val_iou:.4f}")

    scheduler.step()


Epoch 1/3


Training: 100%|██████████| 1766/1766 [06:35<00:00,  4.46it/s, Batch Loss=0.0395, Batch mIoU=0.782]


Epoch 1/3, Average Training Loss: 0.1566, Average Training mIoU: 0.7061


Validation: 100%|██████████| 379/379 [00:53<00:00,  7.12it/s]


Epoch 1/3, Average Validation Loss: 0.1745, Average Validation mIoU: 0.6988
New best model saved with mIoU: 0.6988

Epoch 2/3


Training: 100%|██████████| 1766/1766 [06:35<00:00,  4.46it/s, Batch Loss=0.0868, Batch mIoU=0.741]


Epoch 2/3, Average Training Loss: 0.1202, Average Training mIoU: 0.7377


Validation: 100%|██████████| 379/379 [00:56<00:00,  6.71it/s]


Epoch 2/3, Average Validation Loss: 0.1691, Average Validation mIoU: 0.6981

Epoch 3/3


Training: 100%|██████████| 1766/1766 [06:29<00:00,  4.53it/s, Batch Loss=0.0897, Batch mIoU=0.858]


Epoch 3/3, Average Training Loss: 0.1067, Average Training mIoU: 0.7574


Validation: 100%|██████████| 379/379 [00:52<00:00,  7.26it/s]

Epoch 3/3, Average Validation Loss: 0.1707, Average Validation mIoU: 0.6935





### focal loss 사용

In [28]:
import torch
from tqdm import tqdm
import torch.nn.functional as F
import evaluate
import logging

# 로그 설정
log_file = "/WD/improve/log/training_log_focal_iou4.txt"
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# Hyperparameters
num_epochs = 3
best_val_iou = 0.0  # Best Validation IoU 초기값
best_model_path = "/WD/improve/model/best_model_focal_iou4.pth"
learning_rate = 5e-5
batch_size = 4  # 배치 크기 설정

# 손실 함수 정의
focal_loss_fn = FocalLoss(alpha=0.15, gamma=2.5)  # Focal Loss
iou_loss_fn = IoULoss()  # IoU Loss

# Optimizer와 Scheduler 초기화
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# 학습 루프
for epoch in range(num_epochs):
    logging.info(f"Epoch {epoch + 1}/{num_epochs} started")
    print(f"\nEpoch {epoch + 1}/{num_epochs}")
    train_loss = 0
    train_iou = 0

    # Training 단계
    model.train()
    train_progress_bar = tqdm(train_dataloader, desc="Training", leave=True)

    for batch_idx, batch in enumerate(train_progress_bar):
        optimizer.zero_grad()

        # 데이터 준비
        inputs = batch["pixel_values"].to(device)
        targets = batch["labels"].to(device)

        # 모델 출력
        outputs = model(pixel_values=inputs)
        logits = outputs.logits  # (batch_size, num_classes, height, width)

        # 레이블 크기 조정
        output_size = logits.shape[-2:]
        resized_targets = F.interpolate(
            targets.unsqueeze(1).float(),
            size=output_size,
            mode="nearest"
        ).squeeze(1).long()

        # Focal Loss 계산
        focal_loss = focal_loss_fn(logits, resized_targets)

        # IoU Loss 계산
        targets_one_hot = F.one_hot(resized_targets, num_classes=logits.shape[1]).permute(0, 3, 1, 2).float()
        iou_loss = iou_loss_fn(logits, targets_one_hot)

        # 손실 조합
        total_loss = 0.6 * focal_loss + 0.4 * iou_loss
        total_loss.backward()
        optimizer.step()

        # IoU 계산
        preds = logits.argmax(dim=1)
        iou = metric.compute(
            predictions=preds.detach().cpu().numpy(),
            references=resized_targets.detach().cpu().numpy(),
            num_labels=2,
            ignore_index=None
        )["mean_iou"]

        # 손실 및 IoU 누적
        train_loss += total_loss.item()
        train_iou += iou

        train_progress_bar.set_postfix({"Batch Loss": total_loss.item(), "Batch mIoU": iou})

    avg_train_loss = train_loss / len(train_dataloader)
    avg_train_iou = train_iou / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {avg_train_loss:.4f}, Average Training mIoU: {avg_train_iou:.4f}")

    logging.info(f"Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {avg_train_loss:.4f}, Average Training mIoU: {avg_train_iou:.4f}")

    # Validation 단계
    model.eval()
    val_loss = 0
    val_iou = 0
    val_progress_bar = tqdm(val_dataloader, desc="Validation", leave=True)

    with torch.no_grad():
        for batch_idx, batch in enumerate(val_progress_bar):
            # 데이터 준비
            inputs = batch["pixel_values"].to(device)
            targets = batch["labels"].to(device)

            # 모델 출력
            outputs = model(pixel_values=inputs)
            logits = outputs.logits

            # 레이블 크기 조정
            resized_targets = F.interpolate(
                targets.unsqueeze(1).float(),
                size=logits.shape[-2:],
                mode="nearest"
            ).squeeze(1).long()

            # Focal Loss 계산
            focal_loss = focal_loss_fn(logits, resized_targets)

            # IoU Loss 계산
            targets_one_hot = F.one_hot(resized_targets, num_classes=logits.shape[1]).permute(0, 3, 1, 2).float()
            iou_loss = iou_loss_fn(logits, targets_one_hot)

            # 손실 조합
            total_loss = 0.6 * focal_loss + 0.4 * iou_loss

            # IoU 계산
            preds = logits.argmax(dim=1)
            iou = metric.compute(
                predictions=preds.detach().cpu().numpy(),
                references=resized_targets.detach().cpu().numpy(),
                num_labels=2,
                ignore_index=None
            )["mean_iou"]

            # 손실 및 IoU 누적
            val_loss += total_loss.item()
            val_iou += iou

    avg_val_loss = val_loss / len(val_dataloader)
    avg_val_iou = val_iou / len(val_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Validation Loss: {avg_val_loss:.4f}, Average Validation mIoU: {avg_val_iou:.4f}")

    logging.info(f"Epoch {epoch + 1}/{num_epochs}, Average Validation Loss: {avg_val_loss:.4f}, Average Validation mIoU: {avg_val_iou:.4f}")

    # 가장 좋은 IoU 모델 저장
    if avg_val_iou > best_val_iou:
        best_val_iou = avg_val_iou
        torch.save(model.state_dict(), best_model_path)
        print(f"New best model saved with mIoU: {best_val_iou:.4f}")
        logging.info(f"New best model saved with mIoU: {best_val_iou:.4f}")

    scheduler.step()



Epoch 1/3


Training:   0%|          | 0/1766 [00:00<?, ?it/s]

Training: 100%|██████████| 1766/1766 [06:44<00:00,  4.36it/s, Batch Loss=0.017, Batch mIoU=0.904] 


Epoch 1/3, Average Training Loss: 0.0923, Average Training mIoU: 0.6326


Validation: 100%|██████████| 379/379 [00:54<00:00,  7.00it/s]


Epoch 1/3, Average Validation Loss: 0.0788, Average Validation mIoU: 0.6109
New best model saved with mIoU: 0.6109

Epoch 2/3


Training:  34%|███▍      | 605/1766 [02:17<04:24,  4.39it/s, Batch Loss=0.0798, Batch mIoU=0.671]


KeyboardInterrupt: 

In [26]:
import torch
from tqdm import tqdm
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
import evaluate
import os

# Hyperparameters
num_epochs = 3
best_val_iou = 0.0  # Best Validation IoU 초기값
best_model_path = "./original.pth"
learning_rate = 5e-5
batch_size = 4  # 배치 크기 설정

# 로그 파일 설정
log_file = "original.txt"
if os.path.exists(log_file):
    os.remove(log_file)  # 이전 로그 파일 삭제

def write_log(message):
    with open(log_file, "a") as f:
        f.write(message + "\n")
    print(message)

# mIoU 메트릭 초기화
metric = evaluate.load("mean_iou")

# Optimizer와 Scheduler 초기화
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)  # 5 에포크마다 lr 감소

# 손실 함수 정의
segmentation_loss = torch.nn.CrossEntropyLoss()

# 학습 루프
for epoch in range(num_epochs):
    write_log(f"\nEpoch {epoch + 1}/{num_epochs}")
    train_loss = 0
    train_iou = 0

    # Training 단계
    model.train()
    train_progress_bar = tqdm(train_dataloader, desc="Training", leave=True)

    for batch_idx, batch in enumerate(train_progress_bar):
        optimizer.zero_grad()

        # 데이터 준비
        inputs = batch["pixel_values"].to(device)
        targets = batch["labels"].to(device)

        # 모델 출력
        outputs = model(pixel_values=inputs)
        logits = outputs.logits  # (batch_size, num_classes, height, width)

        # 레이블 크기 조정
        output_size = logits.shape[-2:]  # (height, width)
        resized_targets = F.interpolate(
            targets.unsqueeze(1).float(),
            size=output_size,
            mode="nearest"
        ).squeeze(1).long()

        # 손실 계산
        loss = segmentation_loss(logits, resized_targets)
        loss.backward()
        optimizer.step()

        # IoU 계산
        preds = logits.argmax(dim=1)  # [batch_size, height, width]
        iou = metric.compute(
            predictions=preds.detach().cpu().numpy(),
            references=resized_targets.detach().cpu().numpy(),
            num_labels=2,
            ignore_index=None
        )["mean_iou"]

        # 손실 및 IoU 누적
        train_loss += loss.item()
        train_iou += iou

        train_progress_bar.set_postfix({"Batch Loss": loss.item(), "Batch mIoU": iou})

    avg_train_loss = train_loss / len(train_dataloader)
    avg_train_iou = train_iou / len(train_dataloader)
    write_log(f"Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {avg_train_loss:.4f}, Average Training mIoU: {avg_train_iou:.4f}")

    # Scheduler step
    scheduler.step()

    # Validation 단계
    model.eval()
    val_loss = 0
    val_iou = 0
    val_progress_bar = tqdm(val_dataloader, desc="Validation", leave=True)

    with torch.no_grad():
        for batch_idx, batch in enumerate(val_progress_bar):
            # 데이터 준비
            inputs = batch["pixel_values"].to(device)
            targets = batch["labels"].to(device)

            # 모델 출력
            outputs = model(pixel_values=inputs)
            logits = outputs.logits

            # 레이블 크기 조정
            resized_targets = F.interpolate(
                targets.unsqueeze(1).float(),
                size=logits.shape[-2:],
                mode="nearest"
            ).squeeze(1).long()

            # 손실 계산
            loss = segmentation_loss(logits, resized_targets)

            # IoU 계산
            preds = logits.argmax(dim=1)
            iou = metric.compute(
                predictions=preds.detach().cpu().numpy(),
                references=resized_targets.detach().cpu().numpy(),
                num_labels=2,
                ignore_index=None
            )["mean_iou"]

            # 손실 및 IoU 누적
            val_loss += loss.item()
            val_iou += iou

    avg_val_loss = val_loss / len(val_dataloader)
    avg_val_iou = val_iou / len(val_dataloader)
    write_log(f"Epoch {epoch + 1}/{num_epochs}, Average Validation Loss: {avg_val_loss:.4f}, Average Validation mIoU: {avg_val_iou:.4f}")

    # 가장 좋은 IoU 모델 저장
    if avg_val_iou > best_val_iou:
        best_val_iou = avg_val_iou
        torch.save(model.state_dict(), best_model_path)
        write_log(f"New best model saved with mIoU: {best_val_iou:.4f}")



Epoch 1/3


Training:   1%|          | 19/1766 [00:04<07:07,  4.08it/s, Batch Loss=0.034, Batch mIoU=0.87]  


KeyboardInterrupt: 

### test

In [18]:
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
from torch.utils.data import DataLoader
import torch
import torch.nn.functional as F
from tqdm import tqdm
import evaluate

# Hyperparameters
batch_size = 4
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 모델 설정
model_name = "nvidia/mit-b3"  # 사용한 모델 이름
processor = SegformerImageProcessor.from_pretrained(model_name)

# IoU 메트릭
metric = evaluate.load("mean_iou")

# 손실 함수
ce_loss_fn = torch.nn.CrossEntropyLoss()
iou_loss_fn = IoULoss()  # IoU Loss 정의 필요

# 모델 로드 함수
def load_trained_model(pth_path, device):
    # 모델 구조 초기화
    model = SegformerForSemanticSegmentation.from_pretrained(
        model_name, num_labels=2, id2label=id2label, label2id=label2id
    )
    # .pth 파일의 가중치 불러오기
    state_dict = torch.load(pth_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    return model

# 테스트 함수
def test_model(model, test_dataloader, device):
    test_loss = 0.0
    test_iou = 0.0
    model.eval()

    test_progress_bar = tqdm(test_dataloader, desc="Testing", leave=True)
    
    with torch.no_grad():  # 그래디언트 비활성화
        for batch in test_progress_bar:
            # 데이터 준비
            inputs = batch["pixel_values"].to(device)  # 이미지 입력
            targets = batch["labels"].to(device)      # 라벨 입력
            
            # 모델 예측
            outputs = model(pixel_values=inputs)
            logits = outputs.logits  # (batch_size, num_classes, height, width)
            
            # 레이블 크기 맞춤
            output_size = logits.shape[-2:]
            resized_targets = F.interpolate(
                targets.unsqueeze(1).float(),
                size=output_size,
                mode="nearest"
            ).squeeze(1).long()
            
            # 손실 계산
            ce_loss = ce_loss_fn(logits, resized_targets)
            targets_one_hot = F.one_hot(resized_targets, num_classes=logits.shape[1]).permute(0, 3, 1, 2).float()
            iou_loss = iou_loss_fn(logits, targets_one_hot)
            total_loss = 0.8 * ce_loss + 0.2 * iou_loss

            # IoU 계산
            preds = logits.argmax(dim=1)
            iou = metric.compute(
                predictions=preds.detach().cpu().numpy(),
                references=resized_targets.detach().cpu().numpy(),
                num_labels=2,
                ignore_index=None
            )["mean_iou"]

            # 손실 및 IoU 누적
            test_loss += total_loss.item()
            test_iou += iou

            test_progress_bar.set_postfix({"Batch Loss": total_loss.item(), "Batch mIoU": iou})
    
    avg_test_loss = test_loss / len(test_dataloader)
    avg_test_iou = test_iou / len(test_dataloader)

    print(f"\nAverage Test Loss: {avg_test_loss:.4f}, Average Test mIoU: {avg_test_iou:.4f}")
    return avg_test_loss, avg_test_iou

# 경로 설정
pth_path = "/WD/improve/model/best_model_focal_iou2.pth"

# 모델 불러오기
model = load_trained_model(pth_path, device)

# 테스트 실행
avg_test_loss, avg_test_iou = test_model(model, test_dataloader, device)
print(f"Final Test Loss: {avg_test_loss:.4f}, Final Test mIoU: {avg_test_iou:.4f}")


  return func(*args, **kwargs)
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b3 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  state_dict = torch.load(pth_path, map_location=device)
Testing: 100%|██████████| 379/379 [00:53<00:00,  7.11it/s, Batch Loss=0.153, Ba


Average Test Loss: 0.2307, Average Test mIoU: 0.6918
Final Test Loss: 0.2307, Final Test mIoU: 0.6918





In [30]:
import torch
from tqdm import tqdm
import torch.nn.functional as F
import evaluate
import logging

# 로그 설정
test_log_file = "/WD/improve/log/test_log_focal_iou3.txt"
logging.basicConfig(filename=test_log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# 손실 함수 정의
focal_loss_fn = FocalLoss(alpha=0.15, gamma=2.5)  # Focal Loss
iou_loss_fn = IoULoss()  # IoU Loss

# IoU 메트릭
metric = evaluate.load("mean_iou")

# 테스트 함수
def test_model(model, test_dataloader, device):
    test_loss = 0.0
    test_iou = 0.0
    model.eval()  # 평가 모드 설정

    test_progress_bar = tqdm(test_dataloader, desc="Testing", leave=True)

    with torch.no_grad():  # 그래디언트 비활성화
        for batch_idx, batch in enumerate(test_progress_bar):
            # 데이터 준비
            inputs = batch["pixel_values"].to(device)
            targets = batch["labels"].to(device)

            # 모델 출력
            outputs = model(pixel_values=inputs)
            logits = outputs.logits  # (batch_size, num_classes, height, width)

            # 레이블 크기 조정
            output_size = logits.shape[-2:]
            resized_targets = F.interpolate(
                targets.unsqueeze(1).float(),
                size=output_size,
                mode="nearest"
            ).squeeze(1).long()

            # Focal Loss 계산
            focal_loss = focal_loss_fn(logits, resized_targets)

            # IoU Loss 계산
            targets_one_hot = F.one_hot(resized_targets, num_classes=logits.shape[1]).permute(0, 3, 1, 2).float()
            iou_loss = iou_loss_fn(logits, targets_one_hot)

            # 손실 조합
            total_loss = 0.7 * focal_loss + 0.3 * iou_loss

            # IoU 계산
            preds = logits.argmax(dim=1)
            iou = metric.compute(
                predictions=preds.detach().cpu().numpy(),
                references=resized_targets.detach().cpu().numpy(),
                num_labels=2,
                ignore_index=None
            )["mean_iou"]

            # 손실 및 IoU 누적
            test_loss += total_loss.item()
            test_iou += iou

            # 로그 기록
            logging.info(f"Batch {batch_idx + 1}, Test Loss: {total_loss.item():.4f}, Test mIoU: {iou:.4f}")
            test_progress_bar.set_postfix({"Batch Loss": total_loss.item(), "Batch mIoU": iou})

    # 평균 손실 및 IoU 계산
    avg_test_loss = test_loss / len(test_dataloader)
    avg_test_iou = test_iou / len(test_dataloader)

    print(f"\nAverage Test Loss: {avg_test_loss:.4f}, Average Test mIoU: {avg_test_iou:.4f}")
    logging.info(f"Average Test Loss: {avg_test_loss:.4f}, Average Test mIoU: {avg_test_iou:.4f}")

    return avg_test_loss, avg_test_iou


# 모델 로드
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model_path = "/WD/improve/model/best_model_focal_iou3.pth"

# 모델 로드 및 초기화
model.load_state_dict(torch.load(best_model_path, map_location=device))  # 저장된 모델 가중치 불러오기
model.to(device)
model.eval()

# 테스트 데이터 로드 (예시)
# 테스트 데이터는 pixel_values와 labels를 포함해야 합니다.
# test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# 테스트 실행
print("\nStarting Test...")
avg_test_loss, avg_test_iou = test_model(model, test_dataloader, device)
print(f"Final Test Loss: {avg_test_loss:.4f}, Final Test mIoU: {avg_test_iou:.4f}")


  model.load_state_dict(torch.load(best_model_path, map_location=device))  # 저장된 모델 가중치 불러오기



Starting Test...


Testing: 100%|██████████| 379/379 [00:56<00:00,  6.66it/s, Batch Loss=0.0593, Batch mIoU=0.644] 


Average Test Loss: 0.0685, Average Test mIoU: 0.6926
Final Test Loss: 0.0685, Final Test mIoU: 0.6926



