In [7]:
import cv2
import tifffile
from pathlib import Path
import shutil
import concurrent.futures
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
from transformers import (
    SegformerForSemanticSegmentation, 
    TrainingArguments, Trainer, 
    SegformerImageProcessor)
from datasets import Dataset, Image
import evaluate
import matplotlib.pyplot as plt

### 모델 로딩

In [9]:
# Pre-trained checkpoint to start from. Alternatives (swap the assignment to use one):
#   'nvidia/mit-b0', 'nvidia/mit-b1', 'nvidia/mit-b2', 'nvidia/mit-b3', 'nvidia/mit-b5',
#   'nvidia/segformer-b4-finetuned-cityscapes-1024-1024'
MODEL_CHECKPOINT = 'nvidia/mit-b4'

# Optimisation settings.
LR = 6e-5
EPOCHS = 1
BATCH_SIZE = 2

# Data settings.
# NOTE(review): presumably the validation fraction and the square input size in
# pixels - confirm against the cells that consume them.
VAL_SIZE = 0.1
IMG_SIZE = 512

In [10]:
# Class index -> class name, plus the inverse lookup the model config expects.
# NOTE(review): class 1 is named 'water', while the dataset path used further
# down refers to a person-segmentation dataset - confirm the intended name.
id2label = {0: 'background', 1: 'water'}
label2id = dict(zip(id2label.values(), id2label.keys()))
num_labels = len(id2label)

# Load the checkpoint with a head sized for `num_labels` classes. The warning
# printed by this cell lists the decode_head weights as newly initialized, so
# the head must be trained before its predictions are meaningful.
model = SegformerForSemanticSegmentation.from_pretrained(
    MODEL_CHECKPOINT,
    ignore_mismatched_sizes=True,
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b4 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should pr

### 커스텀 데이터 셋

In [None]:
from torch.utils.data import Dataset
import os
from PIL import Image
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

class SemanticSegmentationDataset(Dataset):
    """Image (semantic) segmentation dataset read from ``images/`` and ``masks/`` folders.

    Every sample is the dict returned by the albumentations pipeline, holding
    the transformed picture under ``"image"`` and its segmentation map under
    ``"mask"``. Images and masks are paired purely by position after sorting
    their relative paths, so both folders must share layout and file naming.
    """

    def __init__(self, root_dir, train=True, img_size=512):
        """
        Args:
            root_dir (string): Root directory of the dataset; it must contain an
                ``images`` and a ``masks`` sub-directory.
            train (bool): Stored on the instance. It does not currently change
                which files are read or how they are transformed.
            img_size (int): Height and width every image and mask is resized to.
        """
        self.root_dir = root_dir
        self.train = train
        # Must be set before create_transforms() runs, which reads it.
        self.img_size = img_size
        self.transforms = self.create_transforms()

        self.img_dir = os.path.join(self.root_dir, "images")
        self.ann_dir = os.path.join(self.root_dir, "masks")

        # Paths are kept relative to their folder so that files found in nested
        # sub-directories can still be opened in __getitem__.
        self.images = self._list_files(self.img_dir)
        self.annotations = self._list_files(self.ann_dir)

        assert len(self.images) == len(self.annotations), "There must be as many images as there are segmentation maps"

    @staticmethod
    def _list_files(directory):
        """Return the sorted paths (relative to ``directory``) of every file below it."""
        relative_paths = []
        for current_dir, _sub_dirs, file_names in os.walk(directory):
            for file_name in file_names:
                full_path = os.path.join(current_dir, file_name)
                relative_paths.append(os.path.relpath(full_path, directory))
        return sorted(relative_paths)

    def create_transforms(self):
        """Build the albumentations pipeline: resize to ``img_size``, then convert to tensors."""
        _transforms = A.Compose([
            A.Resize(height=self.img_size, width=self.img_size),
            ToTensorV2(),
        ], p=1,
        )
        return _transforms

    def __len__(self):
        """Number of image/mask pairs."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load the ``idx``-th image/mask pair and run it through the transform pipeline.

        Returns:
            dict: the pipeline output, with the keys ``"image"`` and ``"mask"``.
        """
        image_path = os.path.join(self.img_dir, self.images[idx])
        mask_path = os.path.join(self.ann_dir, self.annotations[idx])

        # convert('RGB') always yields three channels, so no alpha handling is
        # needed afterwards; the context managers release the file handles.
        with Image.open(image_path) as image_file:
            image = np.array(image_file.convert('RGB'))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert('L'))

        # NOTE(review): pixel and mask values are passed through unscaled. If the
        # masks are stored as 0/255 instead of class indices, or the model expects
        # normalized inputs, that has to be handled here - confirm.
        return self.transforms(image=image, mask=mask)

In [None]:
import logging
import os
import json
from PIL import Image
from torchvision.transforms import ToTensor
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, RandomSampler, DistributedSampler, SequentialSampler

class CustomDataset(torch.utils.data.Dataset):
    """Image-classification dataset driven by one JSON annotation file per sample.

    The image belonging to an annotation is looked up in ``img_dir`` under the
    annotation's file name without its final extension (``x.jpg.json`` ->
    ``x.jpg``). NOTE(review): this assumes annotations are named
    ``<image file name>.json`` - confirm against the data.
    """

    # Expression label found in the annotation -> class index
    # (joy, embarrassment, neutral).
    LABEL_TO_INT = {'기쁨': 0, '당황': 1, '중립': 2}

    def __init__(self, img_dir, annotations_dir, transform=None):
        """
        Args:
            img_dir (string): Directory that contains all images.
            annotations_dir (string): Directory that contains the JSON metadata files.
            transform (callable, optional): Optional transform applied to each image.
        """
        self.img_dir = img_dir
        self.transform = transform

        self.annotation_dir = annotations_dir
        # Listed once and sorted: os.listdir order is arbitrary, and re-listing
        # the directory on every access costs a directory scan per sample.
        self.annotation_files = sorted(os.listdir(self.annotation_dir))

    def __len__(self):
        """Number of annotation files."""
        return len(self.annotation_files)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for sample ``idx``.

        If the image cannot be read, the following samples are tried in turn
        (wrapping around) and the first readable one is returned instead.

        Raises:
            IndexError: if ``idx`` is out of range.
            RuntimeError: if no image in the dataset can be read.
        """
        total = len(self)
        current = idx
        image = None
        for _ in range(max(total, 1)):
            # Indexing with the caller's idx first keeps IndexError for bad indices.
            annotation_name = self.annotation_files[current]
            img_path = os.path.join(self.img_dir, os.path.splitext(annotation_name)[0])
            try:
                image = Image.open(img_path)
                # Force decoding now so truncated files are caught here as well.
                image.load()
            except (IOError, OSError) as e:
                print(f"Error loading image {img_path}: {e}")
                current = (current + 1) % total
                continue
            break
        else:
            raise RuntimeError(f"No readable image found in {self.img_dir}")

        # Only the 'faceExp_uploader' field is used as the label. The parsed
        # annotation is still stored on the instance, as it was before.
        annotation_path = os.path.join(self.annotation_dir, annotation_name)
        with open(annotation_path, 'r', encoding='utf-8') as f:
            self.image_labels = json.load(f)
        label = self.image_labels['faceExp_uploader']

        # Map the string label to its integer class index.
        label_tensor = torch.tensor(self.LABEL_TO_INT[label], dtype=torch.long)

        # Without a user transform the image is simply converted to a tensor.
        transform = self.transform if self.transform else ToTensor()
        image_tensor = transform(image)

        return image_tensor, label_tensor

### 데이터 생성

In [None]:
from transformers import SegformerFeatureExtractor

# Dataset location inside the Kaggle runtime.
root_dir = '/kaggle/input/supervisely-filtered-segmentation-person-dataset/supervisely_person_clean_2667_img'

# Build the dataset once and hold out VAL_SIZE of it for validation. Creating
# two datasets from the same folder would validate on the training samples.
full_dataset = SemanticSegmentationDataset(root_dir=root_dir)
num_samples = len(full_dataset)
if num_samples < 2:
    raise ValueError(f"Need at least 2 samples to split, found {num_samples} in {root_dir}")

# At least one validation sample, and at least one sample left for training.
num_valid = min(max(1, round(num_samples * VAL_SIZE)), num_samples - 1)

# Fixed seed so the split is identical on every Restart & Run All.
split_generator = torch.Generator().manual_seed(42)
train_dataset, valid_dataset = torch.utils.data.random_split(
    full_dataset,
    [num_samples - num_valid, num_valid],
    generator=split_generator,
)

### 데이터 로더

In [None]:
from torch.utils.data import DataLoader

# Batch size comes from the config cell so it is tuned in a single place.
# Only the training loader shuffles; validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

### IoU method

In [None]:
# `datasets.load_metric` is deprecated in favour of the `evaluate` package,
# which the import cell at the top of the notebook already provides.
metric = evaluate.load("mean_iou")

### training

In [None]:
import numpy as np
import torch
from torch import nn
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm

# Define the optimizer; the learning rate comes from the config cell.
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
# Move the model to the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

model.train()
for epoch in range(EPOCHS):
    print("Epoch:", epoch)
    for idx, batch in enumerate(tqdm(train_dataloader)):
        # The dataset yields dicts with "image" and "mask" entries.
        pixel_values = batch["image"].to(device).float()
        labels = batch["mask"].to(device).long()

        # Zero the parameter gradients.
        optimizer.zero_grad()

        # Forward + backward + optimize; passing labels makes the model return the loss.
        outputs = model(pixel_values=pixel_values, labels=labels)
        loss, logits = outputs.loss, outputs.logits

        loss.backward()
        optimizer.step()

        # Evaluate on the current batch.
        with torch.no_grad():
            # Resize the logits to the label resolution before taking the arg-max.
            upsampled_logits = nn.functional.interpolate(
                logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
            )
            predicted = upsampled_logits.argmax(dim=1)

            # The metric expects predictions and labels as numpy arrays.
            # NOTE(review): these accumulated batches are never consumed in the
            # code visible here (the report below uses the current batch only) -
            # confirm whether a later cell calls metric.compute().
            metric.add_batch(
                predictions=predicted.detach().cpu().numpy(),
                references=labels.detach().cpu().numpy(),
            )

        # Print loss and current-batch metrics every 100 batches.
        if idx % 100 == 0:
            metrics = metric._compute(
                references=labels.cpu(),
                predictions=predicted.cpu(),
                num_labels=len(id2label),
                ignore_index=255,
                reduce_labels=False,  # labels are used as-is, no label reduction
            )

            print("Loss:", loss.item())
            print("Mean_iou:", metrics["mean_iou"])
            print("Mean accuracy:", metrics["mean_accuracy"])