In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torchvision.transforms as transforms
import torchvision.models
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import segmentation_models_pytorch as smp

In [2]:
sample_submission = True

In [3]:
transform = transforms.Compose([
                    transforms.PILToTensor()
])

In [4]:
def encoded_pixels_to_masks(fname: str, df: pd.DataFrame):
    fname_df = df[df['ImageId'] == fname]
    masks = np.zeros((256 * 1600, 4), dtype=int) # float32 is V.Imp

    for i_row, row in fname_df.iterrows():
        cls_id = row['ClassId']
        encoded_pixels = row['EncodedPixels']
        if encoded_pixels is not np.nan:
            pixel_list = list(map(int, encoded_pixels.split(' ')))
            for i in range(0, len(pixel_list), 2):
                start_pixel = pixel_list[i] - 1
                num_pixel = pixel_list[i+1]
                masks[start_pixel:(start_pixel+num_pixel), cls_id-1] = 1
               
    masks = masks.reshape(256, 1600, 4, order='F')

    return masks

def masks_to_encoded_pixels(masks: np.ndarray):
    masks = masks.reshape(256*1600, 4, order='F')
    encoded_pixels_list = []
    for cls_id in range(4):
        cls_mask = masks[:, cls_id]
        cls_mask = cls_mask.reshape(256, 1600, order='F')
        cls_mask = cls_mask.T.flatten()
        prev_pixel = 0
        prev_pixel_val = 0
        encoded_pixels = []
        for i, pixel_val in enumerate(cls_mask):
            if pixel_val != prev_pixel_val:
                if pixel_val == 1:
                    start_pixel = i + 1
                    encoded_pixels.append(start_pixel - prev_pixel)
                else:
                    num_pixel = i - prev_pixel
                    encoded_pixels.append(num_pixel)
                prev_pixel = i
                prev_pixel_val = pixel_val
        encoded_pixels_list.append(encoded_pixels)
    return encoded_pixels_list # shape: 4x[]

### Solution

In [5]:
class SeverstalSteelDataset(Dataset):
    def __init__(self, df, img_dir, transform):
        self.df = df.reset_index(drop=True) 
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        fname = self.df.ImageId[idx]
        img_path = os.path.join(self.img_dir, fname)
        img = Image.open(img_path)
        img = np.array(Image.open(img_path).convert('RGB')) 
        masks = encoded_pixels_to_masks(fname, self.df)
        img = torch.tensor(img, dtype=torch.float32).permute(2, 0, 1)
        masks = torch.tensor(masks, dtype=torch.float32).permute(2, 0, 1)
        return fname, img, masks
    
# collate function if needed
def collate_fn(batch_items):
    batched_fnames = [item[0] for item in batch_items]
    batched_imgs = torch.stack([item[1] for item in batch_items])
    batched_masks = torch.stack([item[2] for item in batch_items])
    return batched_fnames, batched_imgs, batched_masks
    

In [6]:
class SegModel(torch.nn.Module):
    def __init__(self, num_classes=4):
        super(SegModel, self).__init__()
        self.model = smp.Unet(encoder_name='resnet34', encoder_weights='imagenet', classes=num_classes, activation=None)
    def forward(self, x):
        return self.model(x)

In [7]:
def dice_score(preds, targets, smooth=1e-6):
    preds = preds.reshape(-1)
    targets = targets.reshape(-1)
    
    intersection = (preds * targets).sum()
    return (2.0 * intersection) / (preds.sum() + targets.sum() + smooth)


In [8]:
def load_data(csv_path, img_folder_path, batch_size=4, val_split=0.2):
    if sample_submission:
        df = pd.read_csv(csv_path).sample(frac=0.01, random_state=10)
    else:
        df = pd.read_csv(csv_path)
    train_df, val_df = train_test_split(df, test_size=val_split, random_state=42)
    
    # Создаем датасеты
    train_dataset = SeverstalSteelDataset(train_df, img_folder_path, transform=transform)
    val_dataset = SeverstalSteelDataset(val_df, img_folder_path, transform=transform)
    
    # Создаем загрузчики
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,  num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    return train_loader, val_loader

In [9]:
def init_model(device):
    model = SegModel().to(device) 
    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)
    return model, criterion, optimizer, scheduler

def train(model, loader, optimizer, criterion, device):
    
    model.train()  
    train_loss = 0.0
    for batch_idx, (fnames, imgs, masks) in enumerate(loader):
        imgs = imgs.to(device)
        masks = masks.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(imgs)
        outputs = torch.sigmoid(outputs)
        
        loss = criterion(outputs, masks)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        if(batch_idx % 10 == 0):
            print("Batch #{0} : [train_loss : {1}]".format(batch_idx, loss.item()))
    # возвращаем средний лосс
    return train_loss / len(loader)

def validate(model, loader, criterion, device):
    model.eval()
    val_loss = 0.0
    
    dice_scores = [[], [], [], []]
                                 
    with torch.no_grad():
        for batch_idx, (fnames, imgs, masks) in enumerate(val_loader):
            imgs = imgs.to(device)
            masks = masks.to(device)
            
            outputs = model(imgs)
            outputs = torch.sigmoid(outputs)
            
            preds = torch.sigmoid(outputs).cpu().numpy()
            
            loss = criterion(outputs, masks)
            masks = masks.cpu().numpy()
            val_loss += loss.item()
            
            if(batch_idx % 10 == 0):
                print("Batch #{0} : [val_loss : {1}]".format(batch_idx, loss.item()))
            
            for i in range(masks.shape[1]):
                score = dice_score(preds[:, i], masks[:, i])
                dice_scores[i].append(score)
            
    avg_loss = val_loss / len(loader)
    avg_dice = []
    for class_dice in dice_scores:  
        avg_dice.append(sum(class_dice) / len(class_dice) if class_dice else 0.0)
    
    return avg_loss, avg_dice

def fit(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs=10):
    for epoch in range(num_epochs):
        train_loss = train(model, train_loader, optimizer, criterion, device)
        val_loss, score = validate(model, val_loader, criterion, device)
        print("Epoch #{0}: [val_loss : {1}, train_loss: {2}, dice_score: {3}]".format(epoch, val_loss, train_loss, score))
        scheduler.step(val_loss)


In [10]:
def evaluate(model, test_images_dir, device): 
    model.eval()

    # Список для хранения результатов
    results = []
    
    image_ids = [f for f in os.listdir(test_images_dir)]

    if sample_submission:
        image_ids = image_ids[:10]
    
    with torch.no_grad():
        for image_id in image_ids:
            # Чтение изображения
            image_path = os.path.join(test_images_dir, image_id)
            image = np.array(Image.open(image_path).convert('RGB')) 
            image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
            image = image.unsqueeze(0).to(device)

            outputs = model(image)
            
            mask = torch.sigmoid(outputs) # Бинаризация
            outputs = torch.sigmoid(outputs).squeeze(0).cpu().numpy() # Преобразуем к Numpy
            
            encoded_pixels = masks_to_encoded_pixels(mask.cpu().numpy()) # Преобразование в EncodedPixels
            
            for class_id in range(4):
                
                enc_pixels = " ".join(str(x) for x in encoded_pixels[class_id])

                if encoded_pixels: # Если маска непустая
                    results.append({
                    'ImageId': image_id,
                    'EncodedPixels': encoded_pixels,
                    'ClassId': class_id
                })

    return pd.DataFrame(results, columns=['ImageId', 'EncodedPixels', 'ClassId'])

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Used device: {0}".format(device))
model, criterion, optimizer, scheduler = init_model(device)
train_loader, val_loader = load_data("../data/train.csv", "../data/train_images")

Used device: cuda


In [12]:
fit(model, train_loader, val_loader, criterion, optimizer, scheduler, device)


Batch #0 : [train_loss : 0.981880784034729]
Batch #10 : [train_loss : 0.9532279968261719]
Batch #0 : [val_loss : 0.9860326647758484]
Epoch #0: [val_loss : 0.9825904965400696, train_loss: 0.9632567805903298, dice_score: [0.0012831801769974467, 0.0048335821845415065, 0.05597640436691771, 0.0]]
Batch #0 : [train_loss : 0.9375742673873901]
Batch #10 : [train_loss : 0.9312376379966736]
Batch #0 : [val_loss : 0.9408236742019653]
Epoch #1: [val_loss : 0.9416815787553787, train_loss: 0.9328219890594482, dice_score: [0.0012216955872585739, 0.005168603432813786, 0.05612621281656001, 0.0]]
Batch #0 : [train_loss : 0.917434811592102]
Batch #10 : [train_loss : 0.9141151905059814]
Batch #0 : [val_loss : 0.8976342678070068]
Epoch #2: [val_loss : 0.9043236970901489, train_loss: 0.9151129296847752, dice_score: [0.0012895382267455505, 0.005210473683725723, 0.05685587056135412, 0.0]]
Batch #0 : [train_loss : 0.9065769910812378]
Batch #10 : [train_loss : 0.9020928144454956]
Batch #0 : [val_loss : 0.877112

In [13]:
submission_df = evaluate(model, "../data/test_images", device)
submission_df.to_csv("my_submission.csv", index=False)
# submission_df