In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#os.remove('/kaggle/working/datasets/cityscapes.pth')
# for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#       print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.transforms.functional as functional
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from glob import glob

In [3]:
# Paths of the paired Cityscapes images used for training and validation.
# glob() returns matches in arbitrary, filesystem-dependent order, so the lists are
# sorted to give every run the same sample indexing.
train_path = sorted(glob('/kaggle/input/cityscapes-image-pairs/cityscapes_data/train/*'))
valid_path = sorted(glob('/kaggle/input/cityscapes-image-pairs/cityscapes_data/val/*'))

In [4]:
class Cityscapes(Dataset):
    """Dataset over image files that each hold an (input, target) pair side by side.

    Every file is read lazily in ``__getitem__`` and split in half along its
    second axis (columns): the first half is returned as the image, the second
    half as the target.

    :param data_path: indexable collection of image file paths
    :param transform: optional callable applied to the image half
    :param target_transform: optional callable applied to the target half
    """

    def __init__(self, data_path, transform=None, target_transform=None):
        super().__init__()
        self.data_path = data_path
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Number of image files in the dataset."""
        return len(self.data_path)

    def __getitem__(self, item):
        """Read file number ``item`` and return its ``(image, target)`` halves."""
        pair = plt.imread(self.data_path[item])
        # Column index at which the paired picture is cut in two.
        half = int(pair.shape[1] / 2)
        image = pair[:, :half]
        target = pair[:, half:]
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return image, target

In [5]:
def read_directory(path):
    """Load every regular file directly inside ``path`` as an image array.

    Entries are visited in sorted filename order so the returned list is
    deterministic (``os.listdir`` gives no ordering guarantee). Sub-directories
    and other non-file entries are skipped instead of being passed to the
    image reader.

    :param path: directory containing the image files
    :return: list of arrays as returned by ``plt.imread``, one per file
    """
    image_list = []
    for filename in sorted(os.listdir(path)):
        file_path = os.path.join(path, filename)
        if not os.path.isfile(file_path):
            continue
        image_list.append(plt.imread(file_path))
    return image_list

In [6]:
def load_data(path):
    """Build the shuffled training ``DataLoader`` over the Cityscapes image pairs.

    :param path: currently unused by this function; kept so existing callers keep working
    :return: ``DataLoader`` over ``Cityscapes(train_path, ...)`` with batch size 24,
        shuffling enabled and the last incomplete batch dropped
    """
    # Input images: convert to a tensor, then normalize with mean 0.5 and std 0.5.
    image_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    # Targets: only converted to a tensor, no normalization.
    target_pipeline = transforms.Compose([transforms.ToTensor()])

    dataset = Cityscapes(train_path, transform=image_pipeline, target_transform=target_pipeline)
    return DataLoader(dataset, batch_size=24, shuffle=True, drop_last=True)

In [7]:
class UNet(nn.Module):
    """PyTorch implementation of a U-Net style network.

    Paper: https://arxiv.org/abs/1505.04597
    Overall structure: encoder -> one ConvBlock -> decoder -> one 1x1 convolution.
    The network takes a 3-channel input and produces a 3-channel output with the
    same height and width as the input.
    """

    def __init__(self):
        super(UNet, self).__init__()
        # Encoder: four blocks, each followed by a 2x2 max-pool.
        self.eb1 = EncoderBlock(3, 64, 64, kernel_size=2)
        self.eb2 = EncoderBlock(64, 128, 128, kernel_size=2)
        self.eb3 = EncoderBlock(128, 256, 256, kernel_size=2)
        self.eb4 = EncoderBlock(256, 512, 512, kernel_size=2)
        # A single ConvBlock sits between the encoder and the decoder.
        self.cb = ConvBlock(512, 1024, 1024)
        # Decoder: four blocks, each consuming the matching encoder skip output.
        self.db1 = DecoderBlock(1024, 512, 512)
        self.db2 = DecoderBlock(512, 512, 256)
        self.db3 = DecoderBlock(256, 128, 128)
        self.db4 = DecoderBlock(128, 64, 64)
        # Final 1x1 convolution mapping 64 feature channels to 3 output channels.
        self.conv1x1 = nn.Conv2d(64, 3, kernel_size=1)

    def forward(self, x):
        """Run the network on ``x``.

        :param x: input batch; indexing below treats the last two dimensions as
            (height, width) -- presumably shaped (N, 3, H, W)
        :return: output of the final 1x1 convolution, spatially matched to ``x``
        """
        ex1, skip_x1 = self.eb1(x)
        ex2, skip_x2 = self.eb2(ex1)
        ex3, skip_x3 = self.eb3(ex2)
        ex4, skip_x4 = self.eb4(ex3)
        cbx = self.cb(ex4)
        dx1 = self.db1(cbx, skip_x4)
        dx2 = self.db2(dx1, skip_x3)
        dx3 = self.db3(dx2, skip_x2)
        dx4 = self.db4(dx3, skip_x1)
        # CenterCrop takes its size as (height, width), i.e. (shape[-2], shape[-1]).
        # Passing them the other way round only happened to work for square inputs.
        # Per torchvision, CenterCrop zero-pads a map that is smaller than this size.
        crop = transforms.CenterCrop(size=(x.shape[-2], x.shape[-1]))
        # The resized feature map is normalized with mean 0.5 / std 0.5 before the 1x1 conv.
        normalize = transforms.Normalize((0.5,), (0.5,))
        return self.conv1x1(normalize(crop(dx4)))


class ConvBlock(nn.Module):
    """Two consecutive Conv2d -> BatchNorm2d -> ReLU stages.

    Both convolutions use ``kernel_size=2`` with the default stride and padding.

    :param in_channels: number of channels entering the block
    :param mid_channels: number of channels produced by the first convolution
    :param out_channels: number of channels produced by the second convolution
    """

    def __init__(self, in_channels, mid_channels, out_channels):
        super().__init__()
        stages = []
        # Layers are created in the same order as they run: first stage, then second.
        for stage_in, stage_out in ((in_channels, mid_channels), (mid_channels, out_channels)):
            stages.append(nn.Conv2d(in_channels=stage_in, out_channels=stage_out, kernel_size=2))
            stages.append(nn.BatchNorm2d(stage_out))
            stages.append(nn.ReLU(inplace=True))
        self.conv_relu = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both convolution stages to ``x``."""
        out = self.conv_relu(x)
        return out


class DownSampling(nn.Module):
    """Down-sampling step implemented with a max-pooling layer.

    :param kernel_size: kernel size of the max-pooling layer
    """

    def __init__(self, kernel_size):
        super().__init__()
        self.down_sample = nn.MaxPool2d(kernel_size)

    def forward(self, x):
        """Return the max-pooled version of ``x``."""
        pooled = self.down_sample(x)
        return pooled


class UpSampling(nn.Module):
    """Up-sampling step implemented with a transposed convolution.

    Output size of the transposed convolution:

    C_out = out_channels
    H_out = (H_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + output_padding + 1
    W_out = (W_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + output_padding + 1

    With the defaults below this gives H_out = 2 * H_in + 6 (same for the width).

    :param in_channels: input channels of the transposed convolution
    :param out_channels: output channels of the transposed convolution
    :param kernel_size: kernel size of the transposed convolution, default 7
    :param stride: stride of the transposed convolution, default 2
    :param dilation: dilation of the transposed convolution, default 1
    :param padding: padding of the transposed convolution, default 0
    :param output_padding: extra size added to one side of the output, default 1
    """

    def __init__(self, in_channels, out_channels, kernel_size=7, stride=2, dilation=1, padding=0, output_padding=1):
        super().__init__()
        deconv_options = dict(
            kernel_size=kernel_size,
            stride=stride,
            dilation=dilation,
            padding=padding,
            output_padding=output_padding,
        )
        self.up_sample = nn.ConvTranspose2d(in_channels, out_channels, **deconv_options)

    def forward(self, x):
        """Return ``x`` after the transposed convolution."""
        upsampled = self.up_sample(x)
        return upsampled


class EncoderBlock(nn.Module):
    """One level of the encoder: a ConvBlock followed by down-sampling.

    :param in_channels: number of channels entering the block
    :param mid_channels: number of channels of the block's middle convolution
    :param out_channels: number of channels leaving the block
    :param kernel_size: kernel size of the down-sampling (max-pooling) layer
    """

    def __init__(self, in_channels, mid_channels, out_channels, kernel_size):
        super().__init__()
        self.conv_block = ConvBlock(in_channels, mid_channels, out_channels)
        self.down_sample = DownSampling(kernel_size)

    def forward(self, x):
        """Return ``(down-sampled features, features before down-sampling)``.

        The second element is what the decoder later receives as its skip input.
        """
        features = self.conv_block(x)
        pooled = self.down_sample(features)
        return pooled, features


class ConcatLayer(nn.Module):
    """Skip connection: concatenates two feature maps along dimension 1 (channels).

    The skip tensor is first center-cropped to the height and width of ``x``.
    """

    def __init__(self):
        super(ConcatLayer, self).__init__()

    def forward(self, x, skip_x):
        """Concatenate ``x`` with the resized ``skip_x``.

        :param x: feature map whose last two dimensions define the target size
        :param skip_x: feature map passed over from the encoder
        :return: ``torch.cat([x, resized skip_x], dim=1)``
        :raises ValueError: if the two tensors differ in any dimension other than dim 1
        """
        # Resize the encoder feature map to the spatial size of x.
        # Per torchvision, center_crop zero-pads when skip_x is smaller than the target.
        x1 = functional.center_crop(skip_x, [x.shape[-2], x.shape[-1]])
        # torch.cat along dim=1 only requires the *other* dimensions to agree, so the
        # channel counts may differ; comparing full shapes would reject valid inputs.
        if tuple(x1.shape[:1]) + tuple(x1.shape[2:]) != tuple(x.shape[:1]) + tuple(x.shape[2:]):
            raise ValueError('要连接的两个特征图尺寸不一致，skip_x.shape={}，x.shape={}'.format(skip_x.shape, x.shape))
        # Concatenate along the channel dimension.
        return torch.cat([x, x1], dim=1)


class DecoderBlock(nn.Module):
    """One level of the decoder: UpSampling -> Concat -> ConvBlock.

    The up-sampling step maps ``in_channels`` to ``out_channels``; its result is
    concatenated with the skip tensor and fed to a ConvBlock that expects
    ``in_channels`` inputs, so the skip tensor must supply the remaining
    ``in_channels - out_channels`` channels.

    :param in_channels: number of channels entering the block
    :param mid_channels: number of channels of the block's middle convolution
    :param out_channels: number of channels leaving the block
    """

    def __init__(self, in_channels, mid_channels, out_channels):
        super(DecoderBlock, self).__init__()
        self.up_sample = UpSampling(in_channels, out_channels)
        # Built once here instead of on every forward pass. ConcatLayer holds no
        # parameters or buffers, so state_dict keys are unaffected.
        self.concat = ConcatLayer()
        self.conv_block = ConvBlock(in_channels, mid_channels, out_channels)

    def forward(self, x, skip_x):
        """Up-sample ``x``, merge it with ``skip_x`` and apply the ConvBlock."""
        x1 = self.up_sample(x)
        x2 = self.concat(x1, skip_x)
        return self.conv_block(x2)

In [8]:
def _save_checkpoint(state, path):
    """Write ``state`` to ``path`` with ``torch.save``, creating missing parent directories."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    torch.save(state, path)


def trainer(model, dataloader, optimizer, loss, epoch, device=None,
            checkpoint_path='/kaggle/working/model_params/u_net_seg.pth'):
    """Train ``model`` for ``epoch`` passes over ``dataloader``.

    After the last batch of every epoch a checkpoint (model and optimizer state,
    zero-based epoch index and last batch loss) is written to ``checkpoint_path``,
    overwriting the previous one. Progress is printed after every batch.

    :param model: module to optimize; called as ``model(image)``
    :param dataloader: sized iterable yielding ``(image, label)`` batches
    :param optimizer: optimizer stepping the model's parameters
    :param loss: callable ``loss(prediction, label)`` returning a scalar tensor
    :param epoch: number of epochs to run
    :param device: passed to ``.to(device)`` for every image and label batch
    :param checkpoint_path: file the checkpoint is saved to
    :return: the trained ``model`` (same object that was passed in)
    """
    num_batches = len(dataloader)
    for epoch_idx in range(epoch):
        # Accumulate plain floats so no autograd graph is kept alive across batches.
        running_loss = 0.0
        for batch_idx, (image, label) in enumerate(dataloader):
            segment_mask = model(image.to(device))
            loss_value = loss(segment_mask, label.to(device))
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            batch_loss = loss_value.item()
            running_loss += batch_loss
            if batch_idx == num_batches - 1:
                # Last batch of the epoch: persist the training state, then report.
                _save_checkpoint({
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch': epoch_idx,
                    # Still a tensor, but detached from the graph.
                    'loss': loss_value.detach()
                }, checkpoint_path)
                print('epoch {} batch {}/{} average loss = {:.4f} last loss = {:.4f}'.format(epoch_idx + 1,
                                                                                             batch_idx + 1,
                                                                                             num_batches,
                                                                                             running_loss / num_batches,
                                                                                             batch_loss))
            else:
                print('epoch {} batch {}/{} loss = {:.4f}'.format(epoch_idx + 1, batch_idx + 1, num_batches,
                                                                  batch_loss))
    return model

In [9]:
# Training hyper-parameters: number of epochs, plus the learning rate and betas
# that are handed to the Adam optimizer.
params = dict(
    epoch=20,
    lr=3e-4,
    betas=(0.5, 0.999),
)

In [10]:
if __name__ == '__main__':
    # Use the GPU when torch can see one, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        current_device = 'cuda'
    else:
        current_device = 'cpu'
    print(current_device)

    # NOTE: load_data does not read its path argument; the loader is always built
    # from the training image paths.
    data_loader = load_data('/kaggle/working/datasets/cityscapes.pth')

    # Model, loss and optimizer all live on the selected device.
    unet = UNet().to(current_device)
    mse_loss = nn.MSELoss().to(current_device)
    optimizer_adam = optim.Adam(unet.parameters(), lr=params.get('lr'), betas=params.get('betas'))

    unet = trainer(
        model=unet,
        dataloader=data_loader,
        optimizer=optimizer_adam,
        loss=mse_loss,
        epoch=params.get('epoch'),
        device=current_device,
    )

cuda
epoch 1 batch 1/123 loss = 0.7607
epoch 1 batch 2/123 loss = 0.6414
epoch 1 batch 3/123 loss = 0.5026
epoch 1 batch 4/123 loss = 0.3911
epoch 1 batch 5/123 loss = 0.3311
epoch 1 batch 6/123 loss = 0.2755
epoch 1 batch 7/123 loss = 0.2509
epoch 1 batch 8/123 loss = 0.2170
epoch 1 batch 9/123 loss = 0.2051
epoch 1 batch 10/123 loss = 0.1932
epoch 1 batch 11/123 loss = 0.1779
epoch 1 batch 12/123 loss = 0.1683
epoch 1 batch 13/123 loss = 0.1532
epoch 1 batch 14/123 loss = 0.1325
epoch 1 batch 15/123 loss = 0.1295
epoch 1 batch 16/123 loss = 0.1153
epoch 1 batch 17/123 loss = 0.1144
epoch 1 batch 18/123 loss = 0.1103
epoch 1 batch 19/123 loss = 0.0978
epoch 1 batch 20/123 loss = 0.0998
epoch 1 batch 21/123 loss = 0.0908
epoch 1 batch 22/123 loss = 0.0924
epoch 1 batch 23/123 loss = 0.0843
epoch 1 batch 24/123 loss = 0.0748
epoch 1 batch 25/123 loss = 0.0730
epoch 1 batch 26/123 loss = 0.0659
epoch 1 batch 27/123 loss = 0.0655
epoch 1 batch 28/123 loss = 0.0617
epoch 1 batch 29/123 los