In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision.models.segmentation import deeplabv3_resnet50
import torchvision.transforms as transforms

import numpy as np

In [2]:
import os
from PIL import Image
from pathlib import Path


class AerialDataset(Dataset):
    """
    
    """
    def __init__(self, original_img_dir, seg_dir, transform, target_transform):
        super(AerialDataset).__init__()
        self.original_img_dir = Path(original_img_dir)
        self.seg_dir = seg_dir
        self.data = os.listdir(original_img_dir)
        self.targets = os.listdir(seg_dir)
        
        self.transform = transform
        self.target_transform = target_transform
        
        assert len(self.data) == len(self.targets)
        for d, target in zip(self.data, self.targets):
            assert d.split('.')[0] == target.split('.')[0]
            
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.transform(Image.open(self.original_img_dir / self.data[index])), \
                self.target_transform(Image.open(self.seg_dir / self.targets[index]))

In [3]:
train_root = Path('E:/Datasets/suichang_round1/suichang_round1_train')
original_img_dir = train_root / 'original_images'
seg_dir = train_root / 'segmentation'

In [4]:
transform = transforms.Compose([
    transforms.Lambda(lambda x: x.convert('RGB')),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

target_transform = transforms.Compose([
    transforms.Lambda(lambda x: torch.as_tensor(np.asarray(x), dtype = torch.int64)),
    transforms.Lambda(lambda x: x-1)
])

trainset = AerialDataset(original_img_dir, seg_dir, transform, target_transform)

trainloader = DataLoader(trainset, batch_size=16)

In [5]:
def imgshow(img):
    img = img / 2 + 0.5
    npimg = img.numpy()
    npimg = np.transpose(npimg, (1, 2, 0))
    plt.imshow(npimg)
    
dataiter = iter(trainloader)

imgs, targets = dataiter.next()

imgs
# img = torchvison.utils.mk_gird()

  transforms.Lambda(lambda x: torch.as_tensor(np.asarray(x), dtype = torch.int64)),


tensor([[[[-1.3302, -1.3815, -1.4672,  ..., -0.3369, -0.3027, -0.3027],
          [-1.2617, -1.3302, -1.4329,  ..., -0.2342, -0.1657, -0.1314],
          [-1.2274, -1.3473, -1.4672,  ..., -0.1314, -0.0287,  0.0398],
          ...,
          [-1.0048, -1.0048, -0.9705,  ..., -1.4843, -1.4329, -1.3987],
          [-0.9705, -1.0219, -1.0048,  ..., -1.4843, -1.4329, -1.3644],
          [-1.1418, -1.2445, -1.2445,  ..., -1.4329, -1.4158, -1.3815]],

         [[-0.9153, -0.9678, -1.0903,  ..., -0.2325, -0.2150, -0.1975],
          [-0.7927, -0.8978, -1.0378,  ..., -0.1275, -0.0399,  0.0126],
          [-0.7227, -0.9153, -1.0903,  ..., -0.0049,  0.1176,  0.2577],
          ...,
          [-0.3550, -0.3725, -0.3200,  ..., -1.0028, -0.9328, -0.8803],
          [-0.3025, -0.3901, -0.3550,  ..., -1.0028, -0.9153, -0.8277],
          [-0.5651, -0.7052, -0.7227,  ..., -0.9153, -0.8803, -0.8277]],

         [[-0.5670, -0.6367, -0.7936,  ...,  0.1999,  0.1999,  0.1999],
          [-0.4450, -0.5670, -

In [7]:
model = deeplabv3_resnet50(pretrained=False, progress=False, num_classes=10)
model

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

In [67]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.

torch.Size([10, 256, 256])