In [1]:
import os
import random
import time
import json
import warnings 
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from utils import label_accuracy_score
import cv2

import numpy as np
import pandas as pd

# 전처리를 위한 라이브러리
from pycocotools.coco import COCO
import torchvision
import torchvision.transforms as transforms

import albumentations as A
from albumentations.pytorch import ToTensorV2

# 시각화를 위한 라이브러리
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

plt.rcParams['axes.grid'] = False

print('pytorch version: {}'.format(torch.__version__))
print('GPU 사용 가능 여부: {}'.format(torch.cuda.is_available()))

print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())

device = "cuda" if torch.cuda.is_available() else "cpu"   # GPU 사용 가능 여부에 따라 device 정보 저장

import torch.optim as optim
from torchvision import models
from torchvision.models import vgg16

pytorch version: 1.8.1+cu102
GPU 사용 가능 여부: False


RuntimeError: The NVIDIA driver on your system is too old (found version 10010). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver.

In [2]:
torch.cuda.is_available()http://www.nvidia.com/Download/index.aspx

False

In [2]:
import torchvision.models as models
import torch
import torch.nn as nn
from torch.nn import functional as F
import segmentation_models_pytorch as smp

def conv3x3_relu(in_ch, out_ch, rate=1):
    conv3x3_relu = nn.Sequential(nn.Conv2d(in_ch, 
                                           out_ch,
                                           kernel_size=3, 
                                           stride=1,
                                           padding=rate,
                                           dilation=rate),
                                 nn.ReLU())
    return conv3x3_relu

class ASPPConv(nn.Module):
    def __init__(self, inplanes, outplanes, kernel_size, padding, dilation):
        super(ASPPConv, self).__init__()
        self.atrous_conv = nn.Sequential(
            nn.Conv2d(inplanes, outplanes, kernel_size = kernel_size, 
                      stride = 1, padding = padding, dilation=dilation, bias = False),
            nn.BatchNorm2d(outplanes),
            nn.ReLU()
        )
        

    def forward(self, x):
        out = self.atrous_conv(x)
        
        return out
    
    
class ASPPPooling(nn.Module):
    def __init__(self, inplanes, outplanes):
        super(ASPPPooling, self).__init__()
        self.image_pool = nn.Sequential(
            nn.AdaptiveMaxPool2d((1,1)),
            nn.Conv2d(inplanes, outplanes, kernel_size=1, stride = 1, bias= False),
            nn.BatchNorm2d(outplanes),
            nn.ReLU()
        )
        

    def forward(self, x):
        out = self.image_pool(x)
        return out

    
class ASPP(nn.Module):
    def __init__(self, inplanes, outplanes,atrous_rates):
        super(ASPP, self).__init__()
        rates = atrous_rates
        
        self.aspp1 = ASPPConv(inplanes, outplanes, 1, padding=0, dilation=rates[0])
        self.aspp2 = ASPPConv(inplanes, outplanes, 3, padding=rates[1], dilation=rates[1])
        self.aspp3 = ASPPConv(inplanes, outplanes, 3, padding=rates[2], dilation=rates[2])
        self.aspp4 = ASPPConv(inplanes, outplanes, 3, padding=rates[3], dilation=rates[3])
        
        self.global_avg_pool = ASPPPooling(inplanes, outplanes)
        
        # concat후 다시 채널 수를 맞춰주기 위한 작업
        self.project = nn.Sequential(
            nn.Conv2d(outplanes*5, outplanes, 1, bias=False), 
            nn.BatchNorm2d(outplanes), 
            nn.ReLU(), 
            nn.Dropout(0.5)      
        )

    def forward(self, x):
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        x5 = F.interpolate(x5, size=x.size()[2:], mode='bilinear', align_corners=True) # image pooling부분은 원본 크기만큼 upsampling
        x = torch.cat((x1, x2, x3, x4, x5), dim=1)
        
        out = self.project(x)
        return out
    
class DeepLabHead(nn.Sequential):
    def __init__(self, in_ch, out_ch, n_classes, atrous_rates):
        super(DeepLabHead, self).__init__()
        self.add_module("0", ASPP(in_ch, out_ch,atrous_rates))
        self.add_module("1", nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1 , bias=False)) # passing convolution
        self.add_module("2", nn.BatchNorm2d(out_ch))
        self.add_module("3", nn.ReLU())
        self.add_module("4", nn.Conv2d(out_ch, n_classes, kernel_size=1, stride=1)) # classification

class ResNextDeepLabV3EncoderPretrain(nn.Sequential):
    """인코더 부분만 pretrain된 모델
    """
    def __init__(self, n_classes, atrous_rates):
        super(ResNextDeepLabV3EncoderPretrain, self).__init__()
        backbone = models.resnext101_32x8d(pretrained=True)
        self.encoder = nn.Sequential(
            backbone.conv1,
            backbone.bn1,
            backbone.relu,
            backbone.maxpool,
            backbone.layer1,
            backbone.layer2,
            backbone.layer3,
            backbone.layer4
        )
        self.decoder = DeepLabHead(in_ch=2048, out_ch=256, n_classes=12,atrous_rates=atrous_rates)

    def forward(self, x):
        h = self.encoder(x)
        h = self.decoder(h)
        output = F.interpolate(h, size=x.shape[2:], mode="bilinear", align_corners=False)
        
        return output

class ResNextDeepLabV3AllTrain(nn.Module):
    def __init__(self, in_channels = 3, classes = 12):
        super(ResNextDeepLabV3AllTrain, self).__init__()
        self.backbone = smp.DeepLabV3(
            encoder_name="resnext101_32x8d",
            encoder_weights= "imagenet",
            in_channels=in_channels,
            classes=classes
            )

    def forward(self, x):
        output = self.backbone(x)
        
        return output

if __name__ == '__main__':
    model = ResNextDeepLabV3EncoderPretrain(12,[1, 12, 24, 36])
    x = torch.randn([2, 3, 512, 512])
    print("input shape : ", x.shape)
    out = model(x)
    print("output shape : ", out.size())

    model = ResNextDeepLabV3AllTrain(3,12)
    x = torch.randn([2, 3, 512, 512])
    print("input shape : ", x.shape)
    out = model(x)
    print("output shape : ", out.size())


Downloading: "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth" to /opt/ml/.cache/torch/checkpoints/resnext101_32x8d-8ba56ff5.pth


  0%|          | 0.00/340M [00:00<?, ?B/s]

input shape :  torch.Size([2, 3, 512, 512])
output shape :  torch.Size([2, 12, 512, 512])
input shape :  torch.Size([2, 3, 512, 512])
output shape :  torch.Size([2, 12, 512, 512])


In [3]:
model = ResNextDeepLabV3EncoderPretrain(12,[1, 12, 24, 36])

In [4]:
model.eval()

ResNextDeepLabV3EncoderPretrain(
  (encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample

In [6]:
model = DeepLabV3(num_classes=12)

x = torch.randn([2, 3, 512, 512])
print("input shape : ", x.shape)
out = model(x).to(device)
print("output shape : ", out.size())


input shape :  torch.Size([2, 3, 512, 512])
output shape :  torch.Size([2, 2048, 64, 64])


In [5]:
resnext = models.resnext101_32x8d(pretrained=True)

In [6]:
resnext.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1