In [2]:
# 目标是先建立一个非常强的baseline；数据端先简易来；模型端好好弄，loss好好弄， eval好好弄;

In [None]:
class Fake_Spatial_SE(nn.Module):
    '''
    conv2d(channel_in, channel_out, kernel=(1,1))
        # H_new = [(H_old+padding-1)/stride]+1
        # 这一层卷积操作会是的原始feature map[bs, channel_in, H, W] --> [bs, channel_out, H, W]
    '''
    def __init__(self, channel, num_cls=11):
        super(Fake_Spatial_SE, self).__init__()
        self.squeeze = nn.Conv2d(channel, num_cls, kernel_size=1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
#         ipdb.set_trace()
        z = self.squeeze(x) # bs, num_cls, H, W
        z = self.sigmoid(z) # bs, num_cls, H, W
        return z 
    
    
class FakeDecoder(nn.Module):
    def __init__(self, up_in, x_in, n_out=11):
        '''
        up_in: channel of skip conneted layer; 
        x_in: channel of last layer;
        n_out: channel of output;
        '''
        super(FakeDecoder, self).__init__()
        self.x_conv = nn.Conv2d(x_in, n_out, 1, bias=False)
        self.tr_conv = nn.ConvTranspose2d(up_in, n_out, 2, stride=2)
        self.bn = nn.BatchNorm2d(n_out)
        self.relu = nn.ReLU(True)
        self.fake_sSE = Fake_Spatial_SE(channel=n_out)

    def forward(self, up_p, x_p):
#         ipdb.set_trace()
        up_p = self.tr_conv(up_p) # [bs, 256, 8, 8] --> [bs, num_cls, 16, 16]
        fake_sSE = self.fake_sSE(up_p) # [bs, num_cls, 16, 16]
        x_p = self.x_conv(x_p) # [bs, 11, 16, 16] -->  [bs, num_cls, 16, 16]
        return x_p*fake_sSE # [bs, 11, 16, 16]
    


class Unet_qb(nn.Module):
    def __init__(self, num_class=11):
        '''
        up_in: channel of skip conneted layer; 
        x_in: channel of last layer;
        n_out: channel of output;
        '''
        super(Unet_qb, self).__init__()
        self.resnet = torchvision.models.resnet34(True)
        self.conv1 = nn.Sequential( self.resnet.conv1, self.resnet.bn1, self.resnet.relu)
        self.encode2 = nn.Sequential(self.resnet.layer1, Spatial_Channel_SE(64))
        self.encode3 = nn.Sequential(self.resnet.layer2, Spatial_Channel_SE(128))
        self.encode4 = nn.Sequential(self.resnet.layer3, Spatial_Channel_SE(256))
        self.encode5 = nn.Sequential(self.resnet.layer4, Spatial_Channel_SE(512))
        self.center = nn.Sequential(PyramidAttention(512, 256), nn.MaxPool2d(2, 2)) 
        self.decode5 = FakeDecoder(256, 512, num_class)
        self.decode4 = FakeDecoder(64, 256, num_class)
        self.decode3 = FakeDecoder(64, 128, num_class)
        self.decode2 = FakeDecoder(64, 64, num_class)
        self.decode1 = FakeDecoder(64, 32, num_class)
        self.logit = nn.Sequential(nn.Conv2d(320, 64, kernel_size=3, padding=1),
                                   nn.ELU(True),
                                   nn.Conv2d(64, num_class, kernel_size=1, bias=False))
        self.logit_image = nn.Linear(256, num_class)
        
    def forward(self, x):
        # x: (batch_size, 3, 256, 256)
        x = self.conv1(x)  # 64, 128, 128
        e2 = self.encode2(x)  # 64, 128, 128
        e3 = self.encode3(e2)  # 128, 64, 64
        e4 = self.encode4(e3)  # 256, 32, 32
        e5 = self.encode5(e4)  # 512, 16, 16
        f = self.center(e5)  # 256, 8, 8
        for_cls = F.adaptive_avg_pool2d(f, output_size=1) # 256
        d5 = self.decode5(f, e5)  # num_cls, 16, 16
        d4 = self.decode4(d5, e4)  # num_cls, 32, 32
        d3 = self.decode3(d4, e3)  # num_cls, 64, 64
        d2 = self.decode2(d3, e2)  # num_cls, 128, 128
        d1 = self.decode1(d2, x)  # num_cls, 256, 256
        f = torch.cat((d1,
                       F.upsample(d2, scale_factor=2, mode='bilinear', align_corners=True),
                       F.upsample(d3, scale_factor=4, mode='bilinear', align_corners=True),
                       F.upsample(d4, scale_factor=8, mode='bilinear', align_corners=True),
                       F.upsample(d5, scale_factor=16, mode='bilinear', align_corners=True)), 1)  # 320, 256, 256

        logit = self.logit(f)  # 11, 256, 256
        clf = self.logit_image(for_cls.view(-1, 256)) # bs, 11
        return logit, clf

In [4]:
# epoch acc

def valid_look(fold_id, pth=None):
    # 由于 img_ori shape 不一致，所以无法放到 loader 里以 batch的形式呈现，因此，bs必须等于1；
    # 之所以要 img_ori, 是因为想要画图展示，想要计算ori_bbox

    if pth is None:
        param = torch.load('./models/unet_49' + '.pth')  # stage3 use model pretrained with pseudo-labels
        model.load_state_dict(param)  # initialize with pretained weight
    model = model.to(device)
    model.eval()
    valid_data_tmp = Jiu_valid(fold_id=fold_id, mode='valid')
    loader = DataLoader(valid_data_tmp,
                            batch_size=1,
                            num_workers=8,
                            pin_memory=True)
    i = 0
    c_pred, c_label_list, bb1_list, bb1_list, img_w, img_h = [],[],[],[],[],[]
    for imgs, mask, C, bbox, H, W, img_ori in progress_bar(loader):   
        i+=1
        if i == 1:
            imgs = imgs.to(device)
            with torch.no_grad():
                mask_pred = model(imgs).to('cpu').softmax(1).argmax(1)[0] # w, h
                bbox_pred = mask2bbox_withscale(mask_pred.numpy(), H, W)
                img = img_ori[0].numpy() # h,w,3
                x, y, w, h = bbox.numpy()[0]
                x1, y1, w1, h1 = [i.tolist()[0] for i in bbox_pred]
                c_label_list.append(C)
                bb1_list.append([x, y, w, h])
                bb2_list.append([x1, y1, w1, h1])
                img_w.append(W)
                img_h.append(H)
                if 1:
                    cv2.rectangle(img, (x, y), (x + w, y + h), (1, 0, 0), 2)
                    cv2.rectangle(img, (x1, y1), (x1 + w1, y1 + h1), (0, 0, 1), 2)
                    vis.image(img_gt.transpose((2,0,1)), win='valid1', opts={'title':'valid_gt_{}'.format(C)})
                    vis.image(img_pred.transpose((2,0,1)), win='valid1', opts={'title':'img_pred'})
        else:
            break
        weight_dic = {1:0.15,
                    2:0.09,
                    3:0.09,
                    4:0.05,
                    5:0.13,
                    6:0.05,
                    7:0.12,
                    8:0.13,
                    9:0.07,
                    10:0.12}
    mAp = mAP(weight_dic, c_pred, c_label_list, bb1_list, bb1_list, img_w, img_h)
    return mAp



In [1]:
import cv2
import os
import torch
import json
import ipdb
import numpy as np
from torchvision import models
import torch
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.sampler import RandomSampler
import matplotlib.pyplot as plt
from data import JiuData, do_resize, JiuTest
from model import *
from augment import *
from loss import multi_class_entropy, lovasz_softmax
from fastprogress.fastprogress import master_bar, progress_bar
from eval import mAP, mask2bbox_withscale, mask2bbox
from timeit import default_timer as timer
from visdom import Visdom


%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
import ipdb

def time_to_str(t, mode='min'):
    from timeit import default_timer as timer
    
    if mode=='min':
        t  = int(t)/60
        hr = t//60
        min = t%60
        return '%2d hr %02d min'%(hr,min)
    elif mode=='sec':
        t   = int(t)
        min = t//60
        sec = t%60
        return '%2d min %02d sec'%(min,sec)
    else:
        raise NotImplementedError


def train(loader, model, dataset, device):
    model = model.to(device)
    running_loss = 0.
    model.train()
    for imgs, masks, cls, bbox, H, W in progress_bar(loader, parent=mb):
        optimizer.zero_grad()
#         ipdb.set_trace()
        imgs, masks, cls = imgs.to(device), masks.to(device), cls.to(device)
        with torch.set_grad_enabled(True):
            logits, cls_logits = model(imgs) # [bs, cls, H, W] [bs, cls]
            loss1 = multi_class_entropy(logits, masks.squeeze().int())
            loss2 = lovasz_softmax(logits.squeeze(), masks.squeeze().int(), per_image=False)
            loss3 = torch.nn.CrossEntropyLoss()(cls_logits, cls)
            loss = loss1 + loss2 + loss3
            loss.backward()
            optimizer.step()
        running_loss += loss.item()*imgs.size(0)
    return running_loss/len(dataset)


def valid(loader, model, dataset, device):
    model = model.to(device)
    run_loss, run_loss1, run_loss2, run_loss3 = 0, 0,0,0
    model.eval()
    for imgs, masks, cls, bbox, H, W in progress_bar(loader, parent=mb):    
        imgs, masks, cls = imgs.to(device), masks.to(device), cls.to(device)
        with torch.no_grad():
            logits, cls_logits = model(imgs)
            loss1 = multi_class_entropy(logits, masks.squeeze().int())
            loss2 = lovasz_softmax(logits.squeeze(), masks.squeeze().int(), per_image=False)
            loss3 = torch.nn.CrossEntropyLoss()(cls_logits, cls)
            loss = loss1 + loss2 + loss3
        run_loss += loss.item()*imgs.size(0)
        run_loss1 += loss1.item()*imgs.size(0)
        run_loss2 += loss2.item()*imgs.size(0)
        run_loss3 += loss3.item()*imgs.size(0)
    return run_loss/len(dataset), run_loss1/len(dataset), run_loss2/len(dataset), run_loss3/len(dataset)


def valid_look(model, fold_id, show_num=3, pth=1):
    # logit # bs, 11, h, w 
    # mask_pred
    # bbox_pred
    # picture
    if pth is None:
        param = torch.load('./models/unet_49' + '.pth')  # stage3 use model pretrained with pseudo-labels
        model.load_state_dict(param)  # initialize with pretained weight
    model = model.to(device)
    model.eval()
    valid_data_tmp = JiuData(fold_id=fold_id, mode='valid', return_ori_img=True)
    loader = DataLoader(valid_data_tmp,
                        sampler = RandomSampler(valid_data_tmp),
                        batch_size=1,
                        num_workers=8,
                        pin_memory=True)
    i = 0
    for imgs, mask, C, bbox, H, W, img_ori in loader:   
        i+=1
        if i <= show_num:
            imgs = imgs.to(device)
            with torch.no_grad():
                mask_pred, cls_logit = model(imgs) # w, h
                mask_pred = mask_pred.to('cpu').softmax(1).argmax(1)[0]
                bbox_pred = mask2bbox_withscale(mask_pred.numpy(), H, W)
                img  = img_ori[0].numpy() # h,w,3
                img_2 = deepcopy(img)
                x, y, w, h = bbox.numpy()[0]
                x1, y1, w1, h1 = [i.tolist()[0] for i in bbox_pred]
                print('mask pred rectangle: {}/{}'.format((x1, y1), (x1 + w1, y1 + h1)))
                cv2.rectangle(img, (x, y), (x + w, y + h), (1, 0, 0), 2)
                cv2.rectangle(img_2, (x1, y1), (x1 + w1, y1 + h1), (0, 0, 1), 2)
                vis.image(img.transpose((2,0,1)), win='valid_gt', update='append', opts={'title':'valid_gt_{}'.format(C)})
                vis.image(img_2.transpose((2,0,1)), win='valid_pred', update='append', opts={'title2':'valid_pred_{}'.format(C)})
        else:
            break
    return 


def infer_upload(pre_train_pth=None):
    if pre_train_pth is None:
        param = torch.load('./models/unet_49' + '.pth')  # stage3 use model pretrained with pseudo-labels
        model.load_state_dict(param)  # initialize with pretained weight
    model = model.to(device)
    model.eval()
    dic = {}
    dic['images'] = []
    dic['annotations']=[]
    for index, tup in enumerate(progress_bar(test_loader)):  
        name, imgs, ori_h, ori_w = tup
        dic['images'].append({'file_name': name,
                              'id': index+1})

        imgs = imgs.to(device)
        with torch.no_grad():
            mask = model(imgs).to('cpu').softmax(1).argmax(1)[0] #w,h
            bbox_pred = mask2bbox_withscale(mask.numpy(), ori_h, ori_w)
            dic['annotations'].append({'image_id':index+1,
                           'bbox':bbox_pred,
                           'category_id':mask.max(),
                           'score':1})
    return dic


weight_dic = {1:0.15,
2:0.09,
3:0.09,
4:0.05,
5:0.13,
6:0.05,
7:0.12,
8:0.13,
9:0.07,
10:0.12}

In [2]:
batch_size = 20
model = Unet()
lr = 0.01
min_lr = 0.00015
EPOCH=200
snapshot = 4
scheduler_step = EPOCH//snapshot
device = 'cuda'
fold_id = 1

train_data = JiuData(fold_id=fold_id, mode='train', return_ori_img=False)
train_loader = DataLoader(
                    train_data,
                    shuffle=RandomSampler(train_data),
                    batch_size=batch_size,
                    num_workers=8,
                    pin_memory=True)

valid_data = JiuData(fold_id=fold_id, mode='valid', return_ori_img=False)
valid_loader = DataLoader(
                    valid_data,
                    shuffle=RandomSampler(valid_data),
                    batch_size=batch_size,
                    num_workers=8,
                    pin_memory=True)


testset = JiuTest()
test_loader = DataLoader(
                    testset,
                    batch_size=1,
                    num_workers=8,
                    pin_memory=True)


In [3]:
# !python -m visdom.server

In [None]:
vis = Visdom(env='jiujiu')

Setting up a new session...
Setting up a new session...
Setting up a new session...


In [5]:
optimizer = torch.optim.SGD(params = model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=scheduler_step, eta_min=min_lr)
start = timer()
mb = master_bar(range(EPOCH))

for epoch in mb:
    scheduler.step()
    epoch_loss = train(train_loader, model, train_data, device)
    l_all, l1, l2, l3 = valid(valid_loader, model, valid_data, device)
    valid_look(model, fold_id)
    vis.line(X=[epoch], Y=[[epoch_loss, l_all, l1, l2, l3]], opts=dict(markers=True, showlegend=True), win='loss', update='append' if epoch>0 else None)
    
    # 重置lr;
    if (epoch + 1) % scheduler_step == 0:
        torch.save(model.state_dict(),  './models/unet_aug01_'+ str(epoch) + '.pth')
        optimizer = torch.optim.SGD(params = model.parameters(), lr=lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=scheduler_step, eta_min=min_lr)
        scheduler.step()
        

NameError: name 'deepcopy' is not defined

NameError: name 'deepcopy' is not defined

NameError: name 'deepcopy' is not defined