In [1]:
import os
import torch
import pandas as pd
import numpy as np

In [2]:
DataDir = '../data1/'
DataListDir = '../new_whale/pic_list/'
from torch.utils.data.dataset import Dataset
import numpy as np
import json
import cv2
import os
import torch


class WhaleData(Dataset):
    def __init__(self, mode='train', fold_id=0, image_size=(128,256)):
        super(WhaleData, self).__init__()
        assert fold_id in [0,1,2,3,4,5]
        assert mode in ['train', 'test', 'valid']
        self.pic_dir = '{}/train/'.format(DataDir)
        self.mode = mode
        self.image_size = image_size
        with open(DataListDir+'whale_dict.json', 'r') as f:
            self.image_label_dict = json.load(f)
        if mode == 'train':
            pic_list_path = '{}/train.txt'.format(DataListDir)
        elif mode == 'valid':
            pic_list_path = '{}/valid_{}.txt'.format(DataListDir, fold_id)
        elif mode == 'test':
            pic_list_path = '{}/test.txt'.format(DataListDir)
        with open(pic_list_path, 'r') as f:
#             self.pic_list = f.readlines()[:2**7]
            self.pic_list = f.readlines()
    
    def __getitem__(self, index):
        if self.mode== 'test':
            pic_name = self.pic_list[index].split('\n')[0]
            image_path = DataDir+'test/' + pic_name
            image = cv2.imread(image_path, 1)
            image = cv2.resize(image, (self.image_size[1], self.image_size[0]))
            image = np.transpose(image, (2, 0, 1))
            image = image.astype(np.float32)
            image = image.reshape([-1, self.image_size[0], self.image_size[1]])
            image = image / 255.0
            return torch.FloatTensor(image)
        else:
            pic_name= self.pic_list[index].split(',')[0]
            pic_label = self.pic_list[index].split(',')[1].split('\n')[0]
            image_path = DataDir+'train/' + pic_name
            image = cv2.imread(image_path, 1)
            image = cv2.resize(image, (self.image_size[1], self.image_size[0]))
            image = np.transpose(image, (2, 0, 1))
            image = image.astype(np.float32)
            image = image.reshape([-1, self.image_size[0], self.image_size[1]])
            image = image / 255.0
            return torch.FloatTensor(image), self.image_label_dict[pic_label]

    def __len__(self):
        return len(self.pic_list)
    
    
# # 1个训练集合, 5个验证集, 1个测试集 先存到本地；
# valid 40% new_whale, 60% 随机从 count>=2 里面采样；可以重复；
## train 必须拿到所有count==1的，且 其它每个id必须要有至少1个。剩下的是valid池子；

# z = pd.read_csv('../data1/train.csv')

# 构建 {id:[pics]}
# Dic = {}
# for i in range(z.shape[0]):
#     img, l = z.iloc[i].Image, z.iloc[i].Id
#     if l not in Dic:
#         Dic[l] = []
#         Dic[l].append(img)
#     else:
#         Dic[l].append(img)
        
# # 从Dic中采样train
# Train_list = []
# for i in Dic:
#     if i=='new_whale':
#         new_whale_train_list = random.sample(Dic[i], 5000)
#         Train_list.extend(new_whale_train_list)
#     else:
#         all_lis = Dic[i]
#         leng = len(all_lis)
#         if leng==1:
#             sample_num=1
#         elif leng>1:
#             sample_num = leng//2
#         else:
#             raise Exception('leng==0 not allowed')
#         sample_tra_list = random.sample(all_lis, sample_num)
#         Train_list.extend(sample_tra_list)

# # valid 池子 划分为5个
# Valid_list = list(set(z.Image) - set(Train_list)) 


# import os
# dir_path = '/home/qibo/all_project/vision/new_whale/pic_list2'
# if not os.path.exists(dir_path):
#     os.makedirs(dir_path)

# with open('../new_whale/pic_list2/train.txt', 'w+') as f:
#     for pic_name in Train_list:
#         l = z.set_index('Image').loc[pic_name].Id
#         f.write('{},{}\n'.format(pic_name,l))
    

# sub_num = len(Valid_list)//5
# ZZ = z.set_index('Image')
# for fold_id in range(5):
#     with open('../new_whale/pic_list2/valid_{}.txt'.format(fold_id), 'w+') as f:
#         sub_valid_list = random.sample(Valid_list, sub_num)
#         for pic_name in sub_valid_list:
#             l = ZZ.loc[pic_name].Id
#             f.write('{},{}\n'.format(pic_name,l))

# with open('../new_whale/pic_list2/valid_{}.txt'.format(6), 'w+') as f:
#     for pic_name in Valid_list:
#         l = ZZ.loc[pic_name].Id
#         f.write('{},{}\n'.format(pic_name,l))

# zz = pd.read_csv('../data1/sample_submission.csv')
# with open('../new_whale/pic_list2/test.txt', 'w+') as f:
#     for line in zz.Image:
#         f.write('{}\n'.format(line))


In [3]:
# model resnet101 
from torch.nn import functional as F
import torch
from torch import nn
import torchvision.models as tvm
# from data import WhaleData

###########################################################################################3
class Net(nn.Module):
    # 看看to_be_transer_readme, 经典的resnet101一共四层，算上头尾一共6层；

    def __init__(self, num_class):
        super(Net,self).__init__()      
        self.basemodel = tvm.resnet101(pretrained=True)
        self.basemodel.avgpool = nn.AdaptiveAvgPool2d(1)
        
        # 这行意思是： 稍后用到 fine_tuning 需要冻结前面两层参数，
        # 由于resnet 预训练模型 前四步是 conv+bn+relu+pool, 为了方便，整理到一个layer0中；
        self.basemodel.layer0 = nn.Sequential(self.basemodel.conv1,
                                              self.basemodel.bn1,             
                                              self.basemodel.relu,            
                                              self.basemodel.maxpool)         
        emb_size = 2048
        self.qb_layer = nn.Linear(emb_size, num_class)

    def forward(self, x):
        mean = [0.485, 0.456, 0.406]  # rgb
        std = [0.229, 0.224, 0.225]     

        x = torch.cat([
            (x[:, [0]] - mean[0]) / std[0], 
            (x[:, [1]] - mean[1]) / std[1], 
            (x[:, [2]] - mean[2]) / std[2], 
        ], 1)

        x = self.basemodel.layer0(x)    
        x = self.basemodel.layer1(x)    
        x = self.basemodel.layer2(x)    
        x = self.basemodel.layer3(x)    
        x = self.basemodel.layer4(x)    
        x = self.basemodel.avgpool(x)
        fea = x.view(x.size(0), -1)     
        fea = self.qb_layer(fea)
        return fea
    
if __name__ == '__main__':
    WD = WhaleData(mode='train')
    test_sample_2 = torch.cat([WD[0][0].view(1, 3, 128, -1), WD[1][0].view(1, 3, 128, -1)])

    resnet = Net(num_class=5005)
    logit = resnet(test_sample_2)
    

In [4]:
from torch.utils.data import DataLoader
batch_size = 32

out_dir = os.path.join('./models/', 'resnet101')
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
if not os.path.exists(os.path.join(out_dir,'checkpoint')):
    os.makedirs(os.path.join(out_dir,'checkpoint'))
if not os.path.exists(os.path.join(out_dir,'train')):
    os.makedirs(os.path.join(out_dir,'train'))

        
train_dataset = WhaleData(mode='train')
valid_0 = WhaleData(mode='valid', fold_id=0)
valid_1 = WhaleData(mode='valid', fold_id=1)
valid_2 = WhaleData(mode='valid', fold_id=2)
valid_3 = WhaleData(mode='valid', fold_id=3)
valid_4 = WhaleData(mode='valid', fold_id=4)
valid_5 = WhaleData(mode='valid', fold_id=5)

In [5]:
valid_loader0  = DataLoader(valid_0, batch_size=batch_size, drop_last=False, num_workers=16)
valid_loader1  = DataLoader(valid_1, batch_size=batch_size, drop_last=False, num_workers=16)
valid_loader2  = DataLoader(valid_2, batch_size=batch_size, drop_last=False, num_workers=16)
valid_loader3  = DataLoader(valid_3, batch_size=batch_size, drop_last=False, num_workers=16)
valid_loader4  = DataLoader(valid_4, batch_size=batch_size, drop_last=False, num_workers=16)
valid_loader5  = DataLoader(valid_4, batch_size=batch_size, drop_last=False, num_workers=16)

In [6]:
use_cuda = True
device = torch.device("cuda" if use_cuda else "cpu")

net = Net(num_class=5005)
for p in net.basemodel.layer0.parameters(): 
    p.requires_grad = False
    
for p in net.basemodel.layer1.parameters(): 
    p.requires_grad = False
    
for p in net.basemodel.layer2.parameters(): 
    p.requires_grad = False
    
net = torch.nn.DataParallel(net)
net = net.to(device)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                             lr=0.0001, betas=(0.9, 0.999),
                             eps=1e-08, weight_decay=0.0002)

In [7]:

def map_per_image(label, predictions):
    try:
        return 1 / (predictions[:5].index(label) + 1)
    except ValueError:
        return 0.0

def map_per_set(labels, predictions):
    """Computes the average over multiple images.

    Parameters
    ----------
    labels : list: ['qb', 'qb', 'zac', 'jam', ... , 'zac']
             A list of the true labels. (Only one true label per images allowed!)
    predictions : list of list: [['qb', 'zac', 'ben', 'jer', 'gam'], ['qb', 'zac', 'ben', 'jer', 'gam'], ..., ['qb', 'zac', 'ben', 'jer', 'gam']]
             A list of predicted elements (order does matter, 5 predictions allowed per image)
    """
    return np.mean([map_per_image(l, list(p)) for l,p in zip(labels, predictions)])


def do_valid(net, valid_loader):
    loss1_list = []
    loss2_list = []
    label_list = []
    prob_list = []
    with torch.no_grad():
        for input, truth_, in valid_loader:
            input = input.to(device)
            truth_ = truth_.to(device)
            logit = net(input)
            loss1 = FocalLossQb(gamma=2)(logit, truth_)
            loss2 = bce_loss(logit, truth_)
#             ipdb.set_trace()
            _, top5_idx = logit.sigmoid().topk(5)
            loss1_list.append(loss1.item())
            loss2_list.append(loss2.item())
            label_list.extend(truth_.tolist())
            prob_list.extend(top5_idx.tolist())
    loss1 = np.mean(loss1_list)
    loss2 = np.mean(loss2_list)
    map_5 = map_per_set(label_list, prob_list)
    return loss1, loss2, loss1+loss2, map_5, label_list[:5], prob_list[:5]

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import ipdb

class FocalLossQb(nn.Module):
    def __init__(self, gamma=2):
        super(FocalLossQb, self).__init__()
        self.gamma = gamma

    def forward(self, input, target):
        # input: bs, cls;   target: bs, 1

        ## 对于其它cls, 其损失，每个样本 num_cls-1 个scalar: loss = - ((pt)**gamma) *(log(1-pt))
        ## 目标cls 用了个trick, 计算过程 log(1) = 0, 故不影响；
#         ipdb.set_trace()
        bs, cls = input.size()
        one_hot_target = torch.empty((bs, cls), device=device).zero_()
        one_hot_target.scatter_(1, target.view(-1,1), 1) # 把value=1 按照index=target.view(), 根据轴=1，填充到输入里；
#         ipdb.set_trace()
        pt = 1 - torch.sigmoid(input)*(1-one_hot_target)
        loss_other  = -1 * ((1-pt)**self.gamma) * pt.log()

        ## 对于目标cls, 其损失，每个样本一个scalar: loss = - ((1-pt)**gamma) *(logpt)
        logpt = torch.sigmoid(input).log()
        logpt = logpt.gather(1, target.view(-1,1)).view(-1)
        loss_target = -1 * ((1-logpt.exp())**self.gamma)*logpt
        loss = loss_target.sum() + loss_other.sum()
        return loss/bs/cls # 粗略平均，没有考虑目标cls, 此处由于cls=5005, 故不影响

def bce_loss(input, target):
    bs, cls = input.size()
    one_hot_target = torch.empty((bs, cls), device=device).zero_()
    one_hot_target.scatter_(1, target.view(-1,1), 1)
    loss = F.binary_cross_entropy_with_logits(input, one_hot_target)
    return loss


if __name__ == '__main__':
    batch_size = 4
    nb_digits = 10
    x = torch.rand(4,10)*random.randint(1,10)
    x[0][0]=1
    # print(x)

    y = torch.LongTensor(batch_size,1).random_() % nb_digits
    x = x.to(device)
    y = y.to(device)
    output0 = FocalLossQb(gamma=2)(x,y)
    output1 = bce_loss(x,y)

    print(output1.item())
    print(output0.item())

4.194912910461426
3.9853227138519287


In [12]:
EPOCH=10
i=0
batch_size=32

for epoch in range(EPOCH):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, drop_last=False, num_workers=16)
    for input, truth_ in train_loader:
        i+=1
        input = input.to(device)
        truth_ = truth_.to(device)
        optimizer.zero_grad()
        logit = net(input)
        batch_loss1 = FocalLossQb(gamma=2)(logit, truth_)
        batch_loss2 = bce_loss(logit, truth_)
        loss = batch_loss1+batch_loss2
        loss.backward()
        optimizer.step()
        
        if i % 10 == 0:
            net.eval()
            train_loss1, train_loss2, train_loss, train_acc, label_5, pred_5 = do_valid(net, train_loader) 
            valid_loss1, valid_loss2, valid_loss, valid_acc, label_5_val, pred_5_val = do_valid(net, valid_loader0)
            net.train()
            print('--------------------------------------------------------------------')
            print('train_loss1:{:.4f} || train_loss2:{:.4f} || train_loss:{:.4f} || train_acc:{:.4f} ||'.format(train_loss1, train_loss2, train_loss, train_acc))
            print('valid_loss1:{:.4f} || valid_loss2:{:.4f} || valid_loss:{:.4f} || valid_acc:{:.4f} ||'.format(valid_loss1, valid_loss2, valid_loss, valid_acc))
            print('five_sample_label:\n{}\nfive_sample_predict:\n{}\n'.format(label_5, pred_5))
            

--------------------------------------------------------------------
train_loss1:0.0001 || train_loss2:0.0067 || train_loss:0.0067 || train_acc:0.9518 ||
valid_loss1:0.0008 || valid_loss2:0.0078 || valid_loss:0.0086 || valid_acc:0.3203 ||
five_sample_label:
[858, 858, 858, 858, 858]
five_sample_predict:
[[5004, 858, 4217, 2810, 2170], [5004, 858, 2810, 4217, 2170], [5004, 858, 2810, 4217, 3935], [5004, 858, 4217, 2810, 2170], [5004, 858, 2810, 4217, 3935]]

--------------------------------------------------------------------
train_loss1:0.0001 || train_loss2:0.0067 || train_loss:0.0068 || train_acc:0.9518 ||
valid_loss1:0.0008 || valid_loss2:0.0078 || valid_loss:0.0087 || valid_acc:0.3203 ||
five_sample_label:
[858, 858, 858, 858, 858]
five_sample_predict:
[[5004, 858, 4217, 2810, 2170], [5004, 858, 4217, 2810, 2170], [5004, 858, 2810, 4217, 3935], [5004, 858, 4217, 2810, 2170], [5004, 858, 4217, 2810, 3935]]

--------------------------------------------------------------------
train_l