In [1]:
import pandas as pd
import cv2
import glob

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import os, sys, codecs, glob
from PIL import Image, ImageDraw

import numpy as np
import pandas as pd
import cv2

import torch
torch.backends.cudnn.benchmark = False
# torch.backends.cudnn.enabled = False

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

In [3]:
class XunFeiDataset(Dataset):
    def __init__(self, img_path, img_group, transform):
        self.img_path = img_path
        self.transform = transform
        self.group = img_group

    def __getitem__(self, index):
        img = Image.open(self.img_path[index]).convert('RGB')
        
        if self.transform is not None:
            img = self.transform(img)
        
        return img, self.group[index]

    def __len__(self):
        return len(self.img_path)

In [4]:
train_df = pd.read_csv('./电商图像检索_数据集/train.csv')
train_df['path'] = './电商图像检索_数据集/train/' + train_df['name']

train_df['group'] = pd.factorize(train_df['label'])[0]
train_df['fold'] = train_df['group'] % 5

train_df = train_df.sort_values(by='group')
train_df.head(10)

Unnamed: 0,name,label,path,group,fold
0,008233.jpg,008233.jpg 006688.jpg,./电商图像检索_数据集/train/008233.jpg,0,0
1,006688.jpg,008233.jpg 006688.jpg,./电商图像检索_数据集/train/006688.jpg,0,0
2,000232.jpg,000232.jpg 003552.jpg,./电商图像检索_数据集/train/000232.jpg,1,1
3,003552.jpg,000232.jpg 003552.jpg,./电商图像检索_数据集/train/003552.jpg,1,1
4,000814.jpg,000814.jpg 013765.jpg,./电商图像检索_数据集/train/000814.jpg,2,2
5,013765.jpg,000814.jpg 013765.jpg,./电商图像检索_数据集/train/013765.jpg,2,2
6,001429.jpg,001429.jpg 014834.jpg,./电商图像检索_数据集/train/001429.jpg,3,3
7,014834.jpg,001429.jpg 014834.jpg,./电商图像检索_数据集/train/014834.jpg,3,3
8,012795.jpg,012795.jpg 015860.jpg,./电商图像检索_数据集/train/012795.jpg,4,4
9,015860.jpg,012795.jpg 015860.jpg,./电商图像检索_数据集/train/015860.jpg,4,4


In [5]:
tr_path = train_df[train_df['fold'] != 0]['path'].values
tr_label = train_df[train_df['fold'] != 0]['group']
tr_label = pd.factorize(tr_label)[0]

val_path = train_df[train_df['fold'] == 0]['path'].values
val_label = train_df[train_df['fold'] == 0]['group'].values

In [6]:
class ArcModule(nn.Module):
    def __init__(self, in_features, out_features, s = 10, m = 0.1):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_normal_(self.weight)

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = torch.tensor(math.cos(math.pi - m))
        self.mm = torch.tensor(math.sin(math.pi - m) * m)

    def forward(self, inputs, labels):
        cos_th = F.linear(inputs, F.normalize(self.weight))
        cos_th = cos_th.clamp(-1, 1)
        sin_th = torch.sqrt(1.0 - torch.pow(cos_th, 2))
        cos_th_m = cos_th * self.cos_m - sin_th * self.sin_m
        # print(type(cos_th), type(self.th), type(cos_th_m), type(self.mm))
        cos_th_m = torch.where(cos_th > self.th, cos_th_m, cos_th - self.mm)
        
        cond_v = cos_th - self.th
        cond = cond_v <= 0
        cos_th_m[cond] = (cos_th - self.mm)[cond]

        if labels.dim() == 1:
            labels = labels.unsqueeze(-1)
        onehot = torch.zeros(cos_th.size()).cuda()
        labels = labels.type(torch.LongTensor).cuda()
        onehot.scatter_(1, labels, 1.0)
        outputs = onehot * cos_th_m + (1.0 - onehot) * cos_th
        outputs = outputs * self.s
        return outputs

In [7]:
import timm
timm.create_model('efficientnet_b3', num_classes=137, 
                          pretrained=True).classifier

Linear(in_features=1536, out_features=137, bias=True)

In [8]:
import timm

class XunFeiNet(nn.Module):
    def __init__(self, nclass):
        super(XunFeiNet, self).__init__()
                
        model = timm.create_model('efficientnet_b3', num_classes=137, 
                          pretrained=True)
        model.classifier = torch.nn.Identity()
        self.model = model
        self.margin = ArcModule(in_features=1536, 
                                out_features = nclass)
        
    def forward(self, img, labels=None):        
        feat = self.model(img)
        
        feat = F.normalize(feat)
        if labels is not None:
            return self.margin(feat, labels)
        return feat

In [9]:
def train(train_loader, model, criterion, optimizer, epoch):
    model.train()

    for i, (input, target) in enumerate(train_loader):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        output = model(input, target)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
            
def validate(val_loader, model):
    model.eval()
    
    val_feats = []
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.cuda()
            target = target.cuda()

            # compute output
            output = model(input)
            val_feats.append(output.data.cpu().numpy())
    return val_feats

In [10]:
def set_iou(label, predict):
    interset = set(label.split()) &  set(predict.split())
    unionset = set(label.split()) | set(predict.split())
    return len(interset) *1.0 / len(unionset) *1.0

In [12]:
from sklearn.preprocessing import normalize

for fold in range(5):
    tr_path = train_df[train_df['fold'] != fold]['path'].values
    tr_label = train_df[train_df['fold'] != fold]['group']
    tr_label = pd.factorize(tr_label)[0]

    val_path = train_df[train_df['fold'] == fold]['path'].values
    val_label = train_df[train_df['fold'] == fold]['group'].values

    print(f'FOLD {fold}, Face Count {tr_label.max()}')
    
    train_loader = torch.utils.data.DataLoader(
        XunFeiDataset(tr_path, tr_label,
                            transforms.Compose([
                            transforms.Resize((300, 300)),
                            transforms.RandomHorizontalFlip(),
                            transforms.RandomVerticalFlip(),
                            transforms.ToTensor(),
                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        ),
        batch_size=10, shuffle=True, num_workers=5,
    )

    val_loader = torch.utils.data.DataLoader(
        XunFeiDataset(val_path, val_label,
                            transforms.Compose([
                            transforms.Resize((300, 300)),
                            transforms.ToTensor(),
                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        ),
        batch_size=10, shuffle=False, num_workers=5,
    )

    model = XunFeiNet(tr_label.max()+1).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), 0.0003)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.85)
    fold_f1 = 0.0
    
    for epoch in range(8):
        print('Epoch: ', epoch)

        train(train_loader, model, criterion, optimizer, epoch)
        scheduler.step()

        val_feats = validate(val_loader, model)
        val_feats = np.vstack(val_feats)
        # val_feats = normalize(val_feats)

        val_distance = []
        for feat in val_feats:
            dis = np.dot(feat, val_feats.T)
            val_distance.append(dis)

        best_threahold, best_f1 = 0, 0
        for threahold in np.linspace(0.5, 0.99, 20):
            val_submit = []
            for dis in val_distance[:]:
                pred = np.where(dis > threahold)[0]
                if len(pred) == 1:
                    ids = dis.argsort()[::-1]
                    pred = [x for x in ids[dis[ids] > 0.1]][:2]

                val_submit.append(pred)

            val_f1s = []
            for x, pred in zip(val_label, val_submit):
                label = np.where(val_label == x)[0]
                val_f1 = len(set(pred) & set(label)) / len(set(pred) | set(label)) 
                val_f1s.append(val_f1)

            if best_f1 < np.mean(val_f1s):
                best_f1 = np.mean(val_f1s)
                best_threahold = threahold

        if fold_f1 < best_f1:
            torch.save(model.state_dict(), 'model_{0}.pt'.format(fold))
            fold_f1 = best_f1
        print('Val', best_threahold, best_f1)

FOLD 0, Face Count 1805
Epoch:  0
Val 0.8352631578947368 0.5320271540529868
Epoch:  1
Val 0.7836842105263158 0.57745329110047
Epoch:  2
Val 0.7321052631578947 0.6243946567204881
Epoch:  3
Val 0.7063157894736842 0.6344827049794179
Epoch:  4
Val 0.6289473684210526 0.6595472248796937
Epoch:  5
Val 0.6289473684210526 0.6680417208030003
Epoch:  6
Val 0.5773684210526315 0.6787096838992308
Epoch:  7
Val 0.6289473684210526 0.6733302010324445
FOLD 1, Face Count 1805
Epoch:  0
Val 0.8610526315789473 0.5498404909349248
Epoch:  1
Val 0.7836842105263158 0.6100791957694144
Epoch:  2
Val 0.7836842105263158 0.6501454066648876
Epoch:  3
Val 0.7063157894736842 0.6730811399264057
Epoch:  4
Val 0.7321052631578947 0.6847359816223564
Epoch:  5
Val 0.7063157894736842 0.7002876540219989
Epoch:  6
Val 0.6547368421052632 0.7051556184050793
Epoch:  7
Val 0.6289473684210526 0.7175199781825079
FOLD 2, Face Count 1805
Epoch:  0
Val 0.8352631578947368 0.5416984446484325
Epoch:  1
Val 0.8094736842105263 0.58136284381

In [13]:
test_path = glob.glob('./电商图像检索_数据集/test/*')
test_path.sort()
test_path = np.array(test_path)

test_loader = torch.utils.data.DataLoader(
    XunFeiDataset(test_path, [0]*len(test_path),
                        transforms.Compose([
                        transforms.Resize((300, 300)),
                        transforms.ToTensor(),
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    ),
    batch_size=50, shuffle=False, num_workers=5,
)

In [16]:
test_feats_fold = []
for fold, path in zip(range(5),
    ['model_0.pt', 'model_1.pt', 'model_2.pt', 'model_3.pt', 'model_4.pt']):
    tr_path = train_df[train_df['fold'] != fold]['path'].values
    tr_label = train_df[train_df['fold'] != fold]['group']
    tr_label = pd.factorize(tr_label)[0]
    model = XunFeiNet(tr_label.max()+1).cuda()
    
    model.load_state_dict(torch.load(path))
    model.eval()
    test_feats = []
    with torch.no_grad():
        for data in test_loader:
            data = data[0].cuda()
            feat = model(data)
            test_feats.append(feat.data.cpu().numpy())

    test_feats = np.vstack(test_feats)
    test_feats = normalize(test_feats)
    test_feats_fold.append(test_feats)

In [24]:
test_feats = np.stack(test_feats_fold).mean(0)

In [54]:
test_submit = []
for path, feat in zip(test_path[:], test_feats[:]):
    dis = np.dot(feat, test_feats.T)
    pred = [x.split('/')[-1] for x in test_path[np.where(dis > 0.3)[0]]]
    if len(pred) <= 1:
        ids = dis.argsort()[::-1]
        pred = [x.split('/')[-1] for x in test_path[ids[:2]]]
    
    test_submit.append([
        path.split('/')[-1],
        pred
    ])

In [55]:
dis.max()

0.35748896

In [56]:
test_submit = pd.DataFrame(test_submit, columns=['name', 'label'])
test_submit['label'] = test_submit['label'].apply(lambda x: ' '.join(x))
test_submit
test_submit.to_csv('submit.csv',index=None)

# 提交线上0.580

In [49]:
# 做了扩展查询
test_submit = []
for path, feat in zip(test_path[:], test_feats[:]):
    dis = np.dot(feat, test_feats.T)

    feat_qe = np.multiply(dis[np.argsort(dis)[::-1][:2]].reshape(2, -1),
            test_feats[np.argsort(dis)[::-1][:2]]).mean(0)
    dis = np.dot(feat_qe, test_feats.T)
    
    pred = [x.split('/')[-1] for x in test_path[np.where(dis > 0.1)[0]]]
    if len(pred) <= 1:
        ids = dis.argsort()[::-1]
        pred = [x.split('/')[-1] for x in test_path[ids[:2]]]
    
    test_submit.append([
        path.split('/')[-1],
        pred
    ])

In [50]:
test_submit = pd.DataFrame(test_submit, columns=['name', 'label'])
test_submit['label'] = test_submit['label'].apply(lambda x: ' '.join(x))
test_submit
test_submit.to_csv('submit_qe.csv',index=None)

In [48]:
dis.max()

0.12779833