In [1]:
# Load the training data: the training set contains 500 model architectures together with
# their performance rankings on 8 tasks (cplfw, market1501, dukemtmc, ...).
import json
with open('./data/CVPR_2022_NAS_Track2_train.json', 'r') as f:
    train_data = json.load(f)

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Names of the eight per-task rank fields; get_df reads each of them from every JSON entry.
target_cols = ['cplfw_rank', 'market1501_rank', 'dukemtmc_rank', 'msmt17_rank', 'veri_rank', 'vehicleid_rank', 'veriwild_rank', 'sop_rank']

In [4]:
def get_df(train_data):
    """Flatten the architecture dictionary into one DataFrame row per architecture.

    Args:
        train_data: mapping ``id -> entry`` where every entry provides an
            ``'arch'`` string and one value for each name in the module-level
            ``target_cols``.

    Returns:
        DataFrame with columns ``col0..col{n-1}`` (one per character of the
        ``arch`` string), the ``target_cols`` columns, ``id`` and ``arch``.
        ``col0`` is mapped with ``{'l': 1, 'j': 2, 'k': 3}`` and every column
        except ``id`` and ``arch`` is cast to float.

    Raises:
        ValueError: if ``train_data`` is empty (the number of ``col*`` columns
            cannot be derived without at least one architecture).
    """
    if not train_data:
        raise ValueError('get_df needs at least one architecture entry')

    n_tail = len(target_cols) + 2  # rank values plus the trailing id and arch fields
    rows = []
    for arch_id, entry in train_data.items():
        arch = entry['arch']
        ranks = [entry[c] for c in target_cols]
        rows.append(list(arch) + ranks + [arch_id, arch])

    # As before, the number of per-character columns follows the last entry;
    # pandas raises if another row has a different length.
    n_positions = len(rows[-1]) - n_tail
    feature_cols = [f'col{i}' for i in range(n_positions)]
    retf = pd.DataFrame(rows, columns=feature_cols + target_cols + ['id', 'arch'])

    # The first character is a letter code; every other position is parsed as a number below.
    retf['col0'] = retf['col0'].map({'l': 1, 'j': 2, 'k': 3})
    int_cols = [x for x in retf.columns if x not in ['id', 'arch']]
    retf[int_cols] = retf[int_cols].astype(float)
    return retf

In [5]:
# One row per training architecture: col* features, rank targets, id and the raw arch string.
train_df = get_df(train_data)

In [6]:
from scipy.special import erfinv 

# For every rank column add a `<col>_trans_y` column holding
# sqrt(2) * erfinv(2p - 1) with p = (rank + mmin) / (mmin + mmax),
# i.e. the standard-normal quantile of p.
# NOTE(review): the `*_trans_y` columns are not referenced anywhere else in the
# visible part of this notebook — confirm whether they are still needed.
for c in target_cols:
    train_y=train_df[c]
    mmin=np.min(train_y)+1  # smallest rank in the column, plus one
    mmax=np.max(train_y)+1  # largest rank in the column, plus one
    train_y=np.sqrt(2) * erfinv(2 * (train_y+mmin)/(mmin+mmax)-1)
    train_df[c+'_trans_y'] = train_y

In [7]:
def get_step(train_df):
    """Add per-position encoding columns to ``train_df`` in place.

    Every row receives five list-valued columns of length 37:

    * ``head``  - 1 at positions 1, 4, ..., 34, else 0
    * ``mlp``   - 1 at positions 2, 5, ..., 35, else 0
    * ``emb``   - 1 at positions 3, 6, ..., 36, else 0
    * ``depth`` - 1 at position 0, else 0
    * ``all_emb`` - the ``arch`` string as numbers: the leading letter mapped
      with ``{'l': 1, 'j': 2, 'k': 3}`` followed by the remaining characters
      converted to float32.

    The four indicator columns are identical for every row; only ``all_emb``
    depends on the row's ``arch`` value.

    Args:
        train_df: DataFrame with an ``arch`` column of strings.

    Returns:
        None. ``train_df`` is modified in place.
    """
    letter_code = {'l': 1, 'j': 2, 'k': 3}
    n_positions = 37
    n_blocks = 12

    def indicator(active):
        # 0/1 list over all positions, 1 where the position is in `active`.
        return [1 if pos in active else 0 for pos in range(n_positions)]

    # The masks do not depend on the row, so build them once.
    head_mask = indicator({1 + b * 3 for b in range(n_blocks)})
    mlp_mask = indicator({2 + b * 3 for b in range(n_blocks)})
    emb_mask = indicator({3 + b * 3 for b in range(n_blocks)})
    depth_mask = indicator({0})

    all_emb = []
    for item in train_df.arch:
        values = np.array(list(item[1:]), dtype=np.float32)
        all_emb.append([letter_code[item[0]]] + list(values))

    n_rows = len(all_emb)
    # Each row gets its own list copy so that no two cells share one object.
    train_df['head'] = [list(head_mask) for _ in range(n_rows)]
    train_df['mlp'] = [list(mlp_mask) for _ in range(n_rows)]
    train_df['emb'] = [list(emb_mask) for _ in range(n_rows)]
    train_df['depth'] = [list(depth_mask) for _ in range(n_rows)]
    train_df['all_emb'] = all_emb

In [8]:
# Adds the head / mlp / emb / depth / all_emb list columns to train_df in place.
get_step(train_df)

In [9]:
from sklearn.model_selection import KFold
from tqdm import tqdm

In [10]:
#定义数据
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
    """Torch dataset over a frame prepared by ``get_df`` and ``get_step``.

    Each item is a dict with:

    * ``input``  - the row's ``all_emb`` list as a float tensor of shape
      ``(len(all_emb), 1)``
    * ``input1`` - ``all_emb``, ``depth``, ``head``, ``mlp`` and ``emb``
      stacked as a float tensor of shape ``(len(all_emb), 5)``
    * ``y``      - all target values of the row, shape ``(len(target_cols),)``
    * ``y0``, ``y1``, ... - one tensor of shape ``(1,)`` per target column

    Args:
        df: DataFrame holding the list-valued feature columns and the target
            columns.
        use_cols: stored on the instance but not read by this class.
        target_cols: names of the target columns; any number is supported.
        show: when ``1``, print the prepared array shapes.
    """

    def __init__(self,df,use_cols,target_cols,show=0):
        self.df = df
        self.show = show
        self.use_cols = use_cols
        self.target_cols = target_cols

        self.prepare_data()

    def __len__(self):
        return self.df.shape[0]

    def _stack_columns(self, cols):
        """Stack list-valued columns into an array of shape (rows, list_len, len(cols))."""
        per_col = [np.array(self.df[c].tolist())[:, None] for c in cols]
        return np.concatenate(per_col, 1).transpose(0, 2, 1)

    def prepare_data(self):
        """Materialise target and input arrays from ``self.df``."""
        self.y = self.df[self.target_cols].values
        # One (rows, 1) array per target, exposed as self.y0, self.y1, ...
        for i, col in enumerate(self.target_cols):
            setattr(self, f'y{i}', self.df[[col]].values)

        self.inputs = self._stack_columns(['all_emb'])
        self.inputs1 = self._stack_columns(['all_emb','depth','head','mlp','emb'])

        if self.show==1:
            print('inputs_shape',self.inputs.shape)
            print('inputs1_shape',self.inputs1.shape)
            print('y_shape',self.y.shape)

    def __getitem__(self, idx):
        data = {
            "input": torch.tensor(self.inputs[idx], dtype=torch.float),
            "input1": torch.tensor(self.inputs1[idx], dtype=torch.float),
            "y": torch.tensor(self.y[idx], dtype=torch.float),
        }
        for i in range(len(self.target_cols)):
            data[f"y{i}"] = torch.tensor(getattr(self, f"y{i}")[idx], dtype=torch.float)

        return data

In [12]:
# Every train_df column whose name contains 'col' (the per-character columns built by get_df).
use_cols = [x for x in train_df.columns if 'col' in x]

In [13]:
len(use_cols)

37

In [14]:
import gc
import os
import time
import random
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from transformers import get_linear_schedule_with_warmup


def seed_everything(seed):
    """Seed every random number generator this notebook touches.

    Sets ``PYTHONHASHSEED``, seeds ``random``, NumPy and PyTorch (CPU and
    CUDA), and switches cuDNN to deterministic mode with benchmarking off.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # cuDNN flags
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    
    
def count_parameters(model, all=False):
    """Return the number of scalar parameters in ``model``.

    Args:
        model: object exposing ``parameters()`` (e.g. an ``nn.Module``).
        all: when True count every parameter; otherwise only those with
            ``requires_grad`` set.

    Returns:
        Total element count as an int.
    """
    params = model.parameters()
    if not all:
        params = (p for p in params if p.requires_grad)
    return sum(p.numel() for p in params)


def save_model_weights(model, modelpath, filename):
    """Save ``model.state_dict()`` to ``modelpath + filename``.

    The two strings are concatenated as-is, so ``modelpath`` must end with a
    path separator. The target directory is created when it does not exist.

    Args:
        model: module whose ``state_dict()`` is written.
        modelpath: directory prefix, including the trailing separator.
        filename: file name appended to ``modelpath``.

    Returns:
        A log message naming the written file.
    """
    target = modelpath + filename
    parent = os.path.dirname(target)
    if parent:
        # torch.save does not create missing directories.
        os.makedirs(parent, exist_ok=True)
    torch.save(model.state_dict(), target)
    return f"\n -> Save weights to {target}\n"

In [15]:
import scipy

In [16]:
def compute_metric(pred, y):
    """Kendall tau between predictions and targets.

    Args:
        pred: 2-D array of predictions, ``(samples, columns)``.
        y: array of targets with the same shape as ``pred``.

    Returns:
        1-D numpy array. When ``pred`` has more than two columns it holds one
        tau per column; otherwise it holds a single tau computed over the
        whole arrays, which ``kendalltau`` flattens.
    """
    # Public location of kendalltau; `scipy.stats.stats` is a deprecated private namespace.
    from scipy.stats import kendalltau

    n_cols = pred.shape[1]
    if n_cols > 2:
        corr = [kendalltau(pred[:, i], y[:, i])[0] for i in range(n_cols)]
    else:
        corr = [kendalltau(pred, y)[0]]
    return np.array(corr)


class CVPRLoss_tanh1(nn.Module):
    """Smooth pairwise ranking loss (a tanh surrogate of Kendall tau).

    For every target column the score is the mean, over all ordered pairs
    ``i != j``, of ``tanh((pred_i - pred_j) * sign(y_i - y_j))``. The loss is
    ``1 - score`` per column, returned with shape ``(1, n_columns)``.

    A batch with a single sample has no pairs, so the result is NaN (0 / 0).
    """

    def forward(self, pred, y):
        """Return ``1 - score`` for each column of ``y`` as a ``(1, n_columns)`` tensor."""
        per_column = [
            self.get_score(pred[:, i], y[:, i]).reshape(1) for i in range(y.shape[1])
        ]
        return 1 - torch.cat(per_column).reshape(1, -1)

    def get_score(self, outputs, labels):
        """Mean pairwise tanh agreement between 1-D ``outputs`` and ``labels``."""
        n = outputs.shape[0]
        # Entry [i, j] compares sample i with sample j.
        pred_diff = outputs.unsqueeze(1) - outputs.unsqueeze(0)
        label_sign = torch.sign(labels.unsqueeze(1) - labels.unsqueeze(0))
        agreement = (pred_diff * label_sign).tanh()

        # Mask the diagonal on the tensor's own device so the loss also runs on CPU.
        off_diagonal = 1 - torch.eye(n, device=agreement.device, dtype=agreement.dtype)

        return torch.sum(agreement * off_diagonal) / (n * (n - 1))

In [17]:
class CVPRModel(nn.Module):
    """Per-position MLP followed by two stacked LSTMs and a dense head.

    The input is ``(batch, time_step, input_dim)``. Every position goes
    through a shared MLP, then two LSTMs; the outputs of both LSTMs, the MLP
    output and the raw input are concatenated per position, flattened and
    passed through a four-layer dense head.

    Layer widths are derived from ``input_dim`` and ``bi`` so that every
    combination of constructor arguments yields a consistent network. With
    ``input_dim=1, bi=True`` the parameter names and shapes are the same as
    with the previously hard-coded ``512 + 512 + 256 + 1`` width.

    Args:
        input_dim: number of features per position.
        num_classes: number of outputs.
        time_step: sequence length; fixes the width of the first dense layer.
        bi: whether both LSTMs are bidirectional.
    """

    def __init__(
        self,
        input_dim=3,
        num_classes=8,
        time_step=12,
        bi=True
    ):
        super(CVPRModel,self).__init__()

        self.bi_num = 2 if bi else 1
        self.time_step = time_step

        hidden = 256
        lstm_out = hidden * self.bi_num  # per-position output width of each LSTM

        self.MLP = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
        )

        self.LSTM0 = nn.LSTM(hidden, hidden,
                    batch_first=True,
                    bidirectional=bi,
                    num_layers=1,
                    dropout=0)

        # LSTM1 consumes LSTM0's output, so its input width depends on `bi`.
        self.LSTM1 = nn.LSTM(lstm_out, hidden,
                    batch_first=True,
                    bidirectional=bi,
                    num_layers=1,
                    dropout=0)

        # forward() concatenates LSTM1 output, LSTM0 output, MLP output and the raw input.
        concat_dim = lstm_out + lstm_out + hidden + input_dim
        self.Logits = nn.Sequential(
            nn.Flatten(start_dim=1),
            nn.Linear(concat_dim*self.time_step, 2048),
            nn.ReLU(),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Map ``(batch, time_step, input_dim)`` to ``(batch, num_classes)``."""
        x1 = self.MLP(x)
        x2, state0 = self.LSTM0(x1)
        # LSTM1 starts from LSTM0's final hidden and cell state.
        x3, _ = self.LSTM1(x2, state0)

        features = torch.cat([x3, x2, x1, x], -1)
        return self.Logits(features)

In [18]:
def train_sig(model, 
        train_dataset, 
        val_dataset, 
        verbose=20, 
        fold_=0,
        modelname='MLP_base',
        modelpath=r'./model'+'//',
        input='input',
        y='y',
        early_stop_round=60,
        debug=False):
    """Train ``model`` on one fold and checkpoint the best epoch for each target.

    Reads these notebook-level settings: ``BATCH_SIZE``, ``BATCH_SIZE_TEST``,
    ``EPOCHS``, ``LR``, ``optim``, ``device``, ``loss_fct`` and ``target_cols``.

    After every epoch the validation Kendall tau is computed per target.
    Whenever a target's tau exceeds its best value so far (initially 0), the
    current weights are saved as ``{modelname}_target{i}_{fold_}.pt`` under
    ``modelpath``. A target whose tau never exceeds 0 gets no checkpoint.

    Args:
        model: network to train; must already be on ``device``.
        train_dataset: dataset yielding dicts with the ``input`` and ``y`` keys.
        val_dataset: dataset of the same form; its ``df`` attribute provides
            the ground-truth ranks for the metric.
        verbose: print a progress line every ``verbose`` epochs.
        fold_: fold index, used in checkpoint file names.
        modelname: checkpoint file-name prefix.
        modelpath: directory prefix for checkpoints (including the separator).
        input: key of the batch dict fed to the model.
        y: key of the batch dict used as target: ``'y'`` for all targets or
            ``'y<i>'`` for the single target with index ``i``.
        early_stop_round: accepted but not used.
        debug: accepted but not used.

    Returns:
        List with the best validation tau per target, one entry per name in
        ``target_cols`` (0 for targets that were never scored or never
        exceeded 0).
    """
    print(f'Model parameters count: {count_parameters(model)}')
    # Data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        drop_last=True,
        pin_memory=True
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE_TEST,
        shuffle=False,
        drop_last=False,
        pin_memory=True
    )
    print(f'train batch num: {len(train_loader)}')
    print(f'val batch num: {len(val_loader)}')

    # Optimizer
    optimizer = getattr(torch.optim, optim)(model.parameters(), lr=LR)
    # Scheduler: linear warm-up over the first 10% of steps, then linear decay
    num_warmup_steps = int(0.1 * EPOCHS * len(train_loader))
    num_training_steps = int(EPOCHS * len(train_loader))
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps, num_training_steps
    )
    print(f'optim: {optim}, lr: {LR}, warmup_steps: {num_warmup_steps}')

    # Indices of the targets this run predicts: all of them for y == 'y',
    # otherwise only the one named by 'y<i>'. Scoring and checkpointing are
    # restricted to these, so single-target runs no longer index past the
    # one-element score array.
    n_targets = len(target_cols)
    if y == 'y':
        scored_targets = list(range(n_targets))
    else:
        scored_targets = [int(y.replace('y', ''))]
    val_truth = val_dataset.df[[target_cols[i] for i in scored_targets]].values

    # Training state
    bst_epoch = {i: 0 for i in range(n_targets)}
    score_best = {i: 0 for i in range(n_targets)}
    first_epoch_eval = 0
    # Pre-bind the names deleted at the end so the cleanup cannot raise
    # NameError when no batch was processed.
    loss = data = pred = None
    for epoch in range(EPOCHS):

        model.train()
        model.zero_grad()
        start_time = time.time()

        avg_loss = 0
        for data in train_loader:
            pred = model(data[input].to(device))

            loss = loss_fct(
                pred,
                data[y].to(device)
            ).mean()

            loss.backward()
            avg_loss += loss.item() / len(train_loader)

            optimizer.step()
            scheduler.step()

            optimizer.zero_grad(set_to_none=True)

        # Validation
        model.eval()
        avg_val_loss = 0
        preds = []

        with torch.no_grad():
            for data in val_loader:
                pred = model(data[input].to(device))

                loss = loss_fct(
                    pred,
                    data[y].to(device)
                ).mean()

                avg_val_loss += loss.item() / len(val_loader)

                preds.append(pred.detach().cpu().numpy())

        preds = np.concatenate(preds, 0)
        # Per-target Kendall tau, computed once and reused for logging and checkpointing.
        scores = compute_metric(preds, val_truth)
        mean_corr = scores.mean()

        elapsed_time = time.time() - start_time
        if (epoch + 1) % verbose == 0:
            # Estimate for the whole logging interval from the last epoch's duration.
            elapsed_time = elapsed_time * verbose
            lr = scheduler.get_last_lr()[0]
            print(
                f"Epoch {epoch + 1:02d}/{ EPOCHS:02d} \t lr={lr:.1e}\t t={elapsed_time:.0f}s \t"
                f"loss={avg_loss:.4f}",
                end="\t",
            )

            if (epoch + 1 >= first_epoch_eval) or (epoch + 1 == EPOCHS):
                print(f"val_loss={avg_val_loss:.4f}\tcorr={mean_corr:.4f}")
            else:
                print("")

        # Save the best model per target
        for target_idx, score in zip(scored_targets, scores):
            if score > score_best[target_idx]:
                save_model_weights(model, modelpath, f'{modelname}_target{target_idx}_{fold_}.pt')
                score_best[target_idx] = score
                bst_epoch[target_idx] = epoch

    bst_list = [score_best[i] for i in range(n_targets)]
    print(bst_list,'\n\n')
    del (val_loader, train_loader, loss, data, pred)
    gc.collect()
    torch.cuda.empty_cache()

    return bst_list


In [20]:
# K-fold training: fit one model per fold; train_sig checkpoints the best epoch per target.
# The upper-case names, `optim`, `device` and `loss_fct` are read as globals by train_sig;
# `modelpath` is also read as a global by predict_sig.
seed = 666
BATCH_SIZE = 400  # the training log below reports "train size: 400" and "train batch num: 1"
BATCH_SIZE_TEST = 128
EPOCHS = 300
LR = 0.001
optim = "Adam"  # name of the torch.optim class to instantiate
device = "cuda" if torch.cuda.is_available() else "cpu"
modelpath = r'./model/'

loss_fct = CVPRLoss_tanh1()
k=5
scoref = []  # one list of best per-target scores for every fold
skf = KFold(n_splits=k, shuffle=False)
for index, (train_index, test_index) in enumerate(skf.split(train_df)):   
    print(f'FOLD {index}')
    train0 = train_df.iloc[train_index]
    val0 = train_df.iloc[test_index]   
    train_dataset = MyDataset(train0, use_cols, target_cols)
    val_dataset = MyDataset(val0, use_cols, target_cols)
    print(f'train size: {len(train0)}, val size: {len(val0)}')

    modelname = f'CVPR_2022_lstm2y_catall_tanh1_sig'
    # Re-seed before building the model so every fold starts from the same RNG state.
    seed_everything(seed)
    model = CVPRModel(input_dim=1,
                    num_classes=8,
                    bi=True,
                    time_step=37
                   ).to(device)

    _ = train_sig(model, 
                train_dataset, 
                val_dataset, 
                verbose=10, 
                fold_=index,
                modelname=modelname,
                modelpath=modelpath,
                input='input',
                y='y',
                debug=False
                 )
    scoref.append(_)
scoreff = scoref
# Overall mean of the best scores, followed by the per-target mean across folds.
print(np.round(np.array(scoreff).mean(1).mean(), 5), [round(x, 5) for x in np.array(scoreff).mean(0)])

FOLD 0
train size: 400, val size: 100
Model parameters count: 102162440
train batch num: 1
val batch num: 1
optim: Adam, lr: 0.001, warmup_steps: 30
Epoch 10/300 	 lr=3.3e-04	 t=1s 	loss=0.9267	val_loss=0.9024	corr=0.3262
Epoch 20/300 	 lr=6.7e-04	 t=1s 	loss=0.5197	val_loss=0.5445	corr=0.4587
Epoch 30/300 	 lr=1.0e-03	 t=3s 	loss=0.3986	val_loss=0.4288	corr=0.5718
Epoch 40/300 	 lr=9.6e-04	 t=4s 	loss=0.3292	val_loss=0.3565	corr=0.6444
Epoch 50/300 	 lr=9.3e-04	 t=5s 	loss=0.2842	val_loss=0.3075	corr=0.6940
Epoch 60/300 	 lr=8.9e-04	 t=3s 	loss=0.2360	val_loss=0.2532	corr=0.7487
Epoch 70/300 	 lr=8.5e-04	 t=1s 	loss=0.1987	val_loss=0.2217	corr=0.7803
Epoch 80/300 	 lr=8.1e-04	 t=3s 	loss=0.1874	val_loss=0.2136	corr=0.7880
Epoch 90/300 	 lr=7.8e-04	 t=1s 	loss=0.1840	val_loss=0.2149	corr=0.7856
Epoch 100/300 	 lr=7.4e-04	 t=1s 	loss=0.1819	val_loss=0.2140	corr=0.7867
Epoch 110/300 	 lr=7.0e-04	 t=1s 	loss=0.1794	val_loss=0.2148	corr=0.7858
Epoch 120/300 	 lr=6.7e-04	 t=1s 	loss=0.1777	

In [21]:
# Load the test set
with open('./data/CVPR_2022_NAS_Track2_test.json', 'r') as f:
    test_data = json.load(f)

In [22]:
# Build the test frame with the same two steps used for the training frame.
# get_df reads every name in target_cols from each entry, so the test JSON must contain those fields.
test_df = get_df(test_data)
get_step(test_df)

In [23]:
def pred_cuda(test_df, model):
    """Run ``model`` over ``test_df`` and return its raw outputs.

    Uses the notebook-level ``use_cols``, ``target_cols`` and ``device``.

    Args:
        test_df: frame accepted by ``MyDataset``.
        model: network already placed on ``device``.

    Returns:
        numpy array with all batch outputs concatenated along axis 0.
    """
    # Build the prediction loader
    dataset = MyDataset(test_df, use_cols, target_cols, 0)
    loader = DataLoader(
        dataset,
        batch_size=1024*8,
        shuffle=False,
        drop_last=False,
        pin_memory=True
    )

    model.eval()
    batch_outputs = []
    with torch.no_grad():
        for batch in tqdm(loader):
            batch_pred = model(batch['input'].to(device))
            batch_outputs.append(batch_pred.detach().cpu().numpy())
    preds = np.concatenate(batch_outputs, 0)
    print(preds.shape)

    # Drop the local references before collecting garbage and clearing the CUDA cache.
    del dataset, loader, model
    gc.collect()
    torch.cuda.empty_cache()
    return preds
        
def predict_sig(test_df, k=5, modelname=''):
    """Predict every target with its per-fold checkpoints and write ensemble ranks.

    For each of the 8 targets and each of the ``k`` folds, loads
    ``{modelname}_target{target}_{fold}.pt`` from the notebook-level
    ``modelpath``, predicts ``test_df`` and keeps the output column of that
    target. The fold columns are converted to ranks, averaged per row, and the
    rank of that average is stored in ``test_df[target_cols[target]]``.

    Args:
        test_df: frame accepted by ``pred_cuda``; modified in place (gains
            ``{target}_{fold}`` columns and overwritten target columns).
        k: number of folds whose checkpoints are loaded.
        modelname: checkpoint file-name prefix.

    Returns:
        None.
    """
    for target in range(8):        
        cols = []
        for fold_ in range(k):
            modelname1 = f'{modelname}_target{target}_{fold_}.pt'
            print(f'Model {modelname1}')
            model = CVPRModel(input_dim=1,
                        num_classes=8,
                        bi=True,
                        time_step=37
                       ).to(device)
            # map_location lets checkpoints saved on a GPU load on the CPU fallback device.
            model.load_state_dict(torch.load(modelpath+modelname1, map_location=device))

            pred_ = pred_cuda(test_df, model)[:, target]
            tmp_c = f'{target_cols[target]}_{fold_}'
            test_df[tmp_c] = pred_
            cols.append(tmp_c)
            print(f'Done {tmp_c}')
            
        print(cols)
        # Rank within each fold first so folds with different output scales weigh equally.
        test_df[cols] = test_df[cols].rank()
        test_df[target_cols[target]] = test_df[cols].mean(axis=1).rank()

In [24]:
k, modelname

(5, 'CVPR_2022_lstm2y_catall_tanh1_sig')

In [25]:
predict_sig(test_df, k=5, modelname='CVPR_2022_lstm2y_catall_tanh1_sig')

Model CVPR_2022_lstm2y_catall_tanh1_sig_target0_0.pt


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done cplfw_rank_0
Model CVPR_2022_lstm2y_catall_tanh1_sig_target0_1.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done cplfw_rank_1
Model CVPR_2022_lstm2y_catall_tanh1_sig_target0_2.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done cplfw_rank_2
Model CVPR_2022_lstm2y_catall_tanh1_sig_target0_3.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done cplfw_rank_3
Model CVPR_2022_lstm2y_catall_tanh1_sig_target0_4.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done cplfw_rank_4
['cplfw_rank_0', 'cplfw_rank_1', 'cplfw_rank_2', 'cplfw_rank_3', 'cplfw_rank_4']
Model CVPR_2022_lstm2y_catall_tanh1_sig_target1_0.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done market1501_rank_0
Model CVPR_2022_lstm2y_catall_tanh1_sig_target1_1.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done market1501_rank_1
Model CVPR_2022_lstm2y_catall_tanh1_sig_target1_2.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done market1501_rank_2
Model CVPR_2022_lstm2y_catall_tanh1_sig_target1_3.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done market1501_rank_3
Model CVPR_2022_lstm2y_catall_tanh1_sig_target1_4.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done market1501_rank_4
['market1501_rank_0', 'market1501_rank_1', 'market1501_rank_2', 'market1501_rank_3', 'market1501_rank_4']
Model CVPR_2022_lstm2y_catall_tanh1_sig_target2_0.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done dukemtmc_rank_0
Model CVPR_2022_lstm2y_catall_tanh1_sig_target2_1.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done dukemtmc_rank_1
Model CVPR_2022_lstm2y_catall_tanh1_sig_target2_2.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done dukemtmc_rank_2
Model CVPR_2022_lstm2y_catall_tanh1_sig_target2_3.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done dukemtmc_rank_3
Model CVPR_2022_lstm2y_catall_tanh1_sig_target2_4.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done dukemtmc_rank_4
['dukemtmc_rank_0', 'dukemtmc_rank_1', 'dukemtmc_rank_2', 'dukemtmc_rank_3', 'dukemtmc_rank_4']
Model CVPR_2022_lstm2y_catall_tanh1_sig_target3_0.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done msmt17_rank_0
Model CVPR_2022_lstm2y_catall_tanh1_sig_target3_1.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done msmt17_rank_1
Model CVPR_2022_lstm2y_catall_tanh1_sig_target3_2.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done msmt17_rank_2
Model CVPR_2022_lstm2y_catall_tanh1_sig_target3_3.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done msmt17_rank_3
Model CVPR_2022_lstm2y_catall_tanh1_sig_target3_4.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done msmt17_rank_4
['msmt17_rank_0', 'msmt17_rank_1', 'msmt17_rank_2', 'msmt17_rank_3', 'msmt17_rank_4']
Model CVPR_2022_lstm2y_catall_tanh1_sig_target4_0.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veri_rank_0
Model CVPR_2022_lstm2y_catall_tanh1_sig_target4_1.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veri_rank_1
Model CVPR_2022_lstm2y_catall_tanh1_sig_target4_2.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veri_rank_2
Model CVPR_2022_lstm2y_catall_tanh1_sig_target4_3.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veri_rank_3
Model CVPR_2022_lstm2y_catall_tanh1_sig_target4_4.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veri_rank_4
['veri_rank_0', 'veri_rank_1', 'veri_rank_2', 'veri_rank_3', 'veri_rank_4']
Model CVPR_2022_lstm2y_catall_tanh1_sig_target5_0.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done vehicleid_rank_0
Model CVPR_2022_lstm2y_catall_tanh1_sig_target5_1.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done vehicleid_rank_1
Model CVPR_2022_lstm2y_catall_tanh1_sig_target5_2.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done vehicleid_rank_2
Model CVPR_2022_lstm2y_catall_tanh1_sig_target5_3.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done vehicleid_rank_3
Model CVPR_2022_lstm2y_catall_tanh1_sig_target5_4.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done vehicleid_rank_4
['vehicleid_rank_0', 'vehicleid_rank_1', 'vehicleid_rank_2', 'vehicleid_rank_3', 'vehicleid_rank_4']
Model CVPR_2022_lstm2y_catall_tanh1_sig_target6_0.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veriwild_rank_0
Model CVPR_2022_lstm2y_catall_tanh1_sig_target6_1.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veriwild_rank_1
Model CVPR_2022_lstm2y_catall_tanh1_sig_target6_2.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veriwild_rank_2
Model CVPR_2022_lstm2y_catall_tanh1_sig_target6_3.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veriwild_rank_3
Model CVPR_2022_lstm2y_catall_tanh1_sig_target6_4.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done veriwild_rank_4
['veriwild_rank_0', 'veriwild_rank_1', 'veriwild_rank_2', 'veriwild_rank_3', 'veriwild_rank_4']
Model CVPR_2022_lstm2y_catall_tanh1_sig_target7_0.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done sop_rank_0
Model CVPR_2022_lstm2y_catall_tanh1_sig_target7_1.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done sop_rank_1
Model CVPR_2022_lstm2y_catall_tanh1_sig_target7_2.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done sop_rank_2
Model CVPR_2022_lstm2y_catall_tanh1_sig_target7_3.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done sop_rank_3
Model CVPR_2022_lstm2y_catall_tanh1_sig_target7_4.pt


  0%|          | 0/13 [00:00<?, ?it/s]

(99500, 8)
Done sop_rank_4
['sop_rank_0', 'sop_rank_1', 'sop_rank_2', 'sop_rank_3', 'sop_rank_4']


In [26]:
# Convert the ranks written by predict_sig to integers and shift them down by one
# (the describe() output below shows a minimum of 0). astype(int) truncates any
# fractional rank, and re-running this cell subtracts one again.
test_df[target_cols] = test_df[target_cols].astype(int)-1

In [27]:
test_df[target_cols].describe()

Unnamed: 0,cplfw_rank,market1501_rank,dukemtmc_rank,msmt17_rank,veri_rank,vehicleid_rank,veriwild_rank,sop_rank
count,99500.0,99500.0,99500.0,99500.0,99500.0,99500.0,99500.0,99500.0
mean,49749.420291,49749.431859,49749.435447,49749.426925,49749.433176,49749.427518,49749.435327,49749.427528
std,28723.320935,28723.320063,28723.320508,28723.320596,28723.320212,28723.319067,28723.320314,28723.320348
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,24874.75,24874.5,24874.75,24874.75,24874.75,24874.0,24874.0,24874.0
50%,49749.5,49749.5,49749.5,49749.5,49749.5,49749.5,49749.5,49749.5
75%,74624.25,74624.25,74624.25,74624.25,74624.25,74624.25,74624.25,74624.25
max,99499.0,99499.0,99499.0,99499.0,99499.0,99499.0,99499.0,99499.0


In [28]:
test_df[target_cols].nunique()

cplfw_rank         90502
market1501_rank    91932
dukemtmc_rank      92333
msmt17_rank        91219
veri_rank          92062
vehicleid_rank     91418
veriwild_rank      92327
sop_rank           91358
dtype: int64

In [29]:
modelname

'CVPR_2022_lstm2y_catall_tanh1_sig'

In [30]:
# to_sub
def to_sub(test_df, test_data, name='CVPR_2022_lgb_score'):
    """Write the predicted ranks into ``test_data`` and dump it as JSON.

    For every row of ``test_df`` the values of the notebook-level
    ``target_cols`` are copied into ``test_data[row id]``. The result is
    written to ``./sub/{name}.json``; the ``./sub`` directory is created when
    it does not exist.

    Args:
        test_df: frame with an ``id`` column and the ``target_cols`` columns.
        test_data: dict keyed by id; modified in place.
        name: output file name without extension.

    Returns:
        None.
    """
    for row in tqdm(test_df[['id']+target_cols].values):
        arch_id = row[0]
        # row[1:] holds the target values in target_cols order.
        for col, value in zip(target_cols, row[1:]):
            test_data[arch_id][col] = value

    out_dir = './sub'
    os.makedirs(out_dir, exist_ok=True)
    with open(f'{out_dir}/{name}.json', 'w') as f:
        json.dump(test_data, f)

In [31]:
# Write the submission file ./sub/CVPR_2022_lstm2y_catall_tanh1_sig.json
to_sub(test_df, test_data, name='CVPR_2022_lstm2y_catall_tanh1_sig')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


  0%|          | 0/99500 [00:00<?, ?it/s]