# Active FULL Training Code (Basic - Global)

In [1]:
from datasets.wider_global_dataset import build_wider_dataloader
from datasets.wider_global_test_dataset import build_wider_test_dataloader
from models.encoder import Model
from evaluators.global_evaluator import GlobalEvaluator
from loss.loss import crossmodal_triplet_loss, cos_distance
from loggers.logger import Logger
from tqdm import tqdm_notebook as tqdm
from sklearn.neighbors import DistanceMetric
import os

import torch.nn as nn
import torch.optim as optim

from configs.args import load_arg_parser

## config

In [2]:
# Start from the project's default options (an empty argv: nothing to parse in a notebook).
parser = load_arg_parser()
cfg = parser.parse_args("")
cfg.data_root = "/data/aiyucui2/wider"
root = cfg.data_root

# Every path option is stored relative to the data root; re-anchor them all in one pass.
for _path_attr in (
    # data path
    "anno_path", "img_dir", "val_anno_path", "val_img_dir", "gt_file_fn",
    # meta data path
    "cheap_candidate_fn", "vocab_path",
    # sys path
    "model_path", "output_path",
):
    setattr(cfg, _path_attr, os.path.join(root, getattr(cfg, _path_attr)))

# Overrides for this experiment.
cfg.debug = False
cfg.embed_size = 512
cfg.batch_size = 96
cfg.dim = (384,128)
cfg.img_backbone_opt = "resnet18"
cfg.cap_backbone_opt = "bigru"
cfg.dist_fn_opt = "cosine"

# exp_name: encodes the hyper-parameters that distinguish this run.
_exp_fields = (
    cfg.dist_fn_opt,
    cfg.img_backbone_opt,
    cfg.cap_backbone_opt,
    cfg.embed_size,
    cfg.batch_size,
    cfg.lr,
    cfg.cap_embed_type,
)
exp_name = "dist_fn_{}_imgbb_{}_capbb_{}_embed_size_{}_batch_{}_lr_{}_captype_{}".format(*_exp_fields)

# logger
# NOTE(review): the log currently goes to a fixed "test.txt" rather than a
# per-experiment file under cfg.output_path -- confirm this is intended.
logger = Logger("test.txt")
print(exp_name)

dist_fn_cosine_imgbb_resnet18_capbb_bigru_embed_size_512_batch_96_lr_0.0001_captype_sent


## Loading data

In [3]:
# Keyword arguments that the train and test loader builders have in common.
_shared_loader_kwargs = dict(
    vocab_fn=cfg.vocab_path,
    dim=cfg.dim,
    batch_size=cfg.batch_size,
    num_workers=8,
    debug=cfg.debug,
)

# train loader
train_loader = build_wider_dataloader(
    anno_path=cfg.anno_path,
    img_dir=cfg.img_dir,
    token_length=40,
    train=True,
    **_shared_loader_kwargs
)

# test loader (loading image and text separately)
test_text_loader, test_image_loader = build_wider_test_dataloader(
    anno_path=cfg.val_anno_path,
    img_dir=cfg.val_img_dir,
    **_shared_loader_kwargs
)

[ds] load annotations from /data/aiyucui2/wider/wider/train/train_anns_train.json
size of dataset: 37132


In [4]:
# Size of the training dataset's person2label collection
# (presumably one entry per person identity -- TODO confirm in the dataset class).
print(len(train_loader.dataset.person2label.values()))

12003


In [5]:
import matplotlib.pyplot as plt
import numpy as np
import torchvision
# Sanity check of the loaders: print one test-text sample and element 5 of a
# training sample, then display the first image of a test-image batch.
print(test_text_loader.dataset[1])
print(train_loader.dataset[1][5])


# functions to show an image
def imshow(img):
    """Display one image tensor with matplotlib.

    Args:
        img: image tensor laid out channel-first (C, H, W). It is converted
            with ``.numpy()``, so it has to be a CPU tensor.
    """
    # unnormalize
    # NOTE(review): assumes the loader normalizes with mean=0.5 / std=0.5;
    # confirm against the dataset transform.
    img = img / 2 + 0.5
    npimg = img.numpy()
    # matplotlib expects channel-last (H, W, C).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


dataiter = iter(test_image_loader)
# Use the built-in next(): Python 3 iterators are only required to implement
# __next__, and DataLoader iterators no longer provide a .next() method.
images, labels = next(dataiter)

# show images
imshow(images[0])

(tensor([  9,  14,   6,   8,   2,  24,   4,  10,  97, 144,  58,   4,  59,  17,
          3,  19,  16,  50, 494, 123,  30, 141,   3,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1]), 'train_query/p8848_s17661.jpg')
tensor([  9,  14,  40,   2,  66,   4,   5,  12,   3,  19,  40,   5,  17,   4,
          5,  13,  11,  30, 112,  25,  30, 446,   3,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1])


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


<Figure size 640x480 with 1 Axes>

## Define Model

In [6]:
# Build the encoder from the configured image/caption backbones, then move it to the GPU.
model = Model(
    embed_size=cfg.embed_size,
    image_opt=cfg.img_backbone_opt,
    caption_opt=cfg.cap_backbone_opt,
)
model = model.cuda()


### Distance Metrics

In [7]:
import torch
def triplet_cos_loss(x, pos, neg, margin=0.5, eps=1e-8):
    """Triplet margin loss based on cosine distance.

    Computes ``mean(max(0, d(x, pos) - d(x, neg) + margin))`` where
    ``d(a, b) = 1 - cos(a, b)`` is evaluated along dimension 1.

    Args:
        x: anchor embeddings, one embedding per row (reduced along dim 1).
        pos: positive embeddings, same shape as ``x``.
        neg: negative embeddings, same shape as ``x``.
        margin: required gap between the positive and the negative distance.
        eps: lower bound applied to the product of the two norms, so that an
            all-zero embedding gives a similarity of 0 instead of NaN (0/0).

    Returns:
        Scalar tensor holding the hinge loss averaged over all rows.
    """
    def cos_dist(a, b):
        dot = torch.sum(a * b, 1)
        norm_prod = torch.norm(a, dim=1) * torch.norm(b, dim=1)
        # Clamp the denominator: without it a zero-norm row yields NaN,
        # which would then propagate through the mean below.
        return 1 - dot / norm_prod.clamp(min=eps)

    pos_dist = cos_dist(x, pos)
    neg_dist = cos_dist(x, neg)
    scores = torch.clamp(pos_dist - neg_dist + margin, min=0)
    return scores.mean()
    

# Select the distance function and the matching triplet loss from the config.
if cfg.dist_fn_opt == "euclidean":
    dist_fn = DistanceMetric.get_metric('euclidean').pairwise
    triplet_loss = nn.TripletMarginLoss()
elif cfg.dist_fn_opt == "cosine":
    dist_fn = cos_distance
    triplet_loss = triplet_cos_loss
else:
    # Fail here rather than later with a NameError (fresh kernel) or, worse,
    # by silently reusing a stale `triplet_loss` from an earlier run of this cell.
    raise ValueError(
        "unsupported cfg.dist_fn_opt %r (expected 'euclidean' or 'cosine')" % (cfg.dist_fn_opt,)
    )

### Train Misc Setup

In [8]:
# Both evaluators share loaders, ground truth, embedding size and logger;
# they differ only in the distance function used for ranking.
_evaluator_kwargs = dict(
    img_loader=test_image_loader,
    cap_loader=test_text_loader,
    gt_file_path=cfg.gt_file_fn,
    embed_size=cfg.embed_size,
    logger=logger,
)

# Euclidean-distance evaluator.
evaluator = GlobalEvaluator(
    dist_fn=DistanceMetric.get_metric('euclidean').pairwise,
    **_evaluator_kwargs
)

# Cosine-distance evaluator.
cos_evaluator = GlobalEvaluator(dist_fn=cos_distance, **_evaluator_kwargs)


def build_graph_optimizer(models):
    """Collect the trainable parameters of one or more modules.

    Despite the name, this returns a flat list of parameters (ready to be
    handed to an optimizer constructor), not an optimizer.

    Args:
        models: a single module or a list of modules. Falsy entries (e.g.
            ``None``) and objects without a ``_parameters`` attribute are
            skipped.

    Returns:
        List of parameters with ``requires_grad`` set, in first-seen order.
        Each parameter object appears once, even if it is reachable from
        several of the given modules (or the same module is listed twice),
        so the optimizer is never given duplicates.
    """
    if not isinstance(models, list):
        models = [models]
    params_to_optimize = []
    seen_ids = set()
    for model in models:
        if model and hasattr(model, '_parameters'):
            for param in model.parameters():
                # De-duplicate by identity; tensor equality is element-wise
                # and unsuitable for membership tests.
                if param.requires_grad and id(param) not in seen_ids:
                    seen_ids.add(id(param))
                    params_to_optimize.append(param)
    return params_to_optimize

In [9]:
def train_epoch_stage1(train_data, model, classifier, optimizer, cls_loss, note="train"):
    """Run one epoch of identity-classification training.

    The loss is ``cls_loss(classifier(img_embedding), pid)`` plus the same
    term for the caption embedding.

    NOTE: the progress-bar label reads the notebook-level variable ``epoch``,
    so this must be called from inside a ``for epoch in ...`` loop at top level.

    Args:
        train_data: iterable of 9-tuples
            ``(img, pos_img, neg_img, cap, pos_cap, neg_cap, pid, pos_pid, neg_pid)``.
        model: encoder applied to both image and caption batches.
        classifier: maps an embedding to class scores for ``cls_loss``.
        optimizer: optimizer stepped once per batch.
        cls_loss: classification criterion taking ``(scores, target)``.
        note: prefix for the progress-bar label.

    Returns:
        The same ``model`` object, after the epoch.
    """
    model.train()
    cum_loss = 0.0
    # Wrap the loader itself (not the enumerate object) so tqdm can read its
    # length and display a total / ETA.
    for i, data in enumerate(tqdm(train_data, desc="%s, epoch%d" % (note, epoch))):
        # load data
        (img, pos_img, neg_img, cap, pos_cap, neg_cap, pid, pos_pid, neg_pid) = data
        # NOTE(review): the positive/negative embeddings are not used by the
        # loss below; the forward passes are kept so training behaviour is
        # unchanged -- consider dropping them.
        img, pos_img, neg_img = model(img.cuda()), model(pos_img.cuda()), model(neg_img.cuda())
        cap, pos_cap, neg_cap = model(cap.cuda()), model(pos_cap.cuda()), model(neg_cap.cuda())

        # loss: identity classification on both modalities
        pid = pid.cuda()
        loss = cls_loss(classifier(img), pid) + cls_loss(classifier(cap), pid)

        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        cum_loss += loss.item()

        # log the running mean every 64 batches, then reset the accumulator
        if (i+1) % 64 == 0:
            print("batch %d, loss %.6f" % (i, cum_loss/64))
            cum_loss = 0.0
    return model


# Disabled experiment (guarded by `if False`, so it never runs): classification-only
# pre-training with train_epoch_stage1.
# NOTE(review): `classifier` is not defined anywhere in the visible notebook, so this
# branch would raise NameError if the guard were flipped as-is.
if False:
    # stage 1 - image channel frozen
    cls_loss = nn.CrossEntropyLoss()
    model.img_backbone.melt_layer(8)
    param_to_optimize = build_graph_optimizer([model, classifier])
    optimizer = optim.Adam(param_to_optimize, lr=1e-3, weight_decay=1e-5)
    # With step_size=20 and only 10 epochs below, the learning rate is never decayed here.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20)
    for epoch in range(10):
        model = train_epoch_stage1(train_loader, model, classifier, optimizer, cls_loss, "train-stage-1")
        acc = evaluator.evaluate(model)
        scheduler.step()
    

In [None]:
def train_epoch(train_data, model, optimizer, triplet_loss, logger, note="train"):
    """Run one epoch of cross-modal triplet training.

    NOTE: reads two notebook-level variables: ``epoch`` (progress-bar label)
    and ``cfg.dist_fn_opt`` (forwarded to ``crossmodal_triplet_loss``). It
    must therefore be called from a top-level ``for epoch in ...`` loop.

    Args:
        train_data: iterable of 9-tuples
            ``(img, pos_img, neg_img, cap, pos_cap, neg_cap, pid, pos_pid, neg_pid)``.
        model: encoder applied to both image and caption batches.
        optimizer: optimizer stepped once per batch.
        triplet_loss: triplet criterion handed to ``crossmodal_triplet_loss``.
        logger: object with a ``log(str)`` method; receives the running mean
            loss every 64 batches.
        note: prefix for the progress-bar label.

    Returns:
        The same ``model`` object, after the epoch.
    """
    model.train()
    cum_tri_loss = 0.0
    # Wrap the loader itself (not the enumerate object) so tqdm can read its
    # length and display a total / ETA.
    for i, data in enumerate(tqdm(train_data, desc="%s, epoch%d" % (note, epoch))):
        # load data (the person ids are not used by the triplet objective)
        (img, pos_img, neg_img, cap, pos_cap, neg_cap, pid, pos_pid, neg_pid) = data
        img, pos_img, neg_img = model(img.cuda()), model(pos_img.cuda()), model(neg_img.cuda())
        cap, pos_cap, neg_cap = model(cap.cuda()), model(pos_cap.cuda()), model(neg_cap.cuda())

        # loss
        tri_loss = crossmodal_triplet_loss(img, pos_img, neg_img,
                                           cap, pos_cap, neg_cap,
                                           triplet_loss, cfg.dist_fn_opt)
        loss = tri_loss

        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # log the running mean every 64 batches, then reset the accumulator
        cum_tri_loss += tri_loss.item()
        if (i+1) % 64 == 0:
            logger.log("batch %d, [tri-loss] %.6f" % (i, cum_tri_loss/64))
            cum_tri_loss = 0.0
    return model

def evaluate_and_log(model):
    """Evaluate ``model`` with the euclidean and the cosine evaluator and log R@1/5/10.

    Uses the notebook-level ``evaluator``, ``cos_evaluator`` and ``logger``.

    Returns:
        The metrics dict of the cosine evaluator (the last one run).
    """
    acc = None
    for line_fmt, current_evaluator in (
        ('[euclidean][global] R@1: %.4f | R@5: %.4f | R@10: %.4f', evaluator),
        ('[cosine   ][global] R@1: %.4f | R@5: %.4f | R@10: %.4f', cos_evaluator),
    ):
        acc = current_evaluator.evaluate(model)
        logger.log(line_fmt % (acc['top-1'], acc['top-5'], acc['top-10']))
    return acc


# NOTE: the epoch loops stay at notebook top level on purpose: train_epoch
# reads the global `epoch` for its progress-bar label.

# stage 1 - image channel frozen
model.img_backbone.melt_layer(8)
param_to_optimize = build_graph_optimizer([model])
optimizer = optim.Adam(param_to_optimize, lr=1e-3, weight_decay=1e-5)
# With step_size=20 and 10 epochs, the learning rate stays constant during stage 1.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20)
for epoch in range(10):
    model = train_epoch(train_loader, model, optimizer, triplet_loss, logger, "train-stage-1")
    acc = evaluate_and_log(model)
    scheduler.step()


# stage 2 - train all
model.img_backbone.melt_layer(7)
# Rebuild the parameter list so parameters enabled by the call above are included.
param_to_optimize = build_graph_optimizer([model])
optimizer = optim.Adam(param_to_optimize, lr=2e-4, weight_decay=1e-5)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20)
for epoch in range(60):
    model = train_epoch(train_loader, model, optimizer, triplet_loss, logger, "train-stage-2")
    acc = evaluate_and_log(model)
    scheduler.step()

HBox(children=(IntProgress(value=1, bar_style='info', description='train-stage-1, epoch0', max=1, style=Progre…

batch 63, [tri-loss] 3.843778
batch 127, [tri-loss] 2.802946
batch 191, [tri-loss] 2.264086
batch 255, [tri-loss] 2.021193
batch 319, [tri-loss] 1.832306
batch 383, [tri-loss] 1.690771



build db global imgs: 33it [00:02, 17.47it/s]
build db global caps: 65it [00:05, 11.69it/s]


[euclidean][global] R@1: 0.0036 | R@5: 0.0150 | R@10: 0.0260


build db global imgs: 33it [00:06,  4.74it/s]
build db global caps: 50it [00:00, 24.82it/s]