In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import _init_paths
import os
import sys
import numpy as np
import argparse
import pprint
import time
import cv2
import pickle
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import pickle
from roi_data_layer.roibatchLoader import roibatchLoader
from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from model.rpn.twin_transform import clip_twins
from model.nms.nms_wrapper import nms
from model.rpn.twin_transform import twin_transform_inv
from model.utils.net_utils import save_net, load_net, vis_detections
from model.tdcnn.c3d import C3D, c3d_tdcnn
from model.tdcnn.i3d import I3D, i3d_tdcnn
from model.utils.blob import prep_im_for_blob, video_list_to_blob
from model.tdcnn.resnet import resnet34, resnet50, resnet_tdcnn

# np.set_printoptions(threshold='nan')
# Debug switch; nothing in the visible cells reads it.
DEBUG = False

# Python 2/3 compatibility shim: the bare name lookup raises NameError on
# Python 3 (where `xrange` does not exist), in which case `xrange` is bound
# to the built-in `range` so the loops below work on either interpreter.
try:
    xrange  # Python 2
except NameError:
    xrange = range  # Python 3


In [16]:
def parse_args(argv=None):
    """
    Parse input arguments.

    Args:
        argv: optional list of command-line tokens to parse. When ``None``
            (the default) the notebook's fixed argument string is parsed
            instead of ``sys.argv``, which keeps the original behaviour.

    Returns:
        argparse.Namespace holding the parsed options. ``gpus`` is a list of
        ints whether or not ``--gpus`` is given.
    """
    parser = argparse.ArgumentParser(description='Test a R-C3D network')
    parser.add_argument('--dataset', dest='dataset', default='thumos14', type=str,
                        help='test dataset')
    parser.add_argument('--net', dest='net', default='c3d', type=str,
                        choices=['c3d', 'res18', 'res34', 'res50', 'eco'],
                        help='main network c3d, res18, res34, res50, eco')
    parser.add_argument('--set', dest='set_cfgs', nargs=argparse.REMAINDER,
                        help='set config keys', default=None)
    parser.add_argument('--load_dir', dest='load_dir', type=str,
                        help='directory to load models', default="./models")
    parser.add_argument('--output_dir', dest='output_dir', type=str,
                        help='directory for the log files', default="./output")
    parser.add_argument('--cuda', dest='cuda', action='store_true',
                        help='whether use CUDA')
    parser.add_argument('--checksession', default=1, type=int,
                        help='checksession to load model')
    parser.add_argument('--checkepoch', default=1, type=int,
                        help='checkepoch to load network')
    parser.add_argument('--checkpoint', default=9388, type=int,
                        help='checkpoint to load network')
    parser.add_argument('--nw', dest='num_workers', default=8, type=int,
                        help='number of worker to load data')
    parser.add_argument('--bs', dest='batch_size', default=1, type=int,
                        help='batch_size, only support batch_size=1')
    parser.add_argument('--vis', dest='vis', action='store_true',
                        help='visualization mode')
    parser.add_argument('--roidb_dir', dest='roidb_dir', default="./preprocess",
                        help='roidb_dir')
    # With nargs='+' a parsed value is always a list, so the default is a list
    # too; otherwise the attribute would be an int only when the flag is omitted.
    parser.add_argument('--gpus', dest='gpus', nargs='+', type=int, default=[0],
                        help='gpu ids.')

    if argv is None:
        # Fixed configuration used when the notebook is run as-is.
        argv = ('--net c3d --dataset thumos14 --cuda --gpus 0 --bs 1 --nw 2 '
                '--checksession 1 --checkepoch 7 --checkpoint 1960').split()
    return parser.parse_args(argv)

In [9]:
def get_roidb(path):
    """Load a pickled roidb from disk.

    Args:
        path: filesystem path of the pickle file to read.

    Returns:
        Whatever object was pickled into ``path``.

    Raises:
        OSError: if the file cannot be opened.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the handle even if unpickling fails.
    with open(path, 'rb') as roidb_file:
        return pickle.load(roidb_file)


# Module-level accumulator of feature tensors compared by comp_pairwise_dist().
# The only append in the visible cells (in the test loop) is commented out.
combined_pooled_feat = []


def comp_pairwise_dist(feats=None, top_k=5):
    """Print, for every feature tensor, the indices of its nearest neighbours.

    Distances are ``torch.norm`` of the element-wise difference between two
    tensors.

    Args:
        feats: sequence of equally-shaped tensors; defaults to the module-level
            ``combined_pooled_feat`` list.
        top_k: how many of the closest indices to print per tensor (the tensor
            itself, at distance 0, is included).

    Returns:
        numpy array of shape ``(len(feats), len(feats))`` with the pairwise
        distances.
    """
    if feats is None:
        feats = combined_pooled_feat
    num_vecs = len(feats)
    dist = np.zeros([num_vecs, num_vecs])
    for i in range(num_vecs):
        # The matrix is symmetric with a zero diagonal, so only the upper
        # triangle is computed; earlier rows already filled columns < i.
        for j in range(i + 1, num_vecs):
            # .item() yields a Python float, so the write into the NumPy
            # array does not depend on which device the tensors live on.
            pair_dist = torch.norm(feats[i] - feats[j]).item()
            dist[i][j] = pair_dist
            dist[j][i] = pair_dist
        _, sorted_idx = torch.sort(torch.Tensor(dist[i]))
        print('Those closest to %d are %s' % (i, str(sorted_idx[:top_k])))
    return dist

In [17]:
# Parse the notebook arguments, merge the per-dataset configuration into the
# global `cfg`, and build the support-set and test dataloaders.
args = parse_args()

print('Called with args:')
print(args)

cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# NOTE(review): the seed is read before the YAML file is merged below, so an
# RNG_SEED set in that file would not be the one used here -- confirm intent.
np.random.seed(cfg.RNG_SEED)
if args.dataset == "thumos14":
    # args.imdb_name = "train_data_25fps_flipped.pkl"
    args.imdbval_name = "val_data_25fps.pkl"
    args.num_classes = 21
    args.set_cfgs = ['ANCHOR_SCALES', '[2,4,5,6,8,9,10,12,14,16]', 'NUM_CLASSES', args.num_classes]
    # args.set_cfgs = ['ANCHOR_SCALES', '[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56]', 'NUM_CLASSES', args.num_classes]
elif args.dataset == "activitynet":
    # args.imdb_name = "train_data_5fps_flipped.pkl"
    # NOTE(review): the training pickle named above is 5fps while this
    # validation pickle is 25fps -- confirm the file name is intended.
    args.imdbval_name = "val_data_25fps.pkl"
    args.num_classes = 201
    # args.set_cfgs = ['ANCHOR_SCALES', '[1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64]', 'NUM_CLASSES', args.num_classes]
    args.set_cfgs = ['ANCHOR_SCALES',
                     '[1,1.25, 1.5,1.75, 2,2.5, 3,3.5, 4,4.5, 5,5.5, 6,7, 8,9,10,11,12,14,16,18,20,22,24,28,32,36,40,44,52,60,68,76,84,92,100]',
                     'NUM_CLASSES', args.num_classes]
else:
    # Without this, an unknown dataset would only fail further down with an
    # AttributeError on args.imdbval_name.
    raise ValueError("unsupported dataset: {}".format(args.dataset))

# Both branches above set args.set_cfgs, so both config sources always apply.
args.cfg_file = "cfgs/{}_{}.yml".format(args.net, args.dataset)
cfg_from_file(args.cfg_file)
cfg_from_list(args.set_cfgs)

cfg.USE_GPU_NMS = args.cuda
cfg.CUDA = args.cuda

print('Using config:')
pprint.pprint(cfg)

# Support set: loaded in 'train' phase.
trimmed_support_set_roidb_path = os.path.join(args.roidb_dir, args.dataset, "trimmed_14_cls.pkl")
trimmed_support_set_roidb = get_roidb(trimmed_support_set_roidb_path)
trimmed_support_set_dataset = roibatchLoader(trimmed_support_set_roidb, phase='train')
trimmed_support_set_dataloader = torch.utils.data.DataLoader(trimmed_support_set_dataset,
                                                             batch_size=args.batch_size,
                                                             num_workers=args.num_workers,
                                                             shuffle=False)

# Test set: loaded in 'test' phase.
untrimmed_test_roidb_path = os.path.join(args.roidb_dir, args.dataset, args.imdbval_name)
untrimmed_test_roidb = get_roidb(untrimmed_test_roidb_path)
untrimmed_test_dataset = roibatchLoader(untrimmed_test_roidb, phase='test')
untrimmed_test_dataloader = torch.utils.data.DataLoader(untrimmed_test_dataset,
                                                        batch_size=args.batch_size,
                                                        num_workers=args.num_workers,
                                                        shuffle=False)


Called with args:
Namespace(batch_size=1, checkepoch=7, checkpoint=1960, checksession=1, cuda=True, dataset='thumos14', gpus=[0], load_dir='./models', net='c3d', num_workers=2, output_dir='./output', roidb_dir='./preprocess', set_cfgs=None, vis=False)
Using config:
{'ANCHOR_SCALES': [2, 4, 5, 6, 8, 9, 10, 12, 14, 16],
 'CUDA': True,
 'DATA_DIR': '/home/vltava/few-shot-activity-localization/rc3d-chen/data',
 'DEDUP_TWINS': 0.125,
 'EPS': 1e-14,
 'EXP_DIR': 'default',
 'FEAT_STRIDE': [8],
 'INPUT': 'video',
 'MAX_NUM_GT_TWINS': 20,
 'NUM_CLASSES': 21,
 'PIXEL_MEANS': array([[[ 90,  98, 102]]]),
 'PIXEL_MEANS_FLOW': array([128]),
 'POOLING_HEIGHT': 2,
 'POOLING_LENGTH': 4,
 'POOLING_MODE': 'pool',
 'POOLING_WIDTH': 2,
 'RESNET': {'FIXED_BLOCKS': 0, 'MAX_POOL': False},
 'RNG_SEED': 3,
 'ROI_CTX_SCALE': [],
 'ROOT_DIR': '/home/vltava/few-shot-activity-localization/rc3d-chen',
 'RPN_HAS_MASK': False,
 'RPN_OUTPUT_SCORE': False,
 'TEMP_SPARSE_SAMPLING': False,
 'TEST': {'NMS': 0.4,
          

In [27]:
# Build the detection network selected by args.net, load the requested
# checkpoint into it and, when CUDA is enabled, move it to the GPU(s).
num_videos = len(untrimmed_test_dataset)
args.num_videos = num_videos
print('{:d} roidb entries'.format(num_videos))

model_dir = os.path.join(args.load_dir, args.net, args.dataset)
if not os.path.exists(model_dir):
    raise Exception('There is no input directory for loading network from ' + model_dir)
output_dir = os.path.join(args.output_dir, args.net, args.dataset)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
load_name = os.path.join(model_dir,
                         'tdcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

# Initialize the network here.
if args.net == 'c3d':
    tdcnn_demo = c3d_tdcnn(pretrained=False)
elif args.net == 'res18':
    tdcnn_demo = resnet_tdcnn(depth=18, pretrained=False)
elif args.net == 'res34':
    tdcnn_demo = resnet_tdcnn(depth=34, pretrained=False)
elif args.net == 'res50':
    tdcnn_demo = resnet_tdcnn(depth=50, pretrained=False)
else:
    # Fail here instead of with a NameError on `tdcnn_demo` just below.
    raise ValueError("network is not defined: {}".format(args.net))

tdcnn_demo.create_architecture()
# Save memory: inference only, so no parameter needs gradients.
for param in tdcnn_demo.parameters():
    param.requires_grad = False
print(tdcnn_demo)

print("load checkpoint %s" % (load_name))
# Deserialize onto the CPU; the model is moved to the GPU further down. This
# also lets a checkpoint saved from a GPU be opened on a CPU-only machine.
checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage)
tdcnn_demo.load_state_dict(checkpoint['model'])
if 'pooling_mode' in checkpoint.keys():
    cfg.POOLING_MODE = checkpoint['pooling_mode']
# Report success whether or not the checkpoint carries a pooling mode.
print('load model successfully!')

if args.cuda and torch.cuda.is_available():
    tdcnn_demo = tdcnn_demo.cuda()
    if isinstance(args.gpus, int):
        args.gpus = [args.gpus]
    # assert len(args.gpus) == args.batch_size, "only support one batch_size for one gpu"
    tdcnn_demo = nn.parallel.DataParallel(tdcnn_demo, device_ids=args.gpus)

12766 roidb entries
cfg.POOLING_LENGTH: 4
c3d_tdcnn(
  (RCNN_rpn): _RPN(
    (RPN_Conv1): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1))
    (RPN_Conv2): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1))
    (RPN_output_pool): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
    (RPN_cls_score): Conv3d(512, 20, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (RPN_twin_pred): Conv3d(512, 20, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (RPN_proposal): _ProposalLayer()
    (RPN_anchor_target): _AnchorTargetLayer()
  )
  (RCNN_proposal_target): _ProposalTargetLayer()
  (RCNN_roi_temporal_pool): _RoITemporalPooling()
  (RCNN_base): Sequential(
    (0): Conv3d(3, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): ReLU(inplace)
    (2): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv3d(64, 128, kernel_size=(3, 3, 3), st

In [29]:
# Prepare the per-class few-shot (support set) features used by the test loop:
# read them from the on-disk cache when it exists, otherwise compute them from
# the trimmed support videos and write the cache.
dataloader = untrimmed_test_dataloader
fewshot_trimmed_dataloader = trimmed_support_set_dataloader

fewshot_features_path = '/home/vltava/fewshot_features.pkl'
if os.path.exists(fewshot_features_path):
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(fewshot_features_path, 'rb') as cache_file:
        loaded_all_few_shot_features = pickle.load(cache_file)
else:
    # No cache yet: leave this None so the features are computed below
    # instead of failing on the missing file.
    loaded_all_few_shot_features = None

start = time.time()
# TODO: Add restriction for max_per_video
max_per_video = 0

# Same threshold with and without --vis. Original: 0.005
thresh = 0.4

# all_twins[class][video] holds the detections for that class in that video.
all_twins = [[[] for _ in xrange(args.num_videos)]
             for _ in xrange(args.num_classes)]

_t = {'im_detect': time.time(), 'misc': time.time()}

tdcnn_demo.eval()
# Placeholder with zero rows and three columns for "no detections".
empty_array = np.transpose(np.array([[], [], []]), (1, 0))

if loaded_all_few_shot_features is not None:
    all_fewshot_features = loaded_all_few_shot_features
else:
    all_fewshot_features = {}
    print('Loading fewshot trimmed videos')
    for i, (video_data, gt_twins, num_gt) in enumerate(fewshot_trimmed_dataloader):
        class_idx = torch.squeeze(gt_twins)[0, 2].item()
        # Keep only the first support video seen for each class.
        if class_idx in all_fewshot_features:
            continue
        print('Class idx: %d' % class_idx)
        video_data = video_data.cuda()
        gt_twins = gt_twins.cuda()
        tdcnn_demo(video_data, gt_twins, whole_vid_for_testing=True)
        fewshot_features = torch.squeeze(tdcnn_demo.module.pooled_feat)
        all_fewshot_features[class_idx] = fewshot_features

    with open(fewshot_features_path, 'wb') as cache_file:
        pickle.dump(all_fewshot_features, cache_file)
    # Keep the legacy name bound to the populated dict for any code that
    # still reads it.
    loaded_all_few_shot_features = all_fewshot_features

print('Got %d few shot features' % len(all_fewshot_features))

Got 14 few shot features


In [31]:
# Test loop: run the network on every test clip, replace each few-shot class
# score with the cosine similarity between a proposal's pooled feature and that
# class's support feature, then threshold, sort and NMS the proposals per class.
data_tic = time.time()
for i, (video_data, gt_twins, num_gt, video_info) in enumerate(dataloader):
    video_data = video_data.cuda()
    gt_twins = gt_twins.cuda()
    batch_size = video_data.shape[0]
    data_toc = time.time()
    data_time = data_toc - data_tic

    det_tic = time.time()
    rois, cls_prob, twin_pred = tdcnn_demo(video_data, gt_twins, whole_vid_for_testing=False)
    pooled_feat = tdcnn_demo.module.pooled_feat
    print('rois shape: %s' % str(rois.shape))
    print('pooled_feat dim: %s' % str(pooled_feat.shape))
    # combined_pooled_feat.append(torch.squeeze(tdcnn_demo.module.pooled_feat))

    scores_all = cls_prob.data
    twins = rois.data[:, :, 1:3]

    print('cls_prob shape: %s, twin_pred shape: %s' % (str(cls_prob.shape), str(twin_pred.shape)))

    if cfg.TEST.TWIN_REG:
        # Apply bounding-twin regression deltas
        twin_deltas = twin_pred.data
        if cfg.TRAIN.TWIN_NORMALIZE_TARGETS_PRECOMPUTED:
            # Undo the normalization by the precomputed mean and stdev
            stds = torch.FloatTensor(cfg.TRAIN.TWIN_NORMALIZE_STDS).type_as(twin_deltas)
            means = torch.FloatTensor(cfg.TRAIN.TWIN_NORMALIZE_MEANS).type_as(twin_deltas)
            twin_deltas = twin_deltas.view(-1, 2) * stds + means
            twin_deltas = twin_deltas.view(batch_size, -1, 2 * args.num_classes)

        pred_twins_all = twin_transform_inv(twins, twin_deltas, batch_size)
        pred_twins_all = clip_twins(pred_twins_all, cfg.TRAIN.LENGTH[0], batch_size)
    else:
        # Simply repeat the twins, once for each class. The class count is the
        # last axis of the scores, and the result stays a torch tensor so the
        # tensor indexing below keeps working.
        pred_twins_all = twins.repeat(1, 1, scores_all.shape[2])

    det_toc = time.time()
    detect_time = det_toc - det_tic

    for b in range(batch_size):
        misc_tic = time.time()
        # Use the configured batch size for the global video index so a short
        # final batch does not shift it.
        vid_idx = i * args.batch_size + b
        print(video_info[b])
        scores = scores_all[b]  # scores.squeeze()
        pred_twins = pred_twins_all[b]  # .squeeze()
        # NOTE(review): pooled_feat is indexed by proposal only, not by batch
        # item, so this assumes batch_size == 1 -- confirm before raising --bs.
        roi_feats = pooled_feat[:scores.shape[0]]

        # skip j = 0, because it's the background class
        class_with_highest_score = None
        highest_score = None
        for j in xrange(1, args.num_classes):
            # We know that the test example is one among the few-shot classes
            if j not in all_fewshot_features:
                # Store the empty placeholder so every class entry is an array
                # (the max_per_video step below slices each entry).
                all_twins[j][vid_idx] = empty_array
                continue

            # Overwrite the class-j column with the cosine similarity of every
            # proposal feature to the support feature, in one batched call.
            support_feat = all_fewshot_features[j]
            scores[:, j] = torch.nn.functional.cosine_similarity(
                roi_feats, support_feat.unsqueeze(0).expand_as(roi_feats), dim=1)

            inds = torch.nonzero(scores[:, j] > thresh).view(-1)

            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_twins = pred_twins[inds][:, j * 2:(j + 1) * 2]

                # Columns: start, end, score -- sorted by descending score for NMS.
                cls_dets = torch.cat((cls_twins, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                if (len(keep) > 0):
                    cls_dets = cls_dets[keep.view(-1).long()]
                    print("activity: ", j)
                    print(cls_dets.cpu().numpy())

                all_twins[j][vid_idx] = cls_dets.cpu().numpy()
                max_score = cls_dets[:, 2].max().item()
                if highest_score is None or max_score > highest_score:
                    highest_score = max_score
                    class_with_highest_score = j
            else:
                all_twins[j][vid_idx] = empty_array

        if class_with_highest_score is not None:
            print('======================================')
            print('Class with highest score is %d with score %f' % (class_with_highest_score, highest_score))
            print('======================================')

        # Limit to max_per_video detections *over all classes*
        if max_per_video > 0:
            video_scores = np.hstack([all_twins[j][vid_idx][:, -1]
                                      for j in xrange(1, args.num_classes)])
            if len(video_scores) > max_per_video:
                video_thresh = np.sort(video_scores)[-max_per_video]
                for j in xrange(1, args.num_classes):
                    keep = np.where(all_twins[j][vid_idx][:, -1] >= video_thresh)[0]
                    all_twins[j][vid_idx] = all_twins[j][vid_idx][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s {:.3f}s' \
              .format(vid_idx + 1, args.num_videos, data_time / batch_size, detect_time / batch_size,
                      nms_time))

    # Visualization (args.vis) is not implemented in this notebook.

    data_tic = time.time()
end = time.time()
print("test time: %0.4fs" % (end - start))

rois shape: torch.Size([1, 300, 3])
pooled_feat dim: torch.Size([300, 4096])
cls_prob shape: torch.Size([1, 300, 21]), twin_pred shape: torch.Size([1, 300, 42])
 flipped: False
 frames: [[  0   0 768   1]]
 bg_name: /home/vltava/disk2/THUMOS14_valtest_combined_fewshot/frames/test/video_validation_0000901
 fg_name: /home/vltava/disk2/THUMOS14_valtest_combined_fewshot/frames/test/video_validation_0000901
activity:  8
[[5.669397e+02 6.908723e+02 4.212957e-01]]
activity:  9
[[5.6776184e+02 6.9099268e+02 4.2252576e-01]]
activity:  15
[[5.4287695e+02 6.6389868e+02 4.0382621e-01]]
Class with highest score is 9 with score 0.422526
im_detect: 1/12766 1.273s 3.008s 0.702s
rois shape: torch.Size([1, 300, 3])
pooled_feat dim: torch.Size([300, 4096])
cls_prob shape: torch.Size([1, 300, 21]), twin_pred shape: torch.Size([1, 300, 42])
 flipped: False
 frames: [[  0 192 960   1]]
 bg_name: /home/vltava/disk2/THUMOS14_valtest_combined_fewshot/frames/test/video_validation_0000901
 fg_name: /home/vltava/

Process Process-12:
Traceback (most recent call last):
  File "/home/vltava/anaconda3/envs/rc3d2/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/vltava/anaconda3/envs/rc3d2/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/vltava/anaconda3/envs/rc3d2/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
  File "/home/vltava/anaconda3/envs/rc3d2/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/home/vltava/anaconda3/envs/rc3d2/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/home/vltava/anaconda3/envs/rc3d2/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/vltava/anaconda3/envs/rc3d2/lib/python3.6/multiprocessing/connection.py", line 379

KeyboardInterrupt: 