In [1]:
import os
import sys

# Restrict this process to one physical GPU. CUDA reads this variable when it
# is first initialised, so it is set before any CUDA work happens.
os.environ['CUDA_VISIBLE_DEVICES'] = "5"

# Put the parent directory first on the import path so the project-local
# packages imported by the next cell can be resolved.
sys.path.insert(0, "../")

# Root folder for the frame directories and the split/settings files.
dataset_folder = "../datasets"

In [2]:
import os
import random
import time

import numpy as np
import torch
from tqdm import tqdm

import datasets
import models
import video_transforms
import darknorm
from loss import CELoss
from utils.util import AverageMeter, accuracy

In [46]:
# Evaluation configuration.
# 'cuda' without an index resolves to the current CUDA device; with
# CUDA_VISIBLE_DEVICES limited to one card this is that single visible GPU.
device = torch.device('cuda')

# Frame size requested from the dataset (new_width / new_height), the square
# crop fed to the network, and the number of frames per clip.
width = 170
height = 128
input_size = 112
length = 64

# Seed for the DataLoader generator.
seed = 1234

ckpt_location = 'checkpoints/ce_EE6222_DarkNormr18_triv_norm_gic_g1.8_1234'

# The pre-processing options are decoded from substrings of the checkpoint
# directory name. The checks are case-sensitive substring matches.
dark_std = 'norm' in ckpt_location
light = 'gic' in ckpt_location
gamma = 1.8 if 'g1.8' in ckpt_location else 3.0

In [47]:
def buildModel(model_path, num_categories, device, multiGPUTrain=True, multiGPUTest=False):
    """Create the DarkNorm model and load weights from a checkpoint file.

    Relies on the notebook-level ``length`` for the clip length passed to the
    model constructor.

    Args:
        model_path: Path of a checkpoint readable by ``torch.load`` whose
            content has a ``'state_dict'`` entry.
        num_categories: Number of classes passed to the model constructor.
        device: Device the checkpoint tensors are mapped to and the model is
            moved to.
        multiGPUTrain: Set when the checkpoint keys carry the ``"module."``
            prefix that ``torch.nn.DataParallel`` adds. The prefix is removed
            before loading into the bare model. Ignored when ``multiGPUTest``
            is true.
        multiGPUTest: When true, wrap the model in ``torch.nn.DataParallel``
            and load a checkpoint whose keys do NOT yet carry the
            ``"module."`` prefix (it is added to every key).

    Returns:
        The model with weights loaded, moved to ``device`` and in eval mode.
    """
    import warnings

    prefix = "module."
    model = models.__dict__['DarkNorm'](num_classes=num_categories, length=length)
    params = torch.load(model_path, map_location=device)
    state_dict = params['state_dict']

    if multiGPUTest:
        model = torch.nn.DataParallel(model)
        new_dict = {prefix + k: v for k, v in state_dict.items()}
        model.load_state_dict(new_dict)

    elif multiGPUTrain:
        # Remove the DataParallel prefix only where it is actually present.
        # A blind k[7:] would chop seven characters off un-prefixed keys and
        # produce names that match nothing in the model.
        new_dict = {
            (k[len(prefix):] if k.startswith(prefix) else k): v
            for k, v in state_dict.items()
        }
        model_dict = model.state_dict()
        # Merging into the model's own state dict means parameters that are
        # absent from the checkpoint keep their freshly initialised values.
        # Surface that instead of evaluating a partly random model silently.
        missing = [k for k in model_dict if k not in new_dict]
        if missing:
            warnings.warn(
                "%d model parameter(s) not found in checkpoint %s and left at "
                "their initial values, e.g. %s"
                % (len(missing), model_path, missing[:5])
            )
        model_dict.update(new_dict)
        model.load_state_dict(model_dict)
    else:
        model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    return model

In [48]:
def validate(val_loader, model, criterion, epoch):
    """Run one evaluation pass and report top-1 / top-2 accuracy and loss.

    Relies on the notebook-level ``length``, ``input_size`` and ``device``.

    Args:
        val_loader: Iterable yielding ``(inputs, targets)`` batches.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss callable invoked as ``criterion(output, targets, epoch)``.
            It receives the raw model output, before any tuple unpacking.
        epoch: Value forwarded unchanged to ``criterion``.

    Returns:
        Tuple ``(top1_avg, top2_avg, loss_avg)`` taken from the running
        meters, each updated with the batch size as weight.
    """
    lossesClassification = AverageMeter()
    top1 = AverageMeter()
    top2 = AverageMeter()
    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for inputs, targets in val_loader:
            # Regroup into (batch, length, 3, H, W) and swap axes 1 and 2 so
            # the model receives (batch, 3, length, H, W).
            inputs = inputs.view(-1, length, 3, input_size,
                                 input_size).transpose(1, 2)
            inputs = inputs.to(device)
            # Targets follow `device` as well; a hard-coded .cuda() here would
            # put them on a different device than the output on non-CUDA runs.
            targets = targets.to(device)
            output = model(inputs)

            lossClassification = criterion(output, targets, epoch)

            # measure accuracy and record loss
            if isinstance(output, tuple) and len(output) > 1:
                output = output[0]
            acc1, acc2 = accuracy(output, targets, topk=(1, 2))

            batch_size = output.size(0)
            lossesClassification.update(lossClassification.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top2.update(acc2.item(), batch_size)

    # The second metric is top-2 accuracy (topk=(1, 2)), so label it Prec@2.
    print(f'validate * * Prec@1 {top1.avg:.3f} Prec@2 {top2.avg:.3f} '
          f'Classification Loss {lossesClassification.avg:.4f}')
    return top1.avg, top2.avg, lossesClassification.avg

In [49]:
# Checkpoint to evaluate; stop right away if it is not on disk.
model_path = os.path.join('../', ckpt_location, 'model_best.pth.tar')
assert os.path.exists(model_path), 'model path not exist'

# Name of the split file (split 1) and the frame file-name pattern.
val_fileName = "test_split%d.txt" % 1
extension = 'img_{0:05d}.jpg'

# Folder holding the extracted test frames.
data_dir = os.path.join(dataset_folder, 'EE6222_frames_test')

In [50]:
# Full path of the split file: <dataset_folder>/settings/EE6222/<val_fileName>.
test_file = os.path.join(
    os.path.join(dataset_folder, 'settings', 'EE6222'),
    val_fileName,
)

In [51]:
# Per-channel mean/std triples repeated `length` times
# (presumably once per frame of a clip; confirm against video_transforms.Normalize).
clip_mean_light = [0.485, 0.456, 0.406] * length
clip_std_light = [0.229, 0.224, 0.225] * length
clip_mean = [0.0702773, 0.06571121, 0.06437492] * length
clip_std = [0.08475896, 0.08116068, 0.07479476] * length

# NOTE(review): dark_std=True selects the *_light statistics and
# dark_std=False the other pair. Confirm this matches how the checkpoint
# was trained, since the flag name suggests the opposite.
normalize = video_transforms.Normalize(
    mean=clip_mean_light if dark_std else clip_mean,
    std=clip_std_light if dark_std else clip_std,
)

# Validation pipeline: centre crop, tensor conversion, normalisation.
val_transform = video_transforms.Compose([
    video_transforms.CenterCrop(input_size),
    video_transforms.ToTensor(),
    normalize,
])

criterion = CELoss().to(device)

In [52]:
# Build the evaluation dataset from the split file, then echo the
# pre-processing flags that were decoded from the checkpoint name.
val_dataset = datasets.EE6222(
    # where the data lives and which split to read
    root=data_dir,
    source=test_file,
    phase="val",
    # clip layout
    modality="rgb",
    is_color=True,
    num_segments=1,
    new_length=length,
    new_width=width,
    new_height=height,
    # pre-processing
    video_transform=val_transform,
    method='gamma',
    gamma=gamma,
    light=light,
)
print(f"Light: {light}")
print(f"Gamma: {gamma}")
print(f"DarkStd: {dark_std}")

Light: True
Gamma: 1.8
DarkStd: True


In [53]:
def seed_worker(_):
    """Seed Python's and NumPy's global RNGs from torch's initial seed.

    Intended as a DataLoader ``worker_init_fn``; the argument is ignored.
    The torch seed is reduced modulo 2**32 before being used.
    """
    derived_seed = torch.initial_seed() % (1 << 32)
    random.seed(derived_seed)
    np.random.seed(derived_seed)
# Seeded generator handed to the DataLoader (manual_seed returns the generator).
g = torch.Generator().manual_seed(seed)

# Sequential, unshuffled loader over the evaluation set; each worker re-seeds
# Python's and NumPy's RNGs through seed_worker.
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
    worker_init_fn=seed_worker,
    generator=g,
)

In [54]:
# Load the 10-class model from the checkpoint and report the wall-clock time taken.
model_start_time = time.time()
spatial_net = buildModel(model_path, num_categories=10, device=device)
model_end_time = time.time()

model_time = model_end_time - model_start_time
print("Action recognition model is loaded in %4.4f seconds." % model_time)

Action recognition model is loaded in 1.2427 seconds.


In [55]:
# Single evaluation pass; -1 is the epoch value forwarded to the criterion.
acc1, acc2, lossClassification = validate(
    val_loader=val_loader,
    model=spatial_net,
    criterion=criterion,
    epoch=-1,
)

validate * * Prec@1 60.444 Prec@5 83.111Classification Loss 1.3556
