## Evaluation and testing

In [107]:
## !pip install import-ipynb
## !pip3 install ortools

In [4]:
import import_ipynb
from Model import VASNet

from ortools.algorithms import pywrapknapsack_solver

import torch
import h5py
import numpy as np
import math
from pathlib import Path

#### )) Util methods

In [5]:
def knapsack_ortools(values, weights, items, capacity):
    """Solve a 0/1 knapsack problem with the OR-Tools dynamic-programming solver.

    Args:
    ---------------------------------------------
    - values: per-item values (sequence or ndarray). They are multiplied by
      1000 and truncated to int32 before being handed to the solver, so
      differences below 0.001 are not distinguished.
    - weights: per-item weights (sequence or ndarray); truncated to int32.
    - items: number of items. Unused; kept so existing call sites keep working.
    - capacity: maximum total weight of the selected items.

    Returns:
    ---------------------------------------------
    List of indices (into `values` / `weights`) of the items in the best
    solution reported by the solver.
    """
    scale = 1000  # the solver is fed integer values, so scale before truncating
    int_values = (np.array(values) * scale).astype(np.int32)
    int_weights = np.array(weights).astype(np.int32)

    osolver = pywrapknapsack_solver.KnapsackSolver(
        pywrapknapsack_solver.KnapsackSolver.KNAPSACK_DYNAMIC_PROGRAMMING_SOLVER,
        'test')
    # int() so a NumPy scalar capacity is passed to the solver as a plain Python int.
    osolver.Init(int_values.tolist(), [int_weights.tolist()], [int(capacity)])
    osolver.Solve()  # the optimal value itself is not needed, only the item set

    return [idx for idx in range(len(int_weights))
            if osolver.BestSolutionContains(idx)]



def generate_summary(ypred, cps, n_frames, nfps, positions, proportion=0.15, method='knapsack'):
    """Generate keyshot-based video summary i.e. a binary vector.

    Args:
    ---------------------------------------------
    - ypred: predicted importance scores.
    - cps: change points, 2D matrix, each row contains a segment
      (inclusive start and end frame index).
    - n_frames: original number of frames.
    - nfps: number of frames per segment.
    - positions: positions of subsampled frames in the original video.
    - proportion: length of video summary (compared to original video length).
    - method: defines how shots are selected, ['knapsack', 'rank'].

    Returns:
    ---------------------------------------------
    1-D float32 ndarray of length sum(nfps[:n_segs]); frames belonging to a
    selected segment are 1, all others 0.

    Raises:
    ---------------------------------------------
    KeyError if `method` is neither 'knapsack' nor 'rank'.
    """
    n_segs = cps.shape[0]

    # Spread the subsampled scores over every original frame: frames between
    # two consecutive positions all receive the score of the left position.
    frame_scores = np.zeros((n_frames), dtype=np.float32)
    if positions.dtype != int:
        positions = positions.astype(np.int32)
    if positions[-1] != n_frames:
        positions = np.concatenate([positions, [n_frames]])
    for i in range(len(positions) - 1):
        pos_left, pos_right = positions[i], positions[i+1]
        if i == len(ypred):
            frame_scores[pos_left:pos_right] = 0
        else:
            frame_scores[pos_left:pos_right] = ypred[i]

    # Score each segment by the mean of its frame scores.
    seg_score = []
    for seg_idx in range(n_segs):
        start, end = int(cps[seg_idx, 0]), int(cps[seg_idx, 1] + 1)
        scores = frame_scores[start:end]
        # An empty slice (segment outside the frame range) would give a NaN
        # mean, which the reversed argsort of the 'rank' branch then ranks
        # first; treat such a segment as having no importance instead.
        seg_score.append(float(scores.mean()) if scores.size else 0.0)

    # Frame budget of the summary.
    limits = int(math.floor(n_frames * proportion))

    if method == 'knapsack':
        picks = knapsack_ortools(seg_score, nfps, n_segs, limits)
    elif method == 'rank':
        # Greedy: take segments in descending score order while they fit.
        order = np.argsort(seg_score)[::-1].tolist()
        picks = []
        total_len = 0
        for i in order:
            if total_len + nfps[i] < limits:
                picks.append(i)
                total_len += nfps[i]
    else:
        raise KeyError("Unknown method {}".format(method))

    # Expand the per-segment decision to a per-frame binary vector in one
    # step instead of growing the array by repeated concatenation.
    picked = set(picks)
    seg_flags = np.array([1.0 if seg_idx in picked else 0.0 for seg_idx in range(n_segs)],
                         dtype=np.float32)
    seg_lengths = np.array([nfps[seg_idx] for seg_idx in range(n_segs)], dtype=np.int64)
    summary = np.repeat(seg_flags, seg_lengths)
    return summary




def evaluate_summary(machine_summary, user_summary, eval_metric='avg'):
    """Compare machine summary with user summary (keyshot-based).

    Args:
    --------------------------------
    machine_summary: 1-D ndarray; any value > 0 counts as a selected frame.
    user_summary: 2-D ndarray of shape (n_users, n_frames); any value > 0
    counts as a selected frame.
    eval_metric = {'avg', 'max'}
    'avg' averages results of comparing multiple human summaries.
    'max' takes the maximum (best) out of multiple comparisons.

    Returns:
    --------------------------------
    (f_score, precision, recall). For 'max', precision and recall are those
    of the user summary with the highest F-score.

    Raises:
    --------------------------------
    KeyError if eval_metric is neither 'avg' nor 'max'.
    """
    # Fail early with a clear message; previously an unknown metric fell
    # through both branches and crashed on the return statement.
    if eval_metric not in ('avg', 'max'):
        raise KeyError("Unknown eval_metric {}".format(eval_metric))

    # astype() copies, so the caller's arrays are not modified below.
    machine_summary = machine_summary.astype(np.float32)
    user_summary = user_summary.astype(np.float32)
    n_users, n_frames = user_summary.shape

    # binarization
    machine_summary[machine_summary > 0] = 1
    user_summary[user_summary > 0] = 1

    # Align the machine summary length with the user summaries:
    # truncate if too long, zero-pad if too short.
    if len(machine_summary) > n_frames:
        machine_summary = machine_summary[:n_frames]
    elif len(machine_summary) < n_frames:
        zero_padding = np.zeros((n_frames - len(machine_summary)))
        machine_summary = np.concatenate([machine_summary, zero_padding])

    f_scores = []
    prec_arr = []
    rec_arr = []

    for user_idx in range(n_users):
        gt_summary = user_summary[user_idx, :]
        overlap_duration = (machine_summary * gt_summary).sum()
        # The small epsilon avoids division by zero for empty summaries.
        precision = overlap_duration / (machine_summary.sum() + 1e-8)
        recall = overlap_duration / (gt_summary.sum() + 1e-8)
        if precision == 0 and recall == 0:
            f_score = 0.
        else:
            f_score = (2 * precision * recall) / (precision + recall)
        f_scores.append(f_score)
        prec_arr.append(precision)
        rec_arr.append(recall)

    if eval_metric == 'avg':
        final_f_score = np.mean(f_scores)
        final_prec = np.mean(prec_arr)
        final_rec = np.mean(rec_arr)
    else:  # 'max'
        final_f_score = np.max(f_scores)
        max_idx = np.argmax(f_scores)
        final_prec = prec_arr[max_idx]
        final_rec = rec_arr[max_idx]

    return final_f_score, final_prec, final_rec


#### )) Evaluator

In [6]:
class Evaluation:
    """Run a trained VASNet over an HDF5 feature file and store/score the summaries.

    Typical use: ``Evaluation(args)`` -> ``init_model()`` -> ``predict()``.
    """

    def __init__(self, args):
        """Read the settings this class needs from the `args` dict.

        Keys read: 'use_cuda', 'model_path', 'featuresH5', 'dataset_name',
        'SegH5' (falls back to 'featuresH5' when None), 'results_path' and
        'ifgetScore'.
        """
        self.use_cuda = args['use_cuda']
        self.model_path = args['model_path']
        self.data_path = args['featuresH5']
        self.dataset_name = args['dataset_name']
        self.Segs = args['SegH5'] if args['SegH5'] is not None else args['featuresH5']
        self.results_path = args['results_path']
        self.ifevaluate = args['ifgetScore']

    @staticmethod
    def _write_dataset(h5_file, name, data):
        """Store `data` under `name`, replacing a dataset that already exists there."""
        # create_dataset raises if the name is taken, which made every re-run
        # against an existing results file fail.
        if name in h5_file:
            del h5_file[name]
        h5_file.create_dataset(name, data=data)

    def init_model(self):
        """Build VASNet, load the weights from `model_path` and switch to eval mode."""
        self.model = VASNet()
        # map_location keeps the tensors on CPU regardless of where they were saved.
        self.model.load_state_dict(torch.load(self.model_path, map_location=lambda storage, loc: storage))
        if self.use_cuda:
            # predict() moves the inputs to the GPU, so the model must live there too.
            self.model.cuda()
        self.model.eval()
        return

    def predict(self):
        """Run the model on every video of the feature file and post-process the scores.

        Returns:
            (f_scores, video_scores) as returned by `eval_summary`, or None
            when `eval_summary` returns None.
        """
        summary = {}
        att_vecs = {}
        with torch.no_grad():
            with h5py.File(self.data_path, 'r') as dataset:
                for key in dataset.keys():
                    seq = dataset[key]['features'][...]
                    # Cast on both devices (the cast used to happen only on
                    # the CUDA path).
                    seq = torch.from_numpy(seq).unsqueeze(0).float()

                    if self.use_cuda:
                        seq = seq.cuda()

                    y, att_vec = self.model(seq, seq.shape[1])
                    summary[key] = y[0].detach().cpu().numpy()
                    att_vecs[key] = att_vec.detach().cpu().numpy()
        results = self.eval_summary(summary, att_vecs=att_vecs, eval_metric=self.dataset_name)

        if results is not None:
            f_score, video_scores = results
            return f_score, video_scores
        return

    def eval_summary(self, machine_summary_activations, att_vecs, eval_metric='tvsum'):
        """Write machine summaries to the results file and optionally score them.

        Args:
            machine_summary_activations: dict mapping video key -> predicted
                importance scores.
            att_vecs: dict mapping video key -> attention output of the model,
                or None to skip storing it.
            eval_metric: dataset name; 'tvsum' is scored with the 'avg'
                metric, anything else with 'max'.

        Returns:
            (f_scores, video_scores) when scoring is enabled ('ifgetScore')
            and the feature file contains 'user_summary'; otherwise None.
            f_scores is a list with one F-score per video; video_scores is a
            list of [index, key, formatted F-score] rows.
        """
        results_file = Path(self.results_path)

        # Machine summaries are generated only if the results file does not
        # already hold them (checked on the first video group).
        gen_ms = True
        if results_file.is_file():
            with h5py.File(self.results_path, 'r') as h5_res:
                res_keys = list(h5_res.keys())
                if res_keys and 'machine_summary' in h5_res[res_keys[0]].keys():
                    gen_ms = False

        if gen_ms:
            # Make sure the output directory exists before opening in append mode.
            results_file.parent.mkdir(parents=True, exist_ok=True)
            with h5py.File(self.Segs, 'r') as Segs, h5py.File(self.data_path, 'r') as d, h5py.File(self.results_path, 'a') as h5_res:
                akey = [k for k in Segs.keys()][0]
                if 'change_points' not in Segs[akey]:
                    print("ERROR: No change points in dataset/video ", akey)
                    return None

                akey = [k for k in d.keys()][0]
                ifvidName = 'video_name' in d[akey]

                for key in Segs.keys():
                    cps = Segs[key + '/change_points'][...]
                    num_frames = d[key + '/n_frames'][()]
                    nfps = d[key + '/n_frame_per_seg'][...].tolist()
                    positions = d[key + '/picks'][...]

                    probs = machine_summary_activations[key]
                    machine_summary = generate_summary(probs, cps, num_frames, nfps, positions)
                    self._write_dataset(h5_res, key + '/machine_summary', machine_summary)
                    self._write_dataset(h5_res, key + '/score', probs)
                    self._write_dataset(h5_res, key + '/picks', positions)
                    if ifvidName:
                        video_name = d[key + '/video_name'][...]
                        self._write_dataset(h5_res, key + '/video_name', video_name)

        # Scoring needs human annotations in the feature file.
        with h5py.File(self.data_path, 'r') as d:
            ifEvaluatable = 'user_summary' in d[list(d.keys())[0]].keys()

        if not (self.ifevaluate and ifEvaluatable):
            return None

        fms = []
        video_scores = []
        eval_metric = 'avg' if eval_metric == 'tvsum' else 'max'
        with h5py.File(self.results_path, 'a') as h5_res, h5py.File(self.data_path, 'r') as d:
            for key_idx, key in enumerate(d.keys()):
                user_summary = d[key + '/user_summary'][...]
                machine_summary = h5_res[key + '/machine_summary'][...]

                fm, _, _ = evaluate_summary(machine_summary, user_summary, eval_metric)
                fms.append(fm)
                # Reporting & logging
                video_scores.append([key_idx + 1, key, "{:.1%}".format(fm)])
                gt = d[key + '/gt_score'][...]
                self._write_dataset(h5_res, key + '/gt_score', gt)
                self._write_dataset(h5_res, key + '/fm', fm)
                if att_vecs is not None:
                    self._write_dataset(h5_res, key + '/att', att_vecs[key])

        return fms, video_scores
                
            
            
            
        

#### )) Evaluate and Test

*1. Prebuilt dataset Test*

In [1]:
# Settings for the prebuilt TVSum feature file.
# The Evaluation class in this notebook reads 'use_cuda', 'featuresH5',
# 'SegH5', 'model_path', 'dataset_name', 'results_path' and 'ifgetScore';
# it does not read the remaining keys.
args = {
    'verbose': True,
    'use_cuda': False,
    'cuda_device': 0,
    'max_summary_length': 0.15,
    'featuresH5': '../../Preprocessing/extracted_features/Prebuilt/eccv16_dataset_tvsum_google_pool5.h5',
    'SegH5': '../../Preprocessing/extracted_features/Prebuilt/eccv16_dataset_tvsum_google_pool5.h5',
    'splits': None,
    'train': False,
    'model_path': 'models/tvsum_splits_4_0.5941821875878188.tar.pth',
    'dataset_name': 'tvsum',
    'results_path': 'results/tvsum_results.h5',
    'ifgetScore': True,
}

In [130]:
# evaluator = Evaluation(args)
# evaluator.init_model()
# evaluator.predict()

([0.6789853686074021,
  0.4277526982953381,
  0.695260860539091,
  0.6794149531433129,
  0.5518940922056388,
  0.5107115792509542,
  0.6221873530487353,
  0.6329145626346775,
  0.6455173102543952,
  0.7280821953243406,
  0.6111263270584189,
  0.5593201742233103,
  0.6014167464413059,
  0.49865804705805017,
  0.5954089305074854,
  0.5175790437998536,
  0.6399780086242396,
  0.5151939521970472,
  0.5063074849594303,
  0.5044567870988157,
  0.48940607565959127,
  0.5334207279692167,
  0.6229330138439523,
  0.5103378704035068,
  0.7159873790728457,
  0.6107339149256661,
  0.654796106529214,
  0.5515137205012631,
  0.6146226381940736,
  0.6266090275661015,
  0.4238929462480964,
  0.5195210940099223,
  0.5925816977429421,
  0.6188636462143663,
  0.5806615000906904,
  0.8225339056088858,
  0.6705800864720498,
  0.7104627097100793,
  0.5846208536478136,
  0.7269130584676178,
  0.5672652018999378,
  0.5505985226891198,
  0.48877655816772875,
  0.588277124352954,
  0.7169275368382654,
  0.586716

*On Normal features*

In [8]:
# Settings for the TVSum feature file under extracted_features/normal.
# The Evaluation class in this notebook reads 'use_cuda', 'featuresH5',
# 'SegH5', 'model_path', 'dataset_name', 'results_path' and 'ifgetScore';
# it does not read the remaining keys.
args = {
    'verbose': True,
    'use_cuda': False,
    'cuda_device': 0,
    'max_summary_length': 0.15,
    'featuresH5': '../../Preprocessing/extracted_features/normal/TVSum.h5',
    'SegH5': '../../Preprocessing/extracted_features/normal/TVSum.h5',
    'splits': None,
    'train': False,
    # NOTE(review): the other two configs in this notebook point at
    # 'models/...' without the 'data/' prefix - confirm which path is right.
    'model_path': 'data/models/tvsum_splits_4_0.5941821875878188.tar.pth',
    'dataset_name': 'tvsum',
    'results_path': 'results/tvsum_results_normal.h5',
    'ifgetScore': True,
}

In [36]:
# evaluator = Evaluation(args)
# evaluator.init_model()
# evaluator.predict()

([0.6533333333291853,
  0.5656130544440531,
  0.608510638289241,
  0.6396966368215052,
  0.549999999989524,
  0.6427083333244068,
  0.7069767441750855,
  0.57578947368017,
  0.4724137930980182,
  0.6651265423074588,
  0.5831171563584225,
  0.4380434782545211,
  0.645141356530478,
  0.6036082474206061,
  0.6714285714205781,
  0.6430158373865467,
  0.5604651162703804,
  0.6271340515837813,
  0.5939393939273951,
  0.5527522935746011,
  0.6273406324641854,
  0.6250960431411086,
  0.5710714285687091,
  0.5037499999916042,
  0.48604809953119676,
  0.6065789473577793,
  0.5402255639070664,
  0.505405405396299,
  0.44545454544779617,
  0.7297468354368799,
  0.6074999999898749,
  0.6722629793713344,
  0.6768292682816776,
  0.5635007823829392,
  0.6259433695018601,
  0.8009313482768112,
  0.6644254630042482,
  0.6256800400839204,
  0.6267938008454603,
  0.64799999998272,
  0.6798646593774486,
  0.5574468085027313,
  0.5156628748989096,
  0.648284986929099,
  0.6821956779459928,
  0.6565648597988

*On Test features*

In [8]:
# Settings for the Test1 feature file under extracted_features/normal.
# 'ifgetScore' is False, so Evaluation.eval_summary skips the F-score step.
# The Evaluation class in this notebook reads 'use_cuda', 'featuresH5',
# 'SegH5', 'model_path', 'dataset_name', 'results_path' and 'ifgetScore';
# it does not read the remaining keys.
args = {
    'verbose': True,
    'use_cuda': False,
    'cuda_device': 0,
    'max_summary_length': 0.15,
    'featuresH5': '../../Preprocessing/extracted_features/normal/Test1.h5',
    'SegH5': '../../Preprocessing/extracted_features/normal/Test1.h5',
    'splits': None,
    'train': False,
    'model_path': 'models/tvsum_splits_4_0.5941821875878188.tar.pth',
    'dataset_name': 'tvsum',
    'results_path': 'results/test_results_normal.h5',
    'ifgetScore': False,
}

In [9]:
# Run the pipeline with whichever `args` dict was executed last in this
# kernel: build the evaluator, load the checkpoint, then predict.
evaluator = Evaluation(args)
evaluator.init_model()
# predict() returns (f_scores, video_scores), or None when eval_summary does
# no scoring (e.g. args['ifgetScore'] is False, as in the test-features config).
evaluator.predict()