## Evaluation and testing

In [107]:
## !pip install import-ipynb
## !pip3 install ortools

In [5]:
import import_ipynb
from Model import VASNet

from ortools.algorithms import pywrapknapsack_solver

import torch
import h5py
import numpy as np
import math
from pathlib import Path

importing Jupyter notebook from Model.ipynb


#### Utility methods

In [19]:
def knapsack_ortools(values, weights, items, capacity ):
    """Pick the subset of items with maximal total value that fits a capacity.

    Solves a single-constraint 0/1 knapsack with the OR-Tools
    dynamic-programming solver.

    Args:
        values: per-item values. They are multiplied by 1000 and truncated to
            int32 because the solver is fed integer lists.
        weights: per-item weights; truncated to int32.
        items: unused. Kept so existing positional callers keep working.
        capacity: maximum total weight of the selected items; coerced to int.

    Returns:
        list[int]: indices into ``values``/``weights`` of the selected items,
        in ascending order. An empty list when there are no items.
    """
    scale = 1000  # keeps three decimal places of the float values
    int_values = (np.asarray(values) * scale).astype(np.int32)
    int_weights = np.asarray(weights).astype(np.int32)

    # Nothing to choose from: skip building a solver for an empty problem.
    if int_weights.size == 0:
        return []

    solver = pywrapknapsack_solver.KnapsackSolver(
        pywrapknapsack_solver.KnapsackSolver.KNAPSACK_DYNAMIC_PROGRAMMING_SOLVER,
        'test')
    # One weight dimension, hence the single-element outer lists.
    solver.Init(int_values.tolist(), [int_weights.tolist()], [int(capacity)])
    # Solve() must run before BestSolutionContains() is queried; its return
    # value (the best total value) is not needed here.
    solver.Solve()

    return [idx for idx in range(len(int_weights))
            if solver.BestSolutionContains(idx)]



def generate_summary(ypred, cps, n_frames, nfps, positions, proportion=0.15, method='knapsack'):
    """Generate keyshot-based video summary i.e. a binary vector.

    Args:
    ---------------------------------------------
    - ypred: predicted importance scores.
    - cps: change points, 2D matrix, each row contains a segment
      (inclusive start and end frame indices).
    - n_frames: original number of frames.
    - nfps: number of frames per segment.
    - positions: positions of subsampled frames in the original video.
    - proportion: length of video summary (compared to original video length).
    - method: defines how shots are selected, ['knapsack', 'rank'].

    Returns:
    ---------------------------------------------
    1-D float32 ndarray of length ``sum(nfps[:n_segs])`` holding 1 for every
    frame of a selected segment and 0 elsewhere.

    Raises:
    ---------------------------------------------
    KeyError: if ``method`` is neither 'knapsack' nor 'rank'.
    """
    n_segs = cps.shape[0]

    # Spread the subsampled predictions over the original frame axis: every
    # frame between two consecutive positions inherits the left position's score.
    frame_scores = np.zeros((n_frames), dtype=np.float32)
    if positions.dtype != int:
        positions = positions.astype(np.int32)
    if positions[-1] != n_frames:
        positions = np.concatenate([positions, [n_frames]])
    for i in range(len(positions) - 1):
        pos_left, pos_right = positions[i], positions[i+1]
        # A trailing interval that has no prediction is scored 0.
        frame_scores[pos_left:pos_right] = 0 if i == len(ypred) else ypred[i]

    # Segment score = mean frame score over the (inclusive) change-point range.
    seg_score = []
    for seg_idx in range(n_segs):
        start, end = int(cps[seg_idx,0]), int(cps[seg_idx,1]+1)
        scores = frame_scores[start:end]
        # An empty slice (segment outside the frame range) would average to
        # NaN; NaN sorts first in 'rank' mode and corrupts the integer
        # conversion in the knapsack, so such a segment is scored 0 instead.
        seg_score.append(float(scores.mean()) if scores.size else 0.0)

    # Frame budget of the summary.
    limits = int(math.floor(n_frames * proportion))

    if method == 'knapsack':
        picks = knapsack_ortools(seg_score, nfps, n_segs, limits)
    elif method == 'rank':
        # Greedy: take segments by descending score while staying strictly
        # below the budget.
        order = np.argsort(seg_score)[::-1].tolist()
        picks = []
        total_len = 0
        for i in order:
            if total_len + nfps[i] < limits:
                picks.append(i)
                total_len += nfps[i]
    else:
        raise KeyError("Unknown method {}".format(method))

    # Expand the per-segment selection flags to per-frame flags in a single
    # allocation (replaces a quadratic concatenate loop around a dummy element).
    selected = np.zeros(n_segs, dtype=np.float32)
    selected[np.asarray(picks, dtype=np.intp)] = 1
    seg_lengths = np.asarray(nfps[:n_segs], dtype=np.intp)
    summary = np.repeat(selected, seg_lengths)
    return summary




def evaluate_summary(machine_summary, user_summary, eval_metric='avg'):
    """Compare machine summary with user summary (keyshot-based).

    Args:
    --------------------------------
    machine_summary: 1-D ndarray; any value > 0 counts as "selected".
    user_summary: 2-D ndarray of shape (n_users, n_frames); any value > 0
        counts as "selected".
    eval_metric = {'avg', 'max'}
    'avg' averages results of comparing multiple human summaries.
    'max' takes the maximum (best) out of multiple comparisons.

    Returns:
    --------------------------------
    (f_score, precision, recall). For 'avg' each is the mean over users; for
    'max' they are the values of the user with the highest F-score.

    Raises:
    --------------------------------
    ValueError: if eval_metric is neither 'avg' nor 'max'.
    """
    # Reject an unknown metric up front; otherwise neither aggregation branch
    # runs and the function fails with UnboundLocalError at the return.
    if eval_metric not in ('avg', 'max'):
        raise ValueError(
            "Unknown eval_metric {!r}; expected 'avg' or 'max'".format(eval_metric))

    # astype() copies, so the caller's arrays are left untouched below.
    machine_summary = machine_summary.astype(np.float32)
    user_summary = user_summary.astype(np.float32)
    n_users,n_frames = user_summary.shape

    # binarization
    machine_summary[machine_summary > 0] = 1
    user_summary[user_summary > 0] = 1

    # Align the machine summary to the annotated length: truncate or zero-pad.
    if len(machine_summary) > n_frames:
        machine_summary = machine_summary[:n_frames]
    elif len(machine_summary) < n_frames:
        zero_padding = np.zeros((n_frames - len(machine_summary)))
        machine_summary = np.concatenate([machine_summary, zero_padding])

    f_scores = []
    prec_arr = []
    rec_arr = []

    for user_idx in range(n_users):
        gt_summary = user_summary[user_idx,:]
        overlap_duration = (machine_summary * gt_summary).sum()
        # The 1e-8 keeps the divisions defined when a summary selects nothing.
        precision = overlap_duration / (machine_summary.sum() + 1e-8)
        recall = overlap_duration / (gt_summary.sum() + 1e-8)
        if precision == 0 and recall == 0:
            f_score = 0.
        else:
            f_score = (2 * precision * recall) / (precision + recall)
        f_scores.append(f_score)
        prec_arr.append(precision)
        rec_arr.append(recall)

    if eval_metric == 'avg':
        final_f_score = np.mean(f_scores)
        final_prec = np.mean(prec_arr)
        final_rec = np.mean(rec_arr)
    else:  # 'max'
        final_f_score = np.max(f_scores)
        max_idx = np.argmax(f_scores)
        final_prec = prec_arr[max_idx]
        final_rec = rec_arr[max_idx]

    return final_f_score, final_prec, final_rec


#### Evaluator

In [11]:
class Evaluation:
    """Run a VASNet checkpoint over an HDF5 feature file and score the summaries.

    Typical use: ``Evaluation(args)`` -> ``init_model()`` -> ``predict()``.

    ``args`` is a dict. Keys read here:
        use_cuda      -- move model and inputs to the GPU when true.
        model_path    -- checkpoint passed to ``torch.load``.
        featuresH5    -- HDF5 file with per-video features and metadata.
        SegH5         -- HDF5 file holding ``change_points``; optional, falls
                         back to ``featuresH5`` when missing or None.
        dataset_name  -- 'tvsum' selects the 'avg' metric, anything else 'max'.
        results_path  -- HDF5 file the summaries and scores are written to.
        ifgetScore    -- compute F-scores when the feature file has
                         ``user_summary`` annotations.
    """

    def __init__(self, args):
        self.use_cuda= args['use_cuda']
        self.model_path= args['model_path']
        self.data_path= args['featuresH5']
        self.dataset_name= args['dataset_name']
        # 'SegH5' may be absent or None; both mean "segments live in featuresH5".
        seg_path = args.get('SegH5')
        self.Segs= seg_path if seg_path is not None else args['featuresH5']
        self.results_path= args['results_path']
        self.ifevaluate= args['ifgetScore']


    @staticmethod
    def _write_dataset(h5_file, name, data):
        """Create dataset ``name`` in ``h5_file``, replacing an existing one."""
        # create_dataset refuses to overwrite, which breaks re-running an
        # evaluation against an existing results file.
        if name in h5_file:
            del h5_file[name]
        h5_file.create_dataset(name, data=data)


    def init_model(self):
        """Instantiate VASNet, load the checkpoint weights, switch to eval mode."""
        self.model = VASNet()
        # Tensors are first mapped to CPU so GPU-saved checkpoints load anywhere.
        self.model.load_state_dict(torch.load(self.model_path, map_location=lambda storage, loc: storage))
        if self.use_cuda:
            # predict() moves the inputs to the GPU, so the model has to follow.
            self.model.cuda()
        self.model.eval()
        return


    def predict(self):
        """Score every video in the feature file, then hand over to eval_summary.

        Returns:
            ``(f_scores, video_scores)`` as produced by ``eval_summary``, or
            None when no evaluation was performed.
        """
        summary = {}
        att_vecs = {}
        with torch.no_grad():
            with h5py.File(self.data_path, 'r') as dataset:
                for key in dataset.keys():
                    seq = dataset[key]['features'][...]
                    # Add a batch dimension; cast to float32 on every device.
                    seq = torch.from_numpy(seq).unsqueeze(0).float()

                    if self.use_cuda:
                        seq = seq.cuda()

                    y, att_vec = self.model(seq, seq.shape[1])
                    summary[key] = y[0].detach().cpu().numpy()
                    att_vecs[key] = att_vec.detach().cpu().numpy()
                    print(len(summary[key]))

        results = self.eval_summary(summary, att_vecs=att_vecs, eval_metric=self.dataset_name)

        if results is not None:
            f_score, video_scores = results
            return f_score, video_scores
        return


    def eval_summary(self, machine_summary_activations, att_vecs, eval_metric='tvsum'):
        """Write machine summaries to the results file and optionally score them.

        Args:
            machine_summary_activations: dict mapping video key -> predicted
                importance scores.
            att_vecs: dict mapping video key -> attention output, or None to
                skip storing it.
            eval_metric: dataset name; 'tvsum' averages over annotators
                ('avg'), any other value takes the best annotator ('max').

        Returns:
            ``(fms, video_scores)`` where ``fms`` is the list of per-video
            F-scores and ``video_scores`` a list of
            ``[1-based index, key, formatted percentage]``; None when scoring
            is disabled, the feature file has no ``user_summary``, or the
            segment file lacks change points.
        """
        # Summaries are generated only when the results file does not already
        # hold a 'machine_summary' under its first key; otherwise the stored
        # summaries are reused as-is.
        gen_ms=True
        results_file = Path(self.results_path)
        if results_file.is_file():
            with h5py.File(self.results_path, 'r') as h5_res:
                res_keys = list(h5_res.keys())
                if res_keys and 'machine_summary' in h5_res[res_keys[0]].keys():
                    gen_ms=False
        else:
            # Opening the file in 'a' mode below needs the directory to exist.
            results_file.parent.mkdir(parents=True, exist_ok=True)

        if gen_ms:
            with h5py.File(self.Segs, 'r') as Segs, h5py.File(self.data_path, 'r') as d, h5py.File(self.results_path, 'a') as h5_res:
                seg_keys = list(Segs.keys())
                if not seg_keys:
                    print("ERROR: No videos found in ", self.Segs)
                    return
                if 'change_points' not in Segs[seg_keys[0]]:
                    print("ERROR: No change points in dataset/video ", seg_keys[0])
                    return

                # Only the first video is probed for the optional 'video_name'.
                data_keys = list(d.keys())
                ifvidName = bool(data_keys) and 'video_name' in d[data_keys[0]]

                for key in Segs.keys():
                    print(key)
                    cps = Segs[key+'/change_points'][...]
                    num_frames = d[key+'/n_frames'][()]
                    nfps = d[key+'/n_frame_per_seg'][...].tolist()
                    positions = d[key+'/picks'][...]

                    probs = machine_summary_activations[key]
                    machine_summary = generate_summary(probs, cps, num_frames, nfps, positions)
                    self._write_dataset(h5_res, key + '/machine_summary', machine_summary)
                    self._write_dataset(h5_res, key + '/score', probs)
                    self._write_dataset(h5_res, key + '/picks', positions)
                    if ifvidName:
                        video_name = d[key+'/video_name'][...]
                        self._write_dataset(h5_res, key + '/video_name', video_name)

        # Scoring needs human annotations; probe the first video for them.
        with h5py.File(self.data_path, 'r') as d:
            data_keys = list(d.keys())
            ifEvaluatable = bool(data_keys) and 'user_summary' in d[data_keys[0]].keys()

        if self.ifevaluate and ifEvaluatable:
            fms = []
            video_scores = []
            eval_metric = 'avg' if eval_metric == 'tvsum' else 'max'
            with h5py.File(self.results_path, 'a') as h5_res, h5py.File(self.data_path, 'r') as d:
                for key_idx, key in enumerate(d.keys()):
                    user_summary = d[key+'/user_summary'][...]
                    machine_summary = h5_res[key+'/machine_summary'][...]

                    fm, _, _ = evaluate_summary(machine_summary, user_summary, eval_metric)
                    fms.append(fm)
                    # Reporting & logging
                    video_scores.append([key_idx + 1, key, "{:.1%}".format(fm)])
                    # gt_score is only copied through, so it is optional.
                    if 'gt_score' in d[key]:
                        self._write_dataset(h5_res, key + '/gt_score', d[key+'/gt_score'][...])
                    self._write_dataset(h5_res, key + '/fm', fm)
                    if att_vecs is not None:
                        self._write_dataset(h5_res, key + '/att', att_vecs[key])

            return fms, video_scores
        else:
            return None
                
            
            
            
        

#### Evaluate and test

*1. Prebuilt dataset Test*

In [1]:
# Run configuration: prebuilt TVSum feature file, scored with the 'tvsum' metric.
# Evaluation.__init__ reads use_cuda, model_path, featuresH5, SegH5,
# dataset_name, results_path and ifgetScore; the other keys are not read by it.
args = {
    "verbose": True,
    "use_cuda": False,
    "cuda_device": 0,
    "max_summary_length": 0.15,
    # Features and change points come from the same file.
    "featuresH5": "../../Preprocessing/extracted_features/Prebuilt/eccv16_dataset_tvsum_google_pool5.h5",
    "SegH5": "../../Preprocessing/extracted_features/Prebuilt/eccv16_dataset_tvsum_google_pool5.h5",
    "splits": None,
    "train": False,
    "model_path": "models/tvsum_splits_4_0.5941821875878188.tar.pth",
    "dataset_name": "tvsum",
    "results_path": "results/tvsum_results.h5",
    "ifgetScore": True,
}

In [130]:
# evaluator = Evaluation(args)
# evaluator.init_model()
# evaluator.predict()

([0.6789853686074021,
  0.4277526982953381,
  0.695260860539091,
  0.6794149531433129,
  0.5518940922056388,
  0.5107115792509542,
  0.6221873530487353,
  0.6329145626346775,
  0.6455173102543952,
  0.7280821953243406,
  0.6111263270584189,
  0.5593201742233103,
  0.6014167464413059,
  0.49865804705805017,
  0.5954089305074854,
  0.5175790437998536,
  0.6399780086242396,
  0.5151939521970472,
  0.5063074849594303,
  0.5044567870988157,
  0.48940607565959127,
  0.5334207279692167,
  0.6229330138439523,
  0.5103378704035068,
  0.7159873790728457,
  0.6107339149256661,
  0.654796106529214,
  0.5515137205012631,
  0.6146226381940736,
  0.6266090275661015,
  0.4238929462480964,
  0.5195210940099223,
  0.5925816977429421,
  0.6188636462143663,
  0.5806615000906904,
  0.8225339056088858,
  0.6705800864720498,
  0.7104627097100793,
  0.5846208536478136,
  0.7269130584676178,
  0.5672652018999378,
  0.5505985226891198,
  0.48877655816772875,
  0.588277124352954,
  0.7169275368382654,
  0.586716

*On Normal features*

In [8]:
# Run configuration: features from normal/TVSum.h5, scored with the 'tvsum' metric.
# Evaluation.__init__ reads use_cuda, model_path, featuresH5, SegH5,
# dataset_name, results_path and ifgetScore; the other keys are not read by it.
args = {
    "verbose": True,
    "use_cuda": False,
    "cuda_device": 0,
    "max_summary_length": 0.15,
    # Features and change points come from the same file.
    "featuresH5": "../../Preprocessing/extracted_features/normal/TVSum.h5",
    "SegH5": "../../Preprocessing/extracted_features/normal/TVSum.h5",
    "splits": None,
    "train": False,
    "model_path": "data/models/tvsum_splits_4_0.5941821875878188.tar.pth",
    "dataset_name": "tvsum",
    "results_path": "results/tvsum_results_normal.h5",
    "ifgetScore": True,
}

In [36]:
# evaluator = Evaluation(args)
# evaluator.init_model()
# evaluator.predict()

([0.6533333333291853,
  0.5656130544440531,
  0.608510638289241,
  0.6396966368215052,
  0.549999999989524,
  0.6427083333244068,
  0.7069767441750855,
  0.57578947368017,
  0.4724137930980182,
  0.6651265423074588,
  0.5831171563584225,
  0.4380434782545211,
  0.645141356530478,
  0.6036082474206061,
  0.6714285714205781,
  0.6430158373865467,
  0.5604651162703804,
  0.6271340515837813,
  0.5939393939273951,
  0.5527522935746011,
  0.6273406324641854,
  0.6250960431411086,
  0.5710714285687091,
  0.5037499999916042,
  0.48604809953119676,
  0.6065789473577793,
  0.5402255639070664,
  0.505405405396299,
  0.44545454544779617,
  0.7297468354368799,
  0.6074999999898749,
  0.6722629793713344,
  0.6768292682816776,
  0.5635007823829392,
  0.6259433695018601,
  0.8009313482768112,
  0.6644254630042482,
  0.6256800400839204,
  0.6267938008454603,
  0.64799999998272,
  0.6798646593774486,
  0.5574468085027313,
  0.5156628748989096,
  0.648284986929099,
  0.6821956779459928,
  0.6565648597988

*On Test features*

In [48]:
# Run configuration: features from normal/TVSum.h5 with the vasnet00 checkpoint.
# Evaluation.__init__ reads use_cuda, model_path, featuresH5, SegH5,
# dataset_name, results_path and ifgetScore; the other keys are not read by it.
args = {
    "verbose": True,
    "use_cuda": False,
    "cuda_device": 0,
    "max_summary_length": 0.15,
    # Features and change points come from the same file.
    "featuresH5": "../../Preprocessing/extracted_features/normal/TVSum.h5",
    "SegH5": "../../Preprocessing/extracted_features/normal/TVSum.h5",
    "splits": None,
    "train": False,
    "model_path": "models/vasnet00.pth.tar",
    "dataset_name": "tvsum",
    "results_path": "results/trial_results_normal.h5",
    "ifgetScore": True,
}

In [49]:
# Build the evaluator from the current `args` dict, load the checkpoint, then
# run inference and scoring. predict() is left as the last bare expression so
# its return value is shown as the cell output.
# NOTE(review): the recorded run of this cell ended in
# KeyError: 'Unable to open object (component not found)'.
evaluator = Evaluation(args)
evaluator.init_model()
evaluator.predict()

706
video_1
CPS: 184
nfps sum: 10597
postions: 706
seg_score: [0.709821343421936, 0.3971872329711914, 0.2155281901359558, 0.18528829514980316, 0.16015592217445374, 0.189638152718544, 0.19478054344654083, 0.1770884096622467, 0.18793202936649323, 0.17621979117393494, 0.22595185041427612, 0.16143690049648285, 0.19143997132778168, 0.18894004821777344, 0.1890314221382141, 0.2117774337530136, 0.19068583846092224, 0.19151203334331512, 0.1910530924797058, 0.20015238225460052, 0.179305300116539, 0.2061338573694229, 0.18577304482460022, 0.19220662117004395, 0.18728192150592804, 0.2012205272912979, 0.19300280511379242, 0.17945663630962372, 0.20428043603897095, 0.19029569625854492, 0.19703246653079987, 0.19066168367862701, 0.19459865987300873, 0.18897601962089539, 0.18149447441101074, 0.19064593315124512, 0.19160164892673492, 0.1940896213054657, 0.19735035300254822, 0.1839095801115036, 0.18938495218753815, 0.3077976405620575, 0.1982794851064682, 0.1861240118741989, 0.18759672343730927, 0.195817217

KeyError: 'Unable to open object (component not found)'

In [14]:
# Fifty hard-coded scores, averaged in the next cell.
# NOTE(review): presumably per-video F-scores pasted from an earlier
# evaluator.predict() run; the run that produced them is not recorded here.
fs=[0.67857142856712,
  0.560387343143741,
  0.5925531914809568,
  0.5889482067307652,
  0.648571428559075,
  0.5822916666585793,
  0.7476744185930592,
  0.6478947368375587,
  0.49051724137367225,
  0.6604802718822775,
  0.6182789177877488,
  0.44891304347175487,
  0.5911112651823218,
  0.6280927835029964,
  0.6678571428491922,
  0.6287205052502718,
  0.517441860457094,
  0.5714089481888032,
  0.6151515151390877,
  0.6142201834824819,
  0.6492814469440631,
  0.623094302985584,
  0.5935714285686021,
  0.5962499999900626,
  0.5262918298567565,
  0.4828947368336333,
  0.5278195488695346,
  0.531081081071512,
  0.4863636363562673,
  0.7291139240444802,
  0.5999999999899999,
  0.504978423520602,
  0.531707317064525,
  0.5441431425574741,
  0.6303210131170066,
  0.8140267309187239,
  0.5339150721883504,
  0.6144527660912564,
  0.5942356613110814,
  0.6819999999818134,
  0.6798669416813202,
  0.5691489361621397,
  0.52375508710044,
  0.675405545073669,
  0.6076678640385648,
  0.6456952945815905,
  0.6307291666622868,
  0.7181818181709367,
  0.6596938775465329,
  0.596551606282759]

In [15]:
# Arithmetic mean of the scores in `fs`; shown as the cell output.
np.mean(fs)

0.6040264859740025

In [50]:
# Run configuration: features from normal/TVSum05s.h5 with the model@3 checkpoint.
# Evaluation.__init__ reads use_cuda, model_path, featuresH5, SegH5,
# dataset_name, results_path and ifgetScore; the other keys are not read by it.
args = {
    "verbose": True,
    "use_cuda": False,
    "cuda_device": 0,
    "max_summary_length": 0.15,
    # Features and change points come from the same file.
    "featuresH5": "../../Preprocessing/extracted_features/normal/TVSum05s.h5",
    "SegH5": "../../Preprocessing/extracted_features/normal/TVSum05s.h5",
    "splits": None,
    "train": False,
    "model_path": "models/model@3.pth.tar",
    "dataset_name": "tvsum",
    "results_path": "results/vasnet05s_results.h5",
    "ifgetScore": True,
}

In [52]:
# Build the evaluator from the current `args` dict, load the checkpoint, then
# run inference and scoring. predict() is left as the last bare expression so
# its return value is shown as the cell output.
# NOTE(review): the recorded run of this cell ended in
# KeyError: 'Unable to open object (component not found)'.
evaluator = Evaluation(args)
evaluator.init_model()
evaluator.predict()

707
video_1
CPS: 184
nfps sum: 10597
postions: 708
seg_score: [0.5199824571609497, 0.5199579000473022, 0.32673853635787964, 0.061718665063381195, 0.043454404920339584, 0.03830771520733833, 0.025595560669898987, 0.03240128606557846, 0.020675525069236755, 0.20391884446144104, 0.13097736239433289, 0.11060253530740738, 0.12916316092014313, 0.11589038372039795, 0.11070483922958374, 0.12257546186447144, 0.1269274204969406, 0.062151726335287094, 0.03277938812971115, 0.030860213562846184, 0.03332009166479111, 0.03847526013851166, 0.03584791719913483, 0.2072364240884781, 0.32018333673477173, 0.2964320182800293, 0.23463141918182373, 0.08521806448698044, 0.11592835187911987, 0.09365510195493698, 0.049663856625556946, 0.07808169722557068, 0.018072720617055893, 0.012818527407944202, 0.013371746055781841, 0.07458586990833282, 0.1811676025390625, 0.1851520538330078, 0.20965981483459473, 0.20952628552913666, 0.21870730817317963, 0.22229363024234772, 0.2148541659116745, 0.1641010344028473, 0.1836336404

KeyError: 'Unable to open object (component not found)'

In [15]:
# Run configuration: features from normal/TVSum05sooo.h5.
# Evaluation.__init__ reads use_cuda, model_path, featuresH5, SegH5,
# dataset_name, results_path and ifgetScore; the other keys are not read by it.
# NOTE(review): model_path ends in '.pth.tar.pth' here, whereas the other
# model@3 cells use '.pth.tar' -- confirm which file is intended.
args = {
    "verbose": True,
    "use_cuda": False,
    "cuda_device": 0,
    "max_summary_length": 0.15,
    # Features and change points come from the same file.
    "featuresH5": "../../Preprocessing/extracted_features/normal/TVSum05sooo.h5",
    "SegH5": "../../Preprocessing/extracted_features/normal/TVSum05sooo.h5",
    "splits": None,
    "train": False,
    "model_path": "models/model@3.pth.tar.pth",
    "dataset_name": "tvsum",
    "results_path": "results/vasnet05sooo_results.h5",
    "ifgetScore": True,
}

In [38]:
# Build the evaluator from the current `args` dict, load the checkpoint, then
# run inference and scoring. predict() is left as the last bare expression so
# its return value (per-video F-scores and the score table) is shown as the
# cell output.
evaluator = Evaluation(args)
evaluator.init_model()
evaluator.predict()

[0.54848355 0.5598483  0.44876775 0.45749238 0.59750986 0.3043679
 0.30957326 0.09467079 0.05594251 0.01974953 0.04322577 0.03729133
 0.04951241 0.05585061 0.03403658 0.03505798 0.04865693 0.03533698
 0.02937422 0.04075298 0.04372174 0.04508198 0.03523981 0.0343787
 0.02445016 0.02341663 0.02713013 0.02383053 0.02221448 0.03301975
 0.02958886 0.02698389 0.0304078  0.04734514 0.02628844 0.02163303
 0.01839371 0.01985246 0.02258323 0.0294279  0.27681866 0.26741213
 0.28309464 0.18398769 0.25493687 0.22091642 0.08752769 0.07657791
 0.08745363 0.1117355  0.10610151 0.12550536 0.09417587 0.11248901
 0.1275179  0.11349601 0.11305779 0.14756948 0.14384553 0.1054085
 0.12540111 0.11047928 0.13075791 0.11625266 0.10483105 0.10990468
 0.10707231 0.12253115 0.0845765  0.12175436 0.12363166 0.12321813
 0.11351319 0.12085492 0.14092804 0.12827313 0.1421006  0.11465566
 0.12480418 0.12627454 0.1029135  0.09546895 0.09720979 0.0317311
 0.03342278 0.02961852 0.03314856 0.02815546 0.04759837 0.0307902


([0.2962998102448052,
  0.23000845308148765,
  0.30587813619633153,
  0.2898070262232073,
  0.27062737642071044,
  0.22114720110267938,
  0.7220930232446187,
  0.276933846690396,
  0.32709529276317917,
  0.6839274275922752,
  0.29122807017203234,
  0.2250896057315399,
  0.3938906752369358,
  0.26237726098100816,
  0.6421428571352126,
  0.38534842167498096,
  0.14290123456569598,
  0.6458611249312151,
  0.3422018348554087,
  0.2446182152698889,
  0.3887175324643773,
  0.22575786463212294,
  0.2513442778955445,
  0.41613445377451874,
  0.25962848296892094,
  0.16994680850762506,
  0.26884057970880143,
  0.2558983666015263,
  0.31012084591676553,
  0.2153910849435207,
  0.6259999999895667,
  0.260459770108955,
  0.6159292563461686,
  0.21080203986823548,
  0.20374488004561883,
  0.6341645885234068,
  0.30260770974713597,
  0.4349249658876545,
  0.2810600155840832,
  0.3576248312993893,
  0.6568627450951772,
  0.19837799717632748,
  0.269904596699568,
  0.6053763440791695,
  0.373346897245

In [23]:
# Run configuration: features from normal/customSet.h5 with the model@3 checkpoint.
# Evaluation.__init__ reads use_cuda, model_path, featuresH5, SegH5,
# dataset_name, results_path and ifgetScore; the other keys are not read by it.
args = {
    "verbose": True,
    "use_cuda": False,
    "cuda_device": 0,
    "max_summary_length": 0.15,
    # Features and change points come from the same file.
    "featuresH5": "../../Preprocessing/extracted_features/normal/customSet.h5",
    "SegH5": "../../Preprocessing/extracted_features/normal/customSet.h5",
    "splits": None,
    "train": False,
    "model_path": "models/model@3.pth.tar",
    "dataset_name": "tvsum",
    "results_path": "results/customSetResults2.h5",
    "ifgetScore": True,
}

In [24]:
# Build the evaluator from the current `args` dict, load the checkpoint, then
# run inference and scoring. predict() returns None when no scoring happens,
# in which case the cell shows only the printed lengths and video keys.
evaluator = Evaluation(args)
evaluator.init_model()
evaluator.predict()

235
323
9749
884
354
video_1
video_2
video_3
video_4
video_5
