## Annotated Summary processing

In [103]:
import h5py
import os
import numpy as np
import pandas as pd
import math
from ortools.algorithms import pywrapknapsack_solver
import torch
import scipy.io
import torch.nn as nn

In [104]:
# Root folder of the raw datasets, given relative to the notebook's working directory.
datasets_path='../data'
public_dataset_path=datasets_path+'/Public datasets'
# Per-dataset folders; the annotation / info paths in the `args` cells below are built from these.
tvsum_data = public_dataset_path+'/ydata-tvsum50-v1_1'
summe_data = public_dataset_path+'/SUMMe'

In [105]:
# Only referenced by a commented-out line in get_tvsum (gt_score[::EXTRACT_FREQUENCY]).
# Presumably the frame-sampling stride used when the features were extracted — TODO confirm.
EXTRACT_FREQUENCY = 15

In [106]:
def videoname_map(name, info_df):
    """Translate a video id into the 'video_<index>' key form.

    Args:
        name: video id to look up in the 'video_id' column.
        info_df: DataFrame with a 'video_id' column, or None.

    Returns:
        `name` unchanged when `info_df` is None; otherwise 'video_' followed by
        the index label of the first row whose 'video_id' equals `name`.

    Raises:
        IndexError: if no row matches `name`.
    """
    if info_df is not None:
        matching_rows = info_df[info_df['video_id'] == name]
        # .index[0] is the index *label* of the first match, not a 1-based position.
        first_label = matching_rows.index[0]
        return 'video_' + str(first_label)
    return name

#### )) Knapsack algorithm

In [108]:
def knapsack_ortools(values, weights, items, capacity):
    """Solve a 0/1 knapsack with the OR-Tools dynamic-programming solver.

    Args:
        values: per-item values; multiplied by 1000 and cast to int32
            (fractional remainder is dropped by the cast) before solving.
        weights: per-item weights; cast to int32.
        items: not used by this function; kept so existing calls keep working.
        capacity: knapsack capacity, handed to the solver as-is.

    Returns:
        List of 0-based item indices contained in the solver's best solution,
        in ascending order.
    """
    value_scale = 1000  # the solver takes integers, so floats are scaled up first
    int_values = (np.array(values) * value_scale).astype(np.int32)
    int_weights = np.array(weights).astype(np.int32)

    solver = pywrapknapsack_solver.KnapsackSolver(
        pywrapknapsack_solver.KnapsackSolver.KNAPSACK_DYNAMIC_PROGRAMMING_SOLVER,
        'test',
    )
    # One weight dimension, hence the single-element outer lists.
    solver.Init(int_values.tolist(), [int_weights.tolist()], [capacity])
    solver.Solve()

    packed_items = []
    for item_idx in range(len(int_weights)):
        if solver.BestSolutionContains(item_idx):
            packed_items.append(item_idx)
    return packed_items

In [109]:
def knapsack_dp(values,weights,n_items,capacity,return_all=False):
    """Pure-NumPy dynamic-programming solver for the 0/1 knapsack problem.

    Args:
        values: list of int/float item values.
        weights: list of non-negative int item weights.
        n_items: number of items to consider (positive int).
        capacity: knapsack capacity (positive int).
        return_all: when True, also return the best total value.

    Returns:
        Sorted list of 0-based indices of the chosen items, or the tuple
        (indices, best_value) when `return_all` is True. `best_value` is read
        from the float32 DP table.

    Raises:
        AssertionError: if any argument fails the type/range checks.
    """
    # Type checks on the containers and scalars, in the same order as before.
    for argument, expected_type in ((values, list), (weights, list),
                                    (n_items, int), (capacity, int)):
        assert isinstance(argument, expected_type)
    # Element type checks.
    assert all(isinstance(v, (int, float)) for v in values)
    assert all(isinstance(wt, int) for wt in weights)
    # Range checks.
    assert all(wt >= 0 for wt in weights)
    assert n_items > 0
    assert capacity > 0

    table_shape = (n_items + 1, capacity + 1)
    best = np.zeros(table_shape, dtype=np.float32)   # best[r, c]: best value using first r items within capacity c
    taken = np.zeros(table_shape, dtype=np.float32)  # taken[r, c] == 1 when item r is part of best[r, c]

    for row in range(1, n_items + 1):
        item_weight = weights[row - 1]
        item_value = values[row - 1]
        for cap in range(capacity + 1):
            without_item = best[row - 1, cap]
            if item_weight > cap:
                # Item does not fit at this capacity.
                best[row, cap] = without_item
                continue
            with_item = item_value + best[row - 1, cap - item_weight]
            if with_item > without_item:
                best[row, cap] = with_item
                taken[row, cap] = 1
            else:
                best[row, cap] = without_item

    # Walk back from the last item, shrinking the remaining capacity on each pick.
    chosen = []
    remaining = capacity
    for row in reversed(range(1, n_items + 1)):
        if taken[row, remaining] == 1:
            chosen.append(row - 1)  # store 0-based index directly
            remaining -= weights[row - 1]
    chosen.sort()

    if not return_all:
        return chosen
    return chosen, best[n_items, capacity]

#### )) GT Summary Generator

In [110]:
softmax = nn.Softmax(dim=-1)

def generate_gt_sum(cps, nfps, n_frames, gt_score, proportion=0.15, method='knapsack'):
    """Build frame-level ground-truth probabilities and a binary keyshot summary.

    Each segment is scored by the mean of `gt_score` over its frames. Segments
    are then selected under a length budget of floor(n_frames * proportion).

    Args:
        cps: 2D array of change points; row k holds the inclusive
            (first, last) frame indices of segment k.
        nfps: number of frames per segment (sequence with one entry per segment).
        n_frames: number of frames; sets the length of `gt_probs` and the budget.
        gt_score: per-frame importance scores, indexable by frame slices.
        proportion: summary length budget as a fraction of `n_frames`.
        method: 'knapsack' (select via knapsack_ortools) or 'rank' (greedy by
            descending segment score, same rule as make_user_summary).

    Returns:
        (gt_probs, gt_summary):
        - gt_probs: float32 array of length `n_frames`; every frame of a segment
          carries that segment's softmax probability over segment scores.
        - gt_summary: float32 array of length sum(nfps); 1 for frames of picked
          segments and 0 otherwise, laid out segment after segment.

    Raises:
        KeyError: if `method` is neither 'knapsack' nor 'rank'. (Previously an
            unsupported method fell through and failed with a NameError.)
    """
    n_segs = cps.shape[0]
    # Half-open [start, end) frame ranges; cps stores inclusive last-frame indices.
    bounds = [(int(cps[k, 0]), int(cps[k, 1] + 1)) for k in range(n_segs)]
    seg_score = [float(gt_score[start:end].mean()) for start, end in bounds]

    limits = int(math.floor(n_frames * proportion))

    if method == 'knapsack':
        picks = knapsack_ortools(seg_score, nfps, n_segs, limits)
    elif method == 'rank':
        # Greedy selection by descending score, mirroring make_user_summary.
        picks = []
        total_len = 0
        for k in np.argsort(seg_score)[::-1].tolist():
            if total_len + nfps[k] < limits:
                picks.append(k)
                total_len += nfps[k]
    else:
        raise KeyError("Unknown method {}".format(method))

    # Softmax across segments turns the mean scores into a distribution over shots.
    shot_probs = softmax(torch.Tensor(seg_score)).numpy()

    gt_probs = np.zeros(n_frames, dtype=np.float32)
    for (start, end), prob in zip(bounds, shot_probs):
        gt_probs[start:end] = prob

    # One flag per segment, expanded to frame level in a single call instead of
    # growing the array with np.concatenate on every iteration.
    picked = set(picks)
    seg_flags = np.array([1.0 if k in picked else 0.0 for k in range(n_segs)],
                         dtype=np.float32)
    seg_lengths = np.asarray(nfps[:n_segs], dtype=np.int64)
    gt_summary = np.repeat(seg_flags, seg_lengths)

    return gt_probs, gt_summary
    
    

#### )) User Summary Generator

In [111]:
def make_user_summary(frame_scores, cps, n_frames, nfps, proportion=0.15, method='knapsack'):
    """Turn one annotator's frame scores into a binary keyshot summary.

    Args:
    ---------------------------------------------
    - frame_scores: per-frame importance scores from a single user.
    - cps: change points, 2D matrix; each row is the inclusive (first, last)
      frame range of a segment.
    - n_frames: number of frames of the original video.
    - nfps: number of frames per segment.
    - proportion: summary length budget as a fraction of n_frames.
    - method: segment selection strategy, one of ['knapsack', 'rank'].

    Returns:
    ---------------------------------------------
    float32 vector of length n_frames, 1 on the frames of selected segments.

    Raises:
    ---------------------------------------------
    KeyError for any other value of method.
    """
    n_segs = cps.shape[0]

    # Mean score per segment (last change-point index is inclusive).
    seg_score = [
        float(frame_scores[int(cps[k, 0]):int(cps[k, 1] + 1)].mean())
        for k in range(n_segs)
    ]

    budget = int(math.floor(n_frames * proportion))

    if method == 'rank':
        # Greedy: best-scoring segments first, kept while strictly under budget.
        picks = []
        used = 0
        for k in np.argsort(seg_score)[::-1].tolist():
            if used + nfps[k] < budget:
                picks.append(k)
                used += nfps[k]
    elif method == 'knapsack':
        picks = knapsack_ortools(seg_score, nfps, n_segs, budget)
    else:
        raise KeyError("Unknown method {}".format(method))

    summary = np.zeros(n_frames, dtype=np.float32)
    for k in picks:
        seg_first, seg_last = cps[k]
        summary[seg_first:seg_last + 1] = 1

    return summary

#### )) TVSum User Summaries

In [112]:
# Inputs for get_tvsum. NOTE: the name `args` is rebound to the SUMMe paths in a
# later cell, so this cell must be the most recently run one before calling get_tvsum.
args={
    # Annotation .mat file; get_tvsum opens it with h5py.
    'annotation':tvsum_data+'/matlab/ydata-tvsum50.mat',
    # Feature HDF5 file; get_tvsum opens it in 'r+' mode and adds datasets to it.
    'dataset_h5':'extracted_features/normal/TVSum.h5',
    # Tab-separated video info table.
    'video_info':tvsum_data+'/data/ydata-tvsum50-info.tsv',
}

In [21]:
def get_tvsum(args):
    """Add TVSum ground-truth and user summaries to the feature HDF5 file.

    For each of the 50 entries under 'tvsum50' in the annotation file, reads the
    user annotations, the ground-truth score and the video title, then reads
    'change_points', 'n_frame_per_seg' and 'n_frames' of group 'video_<i+1>'
    from the feature file and writes four datasets into that group:
    'gt_score', 'gt_probs', 'gt_summary' and 'user_summary'.

    Datasets that already exist under those names are replaced, so the function
    can be re-run on the same file.

    Args:
        args: dict with
            - 'annotation': path of the annotation file (opened with h5py, read-only).
            - 'dataset_h5': path of the feature HDF5 file (opened in 'r+' mode).
            Other keys (e.g. 'video_info') are accepted but not read.

    Returns:
        None. Progress is printed, one line per video.
    """
    n_videos = 50
    with h5py.File(args['annotation'], 'r') as mat, h5py.File(args['dataset_h5'], 'r+') as d:
        for i in range(n_videos):
            # The entry read at [i, 0] is used as a key to fetch the actual data
            # from the same file.
            user_scores = mat[mat['tvsum50/user_anno'][i, 0]]
            gt_score = np.squeeze(mat[mat['tvsum50/gt_score'][i, 0]])

            # The title is stored as an array of character codes.
            name_ref = mat['tvsum50/video'][i, 0]
            video_title = "".join(chr(code[0]) for code in mat[name_ref][()])
            video_name = 'video_'+str(i+1)

            print(i,video_name, video_title, user_scores.shape, d[video_name+'/picks'][()].shape)

            cps = d[video_name + '/change_points'][()]
            nfps = d[video_name + '/n_frame_per_seg'][()].tolist()
            n_frames = d[video_name + '/n_frames'][()]

            gt_probs, gt_summary = generate_gt_sum(cps, nfps, n_frames, gt_score)
            user_summary = [make_user_summary(us, cps, n_frames, nfps) for us in user_scores]

            outputs = {
                'gt_score': gt_score,
                'gt_probs': gt_probs,
                'gt_summary': gt_summary,
                'user_summary': user_summary,
            }
            for key, value in outputs.items():
                dataset_path = video_name + '/' + key
                # create_dataset fails when the name already exists; drop the
                # stale dataset first so re-running the cell works.
                if dataset_path in d:
                    del d[dataset_path]
                d.create_dataset(dataset_path, data=value)

    return


In [22]:
# Uses the TVSum `args` dict; `args` is reassigned for SUMMe further down, so
# re-run the TVSum `args` cell first if the SUMMe one was executed since.
get_tvsum(args)

0 video_1 AwmHb44_ouw (20, 10597) (706,)
1 video_2 98MoyGZKHXc (20, 4688) (312,)
2 video_3 J0nA4VgnoCo (20, 14019) (934,)
3 video_4 gzDbaEs1Rlg (20, 7210) (480,)
4 video_5 XzYM3PfTM4w (20, 3327) (221,)
5 video_6 HT5vyqe0Xaw (20, 9671) (644,)
6 video_7 sTEELN-vY30 (20, 4468) (297,)
7 video_8 vdmoEJ5YbrQ (20, 9870) (658,)
8 video_9 xwqBXPGE9pQ (20, 7010) (467,)
9 video_10 akI8YFjEmUw (20, 3995) (266,)
10 video_11 i3wAGJaaktw (20, 4700) (313,)
11 video_12 Bhxk-O1Y7Ho (20, 13511) (900,)
12 video_13 0tmA_C6XwfM (20, 3532) (235,)
13 video_14 3eYKfiOEJNs (20, 4853) (323,)
14 video_15 xxdtq8mxegs (20, 4324) (288,)
15 video_16 WG0MBPpPC6I (20, 9534) (635,)
16 video_17 Hl-__g2gn_A (20, 5846) (389,)
17 video_18 Yi4Ij2NM7U4 (20, 9731) (648,)
18 video_19 37rzWOQsNIw (20, 5742) (382,)
19 video_20 LRw_obCPUt0 (20, 6241) (416,)
20 video_21 cjibtmSLxQ4 (20, 19406) (1293,)
21 video_22 b626MiF1ew4 (20, 5661) (377,)
22 video_23 XkqCExn6_Us (20, 5631) (375,)
23 video_24 GsAD1KT1xo8 (20, 4356) (290,)
24 vid

#### )) SUMMe User Summaries

In [120]:
def get_summe(args):
    """Add SumMe ground-truth and user summaries to the feature HDF5 file.

    Every '.mat' file in the annotation folder is loaded with scipy. The file
    name without extension is looked up in the 'video_id' column of the info
    table; its 1-based position gives the group name 'video_<n>'. From that
    group 'change_points', 'n_frame_per_seg' and 'n_frames' are read, and
    'gt_score', 'gt_probs', 'gt_summary' and 'user_summary' are written.

    Datasets that already exist under those names are replaced, so the function
    can be re-run on the same file.

    Args:
        args: dict with
            - 'annotation': folder containing the per-video .mat files.
            - 'dataset_h5': path of the feature HDF5 file (opened in 'a' mode).
            - 'video_info': tab-separated table with a 'video_id' column.

    Returns:
        None. Progress and array shapes are printed per video.

    Raises:
        ValueError: if a .mat file name has no matching 'video_id' entry.
    """
    video_ids = pd.read_csv(args['video_info'], sep='\t')['video_id'].tolist()

    # os.listdir order is arbitrary and may include non-annotation files; keep
    # only .mat files and process them in a stable, case-insensitive order.
    annotation_files = sorted(
        (entry for entry in os.listdir(args['annotation']) if entry.lower().endswith('.mat')),
        key=str.lower,
    )

    with h5py.File(args['dataset_h5'], 'a') as SM:
        for count, file in enumerate(annotation_files, start=1):
            data = scipy.io.loadmat(os.path.join(args['annotation'], file))
            # NOTE(review): gt_score is stored exactly as loaded (not squeezed,
            # unlike get_tvsum) — confirm downstream readers expect that shape.
            gt_score = data['gt_score']
            user_scores = data['user_score'].T  # transposed so iteration yields one user at a time
            # splitext keeps names that themselves contain dots intact.
            video_id = os.path.splitext(file)[0]
            video_title = video_id+'.mp4'
            video_name = 'video_'+str(video_ids.index(video_id)+1)
            print(count,video_name, video_title, user_scores.shape, SM[video_name+'/picks'][()].shape)

            cps = SM[video_name + '/change_points'][()]
            nfps = SM[video_name + '/n_frame_per_seg'][()].tolist()
            n_frames = SM[video_name + '/n_frames'][()]

            gt_probs, gt_summary = generate_gt_sum(cps, nfps, n_frames, gt_score)
            user_summary = [make_user_summary(us, cps, n_frames, nfps) for us in user_scores]

            print(gt_score.shape, gt_summary.shape, np.array(user_summary).shape)

            outputs = {
                'gt_score': gt_score,
                'gt_probs': gt_probs,
                'gt_summary': gt_summary,
                'user_summary': user_summary,
            }
            for key, value in outputs.items():
                dataset_path = video_name + '/' + key
                # create_dataset fails when the name already exists; drop the
                # stale dataset first so re-running the cell works.
                if dataset_path in SM:
                    del SM[dataset_path]
                SM.create_dataset(dataset_path, data=value)

    return



In [114]:
# Inputs for get_summe. NOTE: this rebinds the `args` name that the TVSum cells
# also use, so get_tvsum must not be called again without re-running its own `args` cell.
args={
    # Folder of per-video annotation .mat files; the trailing slash matters because
    # get_summe appends the file name directly to this string.
    'annotation':summe_data+'/GT/',
    # Feature HDF5 file; get_summe opens it in 'a' mode and adds datasets to it.
    'dataset_h5':'extracted_features/normal/SUMMe.h5',
    # Tab-separated table whose 'video_id' column determines the video_<n> numbering.
    'video_info':summe_data+'/summe_info.tsv',
}

In [121]:
# Uses the SUMMe `args` dict defined in the cell directly above.
get_summe(args)

1 video_1 Air_Force_One.mp4 (15, 4494) (299,)
(4494, 1) (4494,) (15, 4494)
2 video_2 Base jumping.mp4 (18, 4729) (315,)
(4729, 1) (4729,) (18, 4729)
3 video_3 Bearpark_climbing.mp4 (15, 3341) (222,)
(3341, 1) (3341,) (15, 3341)
4 video_4 Bike Polo.mp4 (15, 3064) (204,)
(3064, 1) (3064,) (15, 3064)
5 video_5 Bus_in_Rock_Tunnel.mp4 (15, 5131) (342,)
(5131, 1) (5133,) (15, 5133)
6 video_6 car_over_camera.mp4 (15, 4382) (292,)
(4382, 1) (4382,) (15, 4382)
7 video_7 Car_railcrossing.mp4 (16, 5075) (338,)
(5075, 1) (5075,) (16, 5075)
8 video_8 Cockpit_Landing.mp4 (15, 9046) (603,)
(9046, 1) (9046,) (15, 9046)
9 video_9 Cooking.mp4 (17, 1286) (85,)
(1286, 1) (1287,) (17, 1287)
10 video_10 Eiffel Tower.mp4 (15, 4971) (331,)
(4971, 1) (4971,) (15, 4971)
11 video_11 Excavators river crossing.mp4 (15, 9721) (648,)
(9721, 1) (9721,) (15, 9721)
12 video_12 Fire Domino.mp4 (15, 1612) (107,)
(1612, 1) (1612,) (15, 1612)
13 video_13 Jumps.mp4 (15, 950) (63,)
(950, 1) (950,) (15, 950)
14 video_14 Kids_