## Annotated Summary processing

In [54]:
import h5py
import numpy as np
import pandas as pd
import math
from ortools.algorithms import pywrapknapsack_solver


In [42]:
# Relative locations of the raw datasets; every path below is derived
# from `datasets_path` by appending one more path component.
datasets_path = '../data'
public_dataset_path = '/'.join((datasets_path, 'Public datasets'))
tvsum_data = '/'.join((public_dataset_path, 'ydata-tvsum50-v1_1'))
summe_data = '/'.join((public_dataset_path, 'SUMMe'))

In [3]:
def videoname_map(name, info_df):
    """Translate a video id into its ``video_<index label>`` key.

    Args:
    ---------------------------------------------
    - name: video id to look up in the ``video_id`` column.
    - info_df: DataFrame with a ``video_id`` column, or None.

    Returns ``name`` unchanged when ``info_df`` is None; otherwise the
    string ``'video_'`` followed by the index label of the first row whose
    ``video_id`` equals ``name``. Raises IndexError when no row matches.
    """
    if info_df is None:
        return name
    matching_rows = info_df.loc[info_df['video_id'] == name]
    first_label = matching_rows.index[0]
    return f"video_{first_label!s}"

In [None]:
# Command-line options for a training/evaluation script: dataset and split
# paths, model size, optimiser settings and miscellaneous run flags.
# NOTE(review): `parser` is not created anywhere in the visible part of this
# file, and `torch` / `os` are not among the visible imports - this cell
# looks pasted from a standalone script; confirm before running it.
# Required inputs
parser.add_argument('-d', '--dataset', type=str, required=True, help="path to h5 dataset (required)")
parser.add_argument('-s', '--split', type=str, required=True, help="path to split file (required)")
# parser.add_argument('--split-id', type=int, default=4, help="split index (default: 0)")
parser.add_argument('-m', '--metric', type=str, required=True, choices=['tvsum', 'summe'],
                    help="evaluation metric ['tvsum', 'summe']")
# Model options
parser.add_argument('--input-dim', type=int, default=1024, help="input dimension (default: 1024)")
parser.add_argument('--hidden-dim', type=int, default=256, help="hidden unit dimension of DSN (default: 256)")
parser.add_argument('--num-layers', type=int, default=1, help="number of RNN layers (default: 1)")
parser.add_argument('--rnn-cell', type=str, default='lstm', help="RNN cell type (default: lstm)")
# parser.add_argument('--dropout', type=float, default=0.0, help="dropout rate")

# Optimization options
parser.add_argument('--lr', type=float, default=1e-05, help="learning rate (default: 1e-05)")
parser.add_argument('--weight-decay', type=float, default=1e-05, help="weight decay rate (default: 1e-05)")
parser.add_argument('--max-epoch', type=int, default=60, help="maximum epoch for training (default: 60)")
parser.add_argument('--stepsize', type=int, default=30, help="how many steps to decay learning rate (default: 30)")
parser.add_argument('--gamma', type=float, default=0.1, help="learning rate decay (default: 0.1)")
parser.add_argument('--num-episode', type=int, default=5, help="number of episodes (default: 5)")
parser.add_argument('--beta', type=float, default=0.01, help="weight for summary length penalty term (default: 0.01)")
# Misc
parser.add_argument('--seed', type=int, default=1, help="random seed (default: 1)")
parser.add_argument('--gpu', type=str, default='0', help="which gpu devices to use")
parser.add_argument('--use-cpu', action='store_true', help="use cpu device")
parser.add_argument('--evaluate', action='store_true', help="whether to do evaluation only")
parser.add_argument('--save-dir', type=str, help="path to save output")
parser.add_argument('--resume', type=str, default='', help="path to resume file")
parser.add_argument('--verbose', action='store_true', help="whether to show detailed test results")
# parser.add_argument('--save-results', default=True, help="whether to save output results")

# Parse the process arguments into `args`.
# NOTE(review): a later cell in this file rebinds `args` to a plain dict.
args = parser.parse_args()

# Seed torch's RNG and restrict the visible CUDA devices to --gpu.
torch.manual_seed(args.seed)
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
# Use the GPU only when CUDA is available and --use-cpu was not given.
use_gpu = torch.cuda.is_available()
if args.use_cpu: use_gpu = False

#### )) Knapsack algorithm

In [52]:
def knapsack_ortools(values, weights, items, capacity):
    """Pick items for a single-constraint knapsack using the OR-Tools solver.

    Args:
    ---------------------------------------------
    - values: per-item values; multiplied by 1000 and cast to int32 before
      being handed to the solver.
    - weights: per-item weights; cast to int32.
    - items: not used by this function (kept in the signature).
    - capacity: knapsack capacity, passed to the solver unchanged.

    Returns the list of item indices contained in the solver's best solution,
    in increasing order.
    """
    scale = 1000
    scaled_values = (np.array(values) * scale).astype(np.int32)
    int_weights = np.array(weights).astype(np.int32)

    solver_cls = pywrapknapsack_solver.KnapsackSolver
    solver = solver_cls(solver_cls.KNAPSACK_DYNAMIC_PROGRAMMING_SOLVER, 'test')
    # One weight row and one capacity: a single-constraint knapsack.
    solver.Init(scaled_values.tolist(), [int_weights.tolist()], [capacity])
    # Solve() must run before BestSolutionContains() is queried; its return
    # value (the best total value) is not needed here.
    solver.Solve()

    return [
        item_idx
        for item_idx in range(len(int_weights))
        if solver.BestSolutionContains(item_idx)
    ]

In [43]:
def knapsack_dp(values,weights,n_items,capacity,return_all=False):
    """Solve the 0/1 knapsack problem with a dynamic-programming table.

    Args:
    ---------------------------------------------
    - values: list of item values (int or float).
    - weights: list of non-negative int item weights.
    - n_items: number of items to consider (positive int).
    - capacity: knapsack capacity (positive int).
    - return_all: when True, also return the best total value.

    Returns the sorted list of selected 0-based item indices, or the tuple
    ``(picks, max_val)`` when ``return_all`` is True. Invalid inputs trip an
    ``assert``.
    """
    # Input validation: container types first, then element types and ranges.
    assert isinstance(values, list)
    assert isinstance(weights, list)
    assert isinstance(n_items, int)
    assert isinstance(capacity, int)
    assert all(isinstance(val, (int, float)) for val in values)
    assert all(isinstance(val, int) for val in weights)
    assert all(val >= 0 for val in weights)
    assert n_items > 0
    assert capacity > 0

    shape = (n_items + 1, capacity + 1)
    # best[i, c]: best value using the first i items within capacity c.
    best = np.zeros(shape, dtype=np.float32)
    # taken[i, c] == 1 when item i is part of that best solution.
    taken = np.zeros(shape, dtype=np.float32)

    for item in range(1, n_items + 1):
        item_weight = weights[item - 1]
        item_value = values[item - 1]
        for cap in range(capacity + 1):
            if item_weight <= cap:
                with_item = item_value + best[item - 1, cap - item_weight]
                if with_item > best[item - 1, cap]:
                    best[item, cap] = with_item
                    taken[item, cap] = 1
                    continue
            # Item does not fit or does not improve the value: carry over.
            best[item, cap] = best[item - 1, cap]

    # Walk back from the last item, freeing capacity for each item taken.
    remaining = capacity
    chosen = []
    for item in reversed(range(1, n_items + 1)):
        if taken[item, remaining] == 1:
            chosen.append(item - 1)  # store as 0-based index
            remaining -= weights[item - 1]
    chosen.reverse()  # collected in descending order; return ascending

    if not return_all:
        return chosen
    return chosen, best[n_items, capacity]

#### )) User Summary Generator

In [55]:
def make_user_summary(frame_scores, cps, n_frames, nfps, proportion=0.15, method='knapsack'):
    """Generate keyshot-based video summary i.e. a binary vector.
    Args:
    ---------------------------------------------
    - frame_scores: importance scores by users.
    - cps: change points, 2D matrix, each row contains a segment as
      (first frame, last frame), both inclusive.
    - n_frames: original number of frames.
    - nfps: number of frames per segment (used as the segment weights).
    - proportion: length of video summary (compared to original video length).
    - method: defines how shots are selected, ['knapsack', 'rank'].

    Returns a float32 vector of length n_frames holding 1 for every frame of
    a selected segment and 0 elsewhere. Raises KeyError for an unknown method.
    """
    n_segs = cps.shape[0]

    # Convert the segment bounds to plain ints once and reuse them for both
    # scoring and marking, so float-typed change points cannot break slicing.
    bounds = [(int(cps[seg_idx, 0]), int(cps[seg_idx, 1])) for seg_idx in range(n_segs)]

    # Segment score = mean user score over the segment's frames.
    seg_score = [float(frame_scores[first:last + 1].mean()) for first, last in bounds]

    # Frame budget of the summary.
    limits = int(math.floor(n_frames * proportion))

    if method == 'knapsack':
        picks = knapsack_ortools(seg_score, nfps, n_segs, limits)
    elif method == 'rank':
        # Greedy: visit segments from highest to lowest score and keep each
        # one that still leaves the total strictly below the budget.
        order = np.argsort(seg_score)[::-1].tolist()
        picks = []
        total_len = 0
        for i in order:
            if total_len + nfps[i] < limits:
                picks.append(i)
                total_len += nfps[i]
    else:
        raise KeyError("Unknown method {}".format(method))

    summary = np.zeros(n_frames, dtype=np.float32)
    for seg_idx in picks:
        first, last = bounds[seg_idx]
        summary[first:last + 1] = 1

    return summary

#### )) TVSum User Summaries

In [45]:
# File locations passed to get_tvsum: the TVSum annotation .mat file, the
# HDF5 feature file that is updated in place, and the video info table.
args = dict(
    annotation=tvsum_data + '/matlab/ydata-tvsum50.mat',
    dataset_h5='extracted_features/normal/TVSum.h5',
    video_info=tvsum_data + '/data/ydata-tvsum50-info.tsv',
)

In [75]:
def get_tvsum(args):
    """Add TVSum ground-truth scores and user summaries to the feature file.

    Args:
    ---------------------------------------------
    - args: dict with the keys
        'annotation': path of the TVSum annotation .mat file (opened with h5py),
        'dataset_h5': path of the HDF5 feature file, opened in 'r+' mode,
        'video_info': path of the tab-separated video info table.

    For each of the 50 videos, the ground-truth score vector is cut or
    zero-padded to the length of the stored ``picks`` dataset, one keyshot
    summary per annotator is built with ``make_user_summary``, and both are
    written to ``video_<i>/gt_score`` and ``video_<i>/user_summary``.

    Returns the list of user summaries of the LAST video processed only.
    """
    # NOTE(review): info_df is loaded but never used below; the read is kept
    # so a missing info file still fails early - confirm whether it is needed.
    info_df = pd.read_csv(args['video_info'], sep='\t')
    with h5py.File(args['annotation'], 'r') as mat, h5py.File(args['dataset_h5'], 'r+') as d:
        for i in range(0, 50):
            # The annotation tables store references; index the file with
            # the reference to reach the actual data.
            uscore_idx = mat['tvsum50/user_anno'][i, 0]
            user_scores = mat[uscore_idx]

            gtscore_idx = mat['tvsum50/gt_score'][i, 0]
            gt_score = np.squeeze(mat[gtscore_idx])

            video_name = 'video_' + str(i + 1)

            # Make gt_score exactly as long as the stored picks.
            # NOTE(review): this keeps the FIRST n_picks scores rather than
            # sampling at the pick positions - confirm that is intended.
            picks_shape = d[video_name + '/picks'][()].shape
            n_picks = picks_shape[0]
            n_scores = gt_score.shape[0]
            if n_picks < n_scores:
                gt_score = gt_score[:n_picks]
            elif n_picks > n_scores:
                # np.pad returns a new array; the result must be assigned.
                gt_score = np.pad(gt_score, (0, n_picks - n_scores), 'constant')

            print(i, user_scores.shape, gt_score.shape, picks_shape)

            cps = d[video_name + '/change_points'][()]
            nfps = d[video_name + '/n_frame_per_seg'][()].tolist()
            n_frames = d[video_name + '/n_frames'][()]
            # One summary per row of the user score matrix.
            user_summary = [
                make_user_summary(us, cps, n_frames, nfps) for us in user_scores
            ]

            d.create_dataset(video_name + '/gt_score', data=gt_score)
            d.create_dataset(video_name + '/user_summary', data=user_summary)

    return user_summary


In [77]:
# Run the TVSum conversion (writes gt_score / user_summary datasets into the
# HDF5 feature file); the returned list holds the summaries of the last
# video processed only.
user_summary = get_tvsum(args)

0 (20, 10597) (706,) (706,)
1 (20, 4688) (312,) (312,)
2 (20, 14019) (934,) (934,)
3 (20, 7210) (480,) (480,)
4 (20, 3327) (221,) (221,)
5 (20, 9671) (644,) (644,)
6 (20, 4468) (297,) (297,)
7 (20, 9870) (658,) (658,)
8 (20, 7010) (467,) (467,)
9 (20, 3995) (266,) (266,)
10 (20, 4700) (313,) (313,)
11 (20, 13511) (900,) (900,)
12 (20, 3532) (235,) (235,)
13 (20, 4853) (323,) (323,)
14 (20, 4324) (288,) (288,)
15 (20, 9534) (635,) (635,)
16 (20, 5846) (389,) (389,)
17 (20, 9731) (648,) (648,)
18 (20, 5742) (382,) (382,)
19 (20, 6241) (416,) (416,)
20 (20, 19406) (1293,) (1293,)
21 (20, 5661) (377,) (377,)
22 (20, 5631) (375,) (375,)
23 (20, 4356) (290,) (290,)
24 (20, 6580) (438,) (438,)
25 (20, 3312) (220,) (220,)
26 (20, 10917) (727,) (727,)
27 (20, 8281) (552,) (552,)
28 (20, 17527) (1168,) (1168,)
29 (20, 4005) (267,) (267,)
30 (20, 5412) (360,) (360,)
31 (20, 3802) (253,) (253,)
32 (20, 13365) (891,) (891,)
33 (20, 3705) (247,) (247,)
34 (20, 4463) (297,) (297,)
35 (20, 7959) (530,