In [None]:
import torch
import torch.nn as nn
import torch.utils.data
import torch.nn.functional as F
import torch as T
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import numpy as np
from torch.optim import lr_scheduler
from torch.distributions import Bernoulli
import h5py
import json
import os
import argparse
from utils import *
import time
import datetime
import argparse
import random
import math
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use("Agg")
plt.rcParams['figure.figsize'] = (8.0, 4.0)

from sklearn.metrics import f1_score
from joblib import Parallel, delayed
import pandas as pd
from scipy.stats import kendalltau, spearmanr
from scipy.stats import rankdata
import sys
import hdf5storage

## config

In [None]:
# Width of the fused feature vectors inside PRL_VS and of the GRU hidden state.
Visual_Feature_Dim = 1024
# NOTE(review): inital_data_process concatenates exactly ten frame features, while
# PRL_VS sizes gobal_fc / local_fc as Visual_Feature_Dim * these lengths -- confirm
# that 20 is consistent with the ten-frame sampling.
Global_Seq_Len = 20 # length of the global frame sequence
Local_Seq_Len = 20 # length of the key-frame sequence (also the number of global-policy actions)

DATASET_NAME = 'summe'
SAVE_DIR = 'resualt/20/' # path to save output (default: log/)
RESUME = True # when True, train() loads the weights stored at LOAD_DIR
LOAD_DIR = 'resualt/20/model_epoch14_0.39060460573767825_summe.pth.tar'
METRIC = DATASET_NAME # evaluation metric, one of ['tvsum', 'summe']; 'tvsum' averages over users, anything else takes the max
NUM_LAYERS = 1 # number of RNN layers (default: 1)
START_EPOCH = 0 # epoch to start from when RESUME is True
epis = 0 # constant added to the reward in calculate_leaf_reward / the non-best branch of calculate_root_reward
max_epis = 1 # constant added to the reward in the best-F-score branch of calculate_root_reward
BETA = 0.01  #default 0.1
SEGMENT_LEN = 50 # len(seq) / SEGMENT_LEN gives the stride ("five_unit") of the coarse left/right moves
NUM_STEPS = 5 # number of policy steps taken per video in evaluate()
Switch_Iteration = 10
BATCH_SIZE = 1
LR = 1e-05 # learning rate (default: 1e-05)
WEIGHT_DECAY = 1e-05 # weight decay rate (default: 1e-05)
MAX_EPOCH = 300 # maximum epoch for training (default: 60)
STEP_SIZE = 30 # how many steps to decay learning rate (default: 30)



DATASET = 'datasets/eccv16_dataset_' + DATASET_NAME + '_google_pool5.h5' # path to the HDF5 dataset file
SPLIT = 'datasets/' + DATASET_NAME + '_splits.json' # path to split file (required)
SPLIT_ID = 0 # split index (default: 0)
GAMMA = 0.1 # learning rate decay (default: 0.1)
# Model options
INPUT_DIM = 1024 # input dimension (default: 1024)
HIDDEN_DIM = 256 # hidden unit dimension of DSN (default: 256)
RNN_CELL = 'lstm' # RNN cell type (default: lstm)
SEED = 1 # random seed (default: 1)
GPU = '0' # which gpu devices to use (default: 0)
USE_CPU = False # use cpu device
EVALUATE = False # whether to do evaluation only
TEST = False # when False, train() reads the train/test keys of split SPLIT_ID
VERBOSE = True # whether to show detailed test results
SAVE_RESULTS = True # whether to save output results

## utils

In [None]:
def normalized_columns_initializer(weights, std=1.0):
    """Draw a Gaussian tensor shaped like ``weights`` whose rows all have L2 norm ``std``.

    Args:
        weights: tensor whose size is copied; its values are never read.
        std: target L2 norm of each row (the norm is taken over dimension 1).

    Returns:
        A freshly sampled tensor with the same size as ``weights``.
    """
    noise = torch.randn(weights.size())
    # Per-row L2 norm, kept as a column and expanded so the division is element-wise.
    row_norm = noise.pow(2).sum(1, keepdim=True).sqrt()
    scale = std / row_norm.expand_as(noise)
    noise *= scale
    return noise

def weights_init(m):
    """Initialise the weights of convolutional and linear layers in place.

    Intended for ``nn.Module.apply``. Modules whose class name contains ``'Conv'``
    get weights drawn from N(0, 0.01^2); modules whose class name contains
    ``'Linear'`` get weights drawn from N(0, 1). In both cases an existing bias
    is zeroed. Every other module is left untouched.

    Args:
        m: the module visited by ``apply``.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(m.weight.data, mean=0, std=0.01)
    elif classname.find('Linear') != -1:
        torch.nn.init.normal_(m.weight.data)
    else:
        return

    # Layers created with bias=False expose ``bias`` as None; skip them instead of
    # failing on ``None.data``.
    if getattr(m, 'bias', None) is not None:
        m.bias.data.fill_(0)

In [None]:
def read_json(fpath):
    """Parse the JSON file at ``fpath`` and return the decoded object."""
    with open(fpath, 'r') as handle:
        return json.load(handle)

def write_json(splits, save_path):
    """Serialise ``splits`` as indented JSON to ``save_path``.

    Missing parent directories are created, including nested ones. A bare file
    name (no directory component) is written to the current working directory.

    Args:
        splits: JSON-serialisable object to store.
        save_path: destination file path.
    """
    parent = os.path.dirname(save_path)
    # dirname() is '' for a bare file name; os.mkdir('') would raise, and
    # os.mkdir cannot create more than one missing level.
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(save_path, 'w') as f:
        json.dump(splits, f, indent=4, separators=(', ', ': '))
        
def save_checkpoint(state, fpath='checkpoint.pth.tar'):
    """Save ``state`` with ``torch.save`` at ``fpath``.

    Missing parent directories are created, including nested ones. A bare file
    name such as the default is written to the current working directory.

    Args:
        state: object to serialise (e.g. a model ``state_dict``).
        fpath: destination file path.
    """
    parent = os.path.dirname(fpath)
    # dirname() is '' for the default bare file name; os.mkdir('') would raise, and
    # os.mkdir cannot create nested directories such as 'resualt/20/'.
    if parent:
        os.makedirs(parent, exist_ok=True)

    torch.save(state, fpath)

In [None]:
def choose_ten_frame(N):
    """Return ten evenly spaced frame indices for a sequence of length ``N``.

    The spacing is ``int(N / 11)``; the indices are 1x, 2x, ... 10x that spacing,
    so every index is 0 when ``N`` is smaller than 11.

    Args:
        N: number of frames in the sequence.

    Returns:
        A list of ten Python ints.
    """
    stride = int(N / float(11))
    return [int(np.floor(slot * stride)) for slot in range(1, 11)]

In [None]:
def check_inputs(values, weights, n_items, capacity):
    """Validate the arguments of ``knapsack_dp``; raises AssertionError on failure.

    Args:
        values: list of int/float item values.
        weights: list of non-negative int item weights.
        n_items: positive int, number of items.
        capacity: positive int, knapsack capacity.
    """
    # container types
    for container in (values, weights):
        assert isinstance(container, list)
    # scalar types
    for scalar in (n_items, capacity):
        assert isinstance(scalar, int)
    # element types
    assert all(isinstance(item, (int, float)) for item in values)
    assert all(isinstance(item, int) for item in weights)
    # value ranges
    assert not any(item < 0 for item in weights)
    assert n_items > 0
    assert capacity > 0
    
def knapsack_dp(values,weights,n_items,capacity,return_all = False):
    """Solve the 0/1 knapsack problem by dynamic programming.

    Args:
        values: list of item values.
        weights: list of integer item weights.
        n_items: number of items to consider.
        capacity: integer knapsack capacity.
        return_all: when True, also return the best total value.

    Returns:
        The sorted 0-based indices of the chosen items, or ``(indices, max_value)``
        when ``return_all`` is True.
    """
    check_inputs(values, weights, n_items, capacity)

    # best[i, c]: best value using the first i items within capacity c.
    # taken[i, c]: 1 when item i is part of that optimum.
    best = np.zeros((n_items + 1, capacity + 1), dtype=np.float32)
    taken = np.zeros((n_items + 1, capacity + 1), dtype=np.float32)

    for item in range(1, n_items + 1):
        item_weight = weights[item - 1]
        item_value = values[item - 1]
        for cap in range(capacity + 1):
            take_it = (item_weight <= cap) and \
                (item_value + best[item - 1, cap - item_weight] > best[item - 1, cap])
            if take_it:
                best[item, cap] = item_value + best[item - 1, cap - item_weight]
                taken[item, cap] = 1
            else:
                best[item, cap] = best[item - 1, cap]

    # Walk back from the last item, giving up the weight of every chosen item.
    chosen = []
    remaining = capacity
    for item in reversed(range(1, n_items + 1)):
        if taken[item, remaining] == 1:
            chosen.append(item - 1)  # store as 0-based index
            remaining -= weights[item - 1]
    chosen.sort()

    if not return_all:
        return chosen
    return chosen, best[n_items, capacity]


In [None]:
def generate_summary(ypred, cps, n_frames, nfps, positions, proportion=0.15, method='knapsack'):
    """
        Build a keyshot-based video summary as a binary vector.

    Args:
        ypred: predicted importance scores of the subsampled frames.
        cps: change points, 2D matrix, one ``[start, end]`` row per segment.
        n_frames: number of frames of the original video.
        nfps: number of frames of each segment.
        positions: positions of the subsampled frames in the original video.
        proportion: summary length budget as a fraction of ``n_frames``.
        method: segment selection strategy, ``'knapsack'`` or ``'rank'``.

    Returns:
        float32 vector with one entry per frame of the first ``n_segs`` segments:
        1 inside selected segments, 0 elsewhere.

    Raises:
        KeyError: if ``method`` is neither ``'knapsack'`` nor ``'rank'``.
    """

    n_segs = cps.shape[0]

    # Spread every subsampled score over the frames up to the next sampled position.
    if positions.dtype != int:
        positions = positions.astype(np.int32)
    if positions[-1] != n_frames:
        positions = np.concatenate([positions, [n_frames]])

    frame_scores = np.zeros((n_frames), dtype=np.float32)
    for k, (lo, hi) in enumerate(zip(positions[:-1], positions[1:])):
        # the interval right after the last prediction gets a zero score
        frame_scores[lo:hi] = 0 if k == len(ypred) else ypred[k]

    # Segment score = mean frame score inside the (inclusive) change-point range.
    seg_score = [
        float(frame_scores[int(cps[s, 0]): int(cps[s, 1] + 1)].mean())
        for s in range(n_segs)
    ]

    limits = int(math.floor(n_frames * proportion))

    if method == 'knapsack':
        picks = knapsack_dp(seg_score, nfps, n_segs, limits)
    elif method == 'rank':
        # greedily take segments by decreasing score while staying under budget
        picks = []
        used = 0
        for cand in np.argsort(seg_score)[::-1].tolist():
            if used + nfps[cand] < limits:
                picks.append(cand)
                used += nfps[cand]
    else:
        raise KeyError("Unknown method {}".format(method))

    # One constant run per segment: ones for picked segments, zeros otherwise.
    picked = set(picks)
    runs = [np.zeros((0), dtype=np.float32)]
    for s in range(n_segs):
        fill = 1.0 if s in picked else 0.0
        runs.append(np.full((nfps[s]), fill, dtype=np.float32))
    return np.concatenate(runs)

def evaluate_summary(machine_summary, user_summary, eval_metric='avg'):
    """
        Compare machine summary with user summary (Keyshot-based).

    Args:
        machine_summary: 1D summary produced by the machine.
        user_summary: 2D array of user annotations, one row per user.
        eval_metric: {'avg', 'max'}
            'avg' : average results of comparing multiple human summaries.
            'max' : takes the maximum(best) out of multiple comparisons.

    Returns:
        ``(f_score, precision, recall)``. For 'max', precision and recall are those
        of the user with the best F-score.

    Raises:
        ValueError: if ``eval_metric`` is neither 'avg' nor 'max'.
    """
    # Fail early with a clear message instead of an unbound-variable error at the end.
    if eval_metric not in ('avg', 'max'):
        raise ValueError("Unknown eval_metric {}".format(eval_metric))

    # astype() copies, so the caller's arrays are not modified by the binarization.
    machine_summary = machine_summary.astype(np.float32)
    user_summary = user_summary.astype(np.float32)
    n_users, n_frames = user_summary.shape

    # binarization
    machine_summary[machine_summary > 0] = 1
    user_summary[user_summary > 0] = 1

    # Align the machine summary with the annotated length (truncate or zero-pad).
    if len(machine_summary) > n_frames:
        machine_summary = machine_summary[:n_frames]
    elif len(machine_summary) < n_frames:
        zero_padding = np.zeros((n_frames - len(machine_summary)))
        machine_summary = np.concatenate([machine_summary, zero_padding])

    f_scores = []
    prec_arr = []
    rec_arr = []

    for user_idx in range(n_users):
        gt_summary = user_summary[user_idx, :]
        overlap_duration = (machine_summary * gt_summary).sum()
        # the epsilon avoids division by zero for empty summaries
        precision = overlap_duration / (machine_summary.sum() + 1e-8)
        recall = overlap_duration / (gt_summary.sum() + 1e-8)
        if precision == 0 and recall == 0:
            f_score = 0.
        else:
            f_score = (2 * precision * recall) / (precision + recall)

        f_scores.append(f_score)
        prec_arr.append(precision)
        rec_arr.append(recall)

    if eval_metric == 'avg':
        final_f_score = np.mean(f_scores)
        final_prec = np.mean(prec_arr)
        final_rec = np.mean(rec_arr)
    else:  # 'max'
        max_idx = np.argmax(f_scores)
        final_f_score = np.max(f_scores)
        final_prec = prec_arr[max_idx]
        final_rec = rec_arr[max_idx]

    return final_f_score, final_prec, final_rec

In [None]:
def compute_reward(seq, actions, ignore_far_sim = True, temp_dist_thre = 20, use_gpu = False):
    """
    Compute diversity reward and representativeness reward

    Args:
        seq: sequence of features, shape (1, seq_len, dim)
        actions: binary action sequence, shape (1, seq_len, 1)
        ignore_far_sim (bool): whether to ignore temporally distant similarity (default: True)
        temp_dist_thre (int): threshold for ignoring temporally distant similarity (default: 20)
        use_gpu (bool): whether to move the constant zero rewards to the GPU

    Returns:
        Scalar tensor: the mean of the diversity and representativeness rewards,
        or 0 when no frame is selected.
    """
    _seq = seq.detach()
    _actions = actions.detach()
    # Flatten explicitly so the index vector stays 1-D even when a single frame is
    # picked; a squeezed 0-d index would drop a dimension in dist_mat[:, pick_idxs].
    pick_idxs = _actions.reshape(-1).nonzero().reshape(-1)
    num_picks = pick_idxs.numel()

    if num_picks == 0:
        # give zero reward if no frames are selected
        reward = torch.tensor(0.)
        if use_gpu: reward = reward.cuda()
        return reward

    # (1, seq_len, dim) -> (seq_len, dim), also valid when seq_len is 1
    _seq = _seq.reshape(-1, _seq.size(-1))
    n = _seq.size(0)

    # compute diversity reward
    if num_picks == 1:
        reward_div = torch.tensor(0.)
        if use_gpu: reward_div = reward_div.cuda()
    else:
        normed_seq = _seq / _seq.norm(p=2, dim=1, keepdim=True)
        dissim_mat = 1. - torch.matmul(normed_seq, normed_seq.t()) # dissimilarity matrix [Eq.4]
        dissim_submat = dissim_mat[pick_idxs,:][:,pick_idxs]
        if ignore_far_sim:
            # ignore temporally distant similarity
            pick_mat = pick_idxs.expand(num_picks, num_picks)
            temp_dist_mat = torch.abs(pick_mat - pick_mat.t())
            dissim_submat[temp_dist_mat > temp_dist_thre] = 1.
        reward_div = dissim_submat.sum() / (num_picks * (num_picks - 1.)) # diversity reward [Eq.3]

    # compute representativeness reward
    # squared Euclidean distances: |a|^2 + |b|^2 - 2 a.b
    sq_norm = torch.pow(_seq, 2).sum(dim=1, keepdim=True).expand(n, n)
    dist_mat = sq_norm + sq_norm.t()
    # keyword form of addmm_; the positional (beta, alpha, mat1, mat2) form is deprecated
    dist_mat.addmm_(_seq, _seq.t(), beta=1, alpha=-2)
    dist_mat = dist_mat[:,pick_idxs]
    dist_mat = dist_mat.min(1, keepdim=True)[0]
    reward_rep = torch.exp(-dist_mat.mean())

    # combine the two rewards
    reward = (reward_div + reward_rep) * 0.5

    return reward

In [None]:
def compute_F_score(probs, dataset, key, use_gpu):
    """Score ``probs`` for one video against its user summaries.

    The scores are turned into a keyshot summary with ``generate_summary`` and
    compared to the stored annotations with ``evaluate_summary``. The aggregation
    is 'avg' when METRIC is 'tvsum' and 'max' otherwise.

    Args:
        probs: tensor of per-frame scores; it is squeezed before use.
        dataset: opened dataset indexed by video key; reads 'change_points',
            'n_frames', 'n_frame_per_seg', 'picks' and 'user_summary'.
        key: key of the video inside ``dataset``.
        use_gpu: unused; kept so existing callers keep working.

    Returns:
        The F-score as a NumPy scalar.
    """
    eval_metric = 'avg' if METRIC == 'tvsum' else 'max'

    # The video features were previously loaded (and copied to the GPU) here but
    # never read; only the scores and the annotations are needed.
    probs = probs.detach().cpu().squeeze().numpy()

    video = dataset[key]
    cps = video['change_points'][...]
    num_frames = video['n_frames'][()]
    nfps = video['n_frame_per_seg'][...].tolist()
    positions = video['picks'][...]
    user_summary = video['user_summary'][...]

    machine_summary = generate_summary(probs, cps, num_frames, nfps, positions)
    fm, _, _ = evaluate_summary(machine_summary, user_summary, eval_metric)

    # np.mean makes sure a printable scalar is returned
    return np.mean(fm)

In [None]:
def calculate_root_reward(change_index, update_idx, seq, global_all_norm, dataset, train_key, use_gpu):
    """Reward for the root (global) policy.

    Marks the first ``Local_Seq_Len`` entries of ``update_idx`` as selected frames,
    scores that selection with ``compute_F_score`` and adds ``max_epis`` or ``epis``
    to the diversity/representativeness reward from ``compute_reward``.

    Args:
        change_index: slot whose F-score is compared with the best one.
        update_idx: frame indices to mark as selected.
        seq: NumPy array of frame features, one row per frame.
        global_all_norm: unused; kept so existing callers keep working.
        dataset: opened dataset passed on to ``compute_F_score``.
        train_key: key of the video inside ``dataset``.
        use_gpu: passed on to ``compute_F_score``.

    Returns:
        The reward as a scalar tensor.
    """
    length = len(seq)
    seq = torch.from_numpy(seq).unsqueeze(0)
    actions = torch.zeros(1, length, 1)
    for i in range(Local_Seq_Len):
        actions[0][update_idx[i]] = 1

    # Every slot used to be scored with the very same ``actions``, i.e. the same
    # F-score was recomputed Local_Seq_Len times; evaluate it once instead.
    f_score = compute_F_score(actions, dataset, train_key, use_gpu)
    F_score_list = torch.full((Local_Seq_Len,), float(f_score))

    # NOTE(review): all entries are identical, so this comparison only fails when the
    # F-score is NaN -- the per-slot scores were presumably meant to differ; confirm.
    if F_score_list[change_index] == F_score_list.max():
        reward = max_epis + compute_reward(seq, actions)
    else:
        reward = epis + compute_reward(seq, actions)

    return reward

In [None]:
def calculate_leaf_reward(change_index, update_idx, seq, dataset, train_key, use_gpu):
    """Reward for the leaf (move) policy.

    Marks the first ``Local_Seq_Len`` entries of ``update_idx`` as selected frames
    and returns ``epis`` plus the reward from ``compute_reward``.

    Args:
        change_index: unused.
        update_idx: frame indices to mark as selected.
        seq: NumPy array of frame features, one row per frame.
        dataset: unused.
        train_key: unused.
        use_gpu: unused.

    Returns:
        The reward as a scalar tensor.
    """
    n_frames = len(seq)
    features = torch.from_numpy(seq).unsqueeze(0)
    selection = torch.zeros(1, n_frames, 1)
    for frame in (update_idx[slot] for slot in range(Local_Seq_Len)):
        selection[0][frame] = 1

    return epis + compute_reward(features, selection)

In [None]:
def inital_data_process(seq):
    """Build the initial state for one video.

    Ten frames are picked with ``choose_ten_frame`` and their feature rows are
    concatenated along axis 0; exactly ten rows are used regardless of
    ``Global_Seq_Len``.

    Args:
        seq: array of frame features, indexed by frame.

    Returns:
        ``(global_feature_concate, idx, idx_list, five_unit, num_units)``: the
        concatenated features, the ten frame indices, the list
        ``1..Global_Seq_Len``, the coarse move stride ``int(len(seq) / SEGMENT_LEN)``
        and the number of frames.
    """
    idx = choose_ten_frame(len(seq))
    sampled_rows = [seq[idx[slot]] for slot in range(10)]
    global_feature_concate = np.concatenate(sampled_rows, axis=0)

    num_units = len(seq)
    five_unit = int(len(seq) / SEGMENT_LEN)
    idx_list = list(range(1, Global_Seq_Len + 1))
    return global_feature_concate, idx, idx_list, five_unit, num_units

In [None]:
def determine_left_move_range(idx, index, current_id, five_unit, num_units):
    """Try to move the frame index in slot ``index`` left by ``five_unit`` frames.

    The candidate position is clamped to ``[0, num_units - 1]`` and accepted only
    if it stays strictly between the neighbouring entries of ``idx``. A rejected
    move leaves ``idx`` untouched (it used to be overwritten with the rejected
    position).

    Args:
        idx: mutable sequence of frame indices. On an accepted move ``idx[index]``
            is overwritten in place, which is how callers that pass the same
            list again see the new position.
        index: slot of ``idx`` to move.
        current_id: current frame index of that slot.
        five_unit: number of frames to move by.
        num_units: number of frames in the video.

    Returns:
        ``(indices, update_id, current_id, abnormal_done)``: a tensor copy of
        ``idx`` after the call, the new position (``current_id`` when rejected),
        the old position, and 1 for an accepted move / 0 for a rejected one.
    """
    num_units = int(num_units)
    update_id = current_id
    abnormal_done = 1

    if current_id < 0 or current_id > num_units:
        abnormal_done = 0
    else:
        update_id = current_id - five_unit
        if update_id < 0:
            update_id = 0
        if update_id >= num_units:
            update_id = num_units - 1

        # The new position must not reach or cross an existing neighbour.
        if index + 1 < len(idx) and update_id >= idx[index + 1]:
            abnormal_done = 0
        if index > 0 and update_id <= idx[index - 1]:
            abnormal_done = 0

    if abnormal_done == 0:
        return torch.from_numpy(np.array(idx)), current_id, current_id, abnormal_done

    idx[index] = update_id
    return torch.from_numpy(np.array(idx)), update_id, current_id, abnormal_done

In [None]:
def determine_right_move_range(idx, index, current_id, five_unit, num_units):
    """Try to move the frame index in slot ``index`` right by ``five_unit`` frames.

    The candidate position is clamped to ``[0, num_units - 1]`` and accepted only
    if it stays strictly between the neighbouring entries of ``idx``. A rejected
    move leaves ``idx`` untouched (it used to be overwritten with the rejected
    position).

    Args:
        idx: mutable sequence of frame indices. On an accepted move ``idx[index]``
            is overwritten in place, which is how callers that pass the same
            list again see the new position.
        index: slot of ``idx`` to move.
        current_id: current frame index of that slot.
        five_unit: number of frames to move by.
        num_units: number of frames in the video.

    Returns:
        ``(indices, update_id, current_id, abnormal_done)``: a tensor copy of
        ``idx`` after the call, the new position (``current_id`` when rejected),
        the old position, and 1 for an accepted move / 0 for a rejected one.
    """
    num_units = int(num_units)
    update_id = current_id
    abnormal_done = 1

    if current_id < 0 or current_id > num_units:
        abnormal_done = 0
    else:
        update_id = current_id + five_unit
        if update_id < 0:
            update_id = 0
        if update_id >= num_units:
            update_id = num_units - 1

        # The new position must not reach or cross an existing neighbour.
        if index + 1 < len(idx) and update_id >= idx[index + 1]:
            abnormal_done = 0
        if index > 0 and update_id <= idx[index - 1]:
            abnormal_done = 0

    if abnormal_done == 0:
        return torch.from_numpy(np.array(idx)), current_id, current_id, abnormal_done

    idx[index] = update_id
    return torch.from_numpy(np.array(idx)), update_id, current_id, abnormal_done

In [None]:
def determine_left_offset_range(idx, index, current_id, five_unit, num_units):
    """Try to shift the frame index in slot ``index`` left by one frame.

    The candidate position is clamped to ``[0, num_units - 1]`` and accepted only
    if it stays strictly between the neighbouring entries of ``idx``. A rejected
    shift leaves ``idx`` untouched (it used to be overwritten with the rejected
    position).

    Args:
        idx: mutable sequence of frame indices. On an accepted shift ``idx[index]``
            is overwritten in place, which is how callers that pass the same
            list again see the new position.
        index: slot of ``idx`` to shift.
        current_id: current frame index of that slot.
        five_unit: unused here; kept so all four move functions share a signature.
        num_units: number of frames in the video.

    Returns:
        ``(indices, update_id, current_id, abnormal_done)``: a tensor copy of
        ``idx`` after the call, the new position (``current_id`` when rejected),
        the old position, and 1 for an accepted shift / 0 for a rejected one.
    """
    num_units = int(num_units)
    update_id = current_id
    abnormal_done = 1

    if current_id < 0 or current_id > num_units:
        abnormal_done = 0
    else:
        update_id = current_id - 1
        if update_id < 0:
            update_id = 0
        if update_id >= num_units:
            update_id = num_units - 1

        # The new position must not reach or cross an existing neighbour.
        if index + 1 < len(idx) and update_id >= idx[index + 1]:
            abnormal_done = 0
        if index > 0 and update_id <= idx[index - 1]:
            abnormal_done = 0

    if abnormal_done == 0:
        return torch.from_numpy(np.array(idx)), current_id, current_id, abnormal_done

    idx[index] = update_id
    return torch.from_numpy(np.array(idx)), update_id, current_id, abnormal_done

In [None]:
def determine_right_offset_range(idx, index, current_id, five_unit, num_units):
    """Try to shift the frame index in slot ``index`` right by one frame.

    The candidate position is clamped to ``[0, num_units - 1]`` and accepted only
    if it stays strictly between the neighbouring entries of ``idx``. A rejected
    shift leaves ``idx`` untouched (it used to be overwritten with the rejected
    position).

    Args:
        idx: mutable sequence of frame indices. On an accepted shift ``idx[index]``
            is overwritten in place, which is how callers that pass the same
            list again see the new position.
        index: slot of ``idx`` to shift.
        current_id: current frame index of that slot.
        five_unit: unused here; kept so all four move functions share a signature.
        num_units: number of frames in the video.

    Returns:
        ``(indices, update_id, current_id, abnormal_done)``: a tensor copy of
        ``idx`` after the call, the new position (``current_id`` when rejected),
        the old position, and 1 for an accepted shift / 0 for a rejected one.
    """
    num_units = int(num_units)
    update_id = current_id
    abnormal_done = 1

    if current_id < 0 or current_id > num_units:
        abnormal_done = 0
    else:
        update_id = current_id + 1
        if update_id < 0:
            update_id = 0
        if update_id >= num_units:
            update_id = num_units - 1

        # The new position must not reach or cross an existing neighbour.
        if index + 1 < len(idx) and update_id >= idx[index + 1]:
            abnormal_done = 0
        if index > 0 and update_id <= idx[index - 1]:
            abnormal_done = 0

    if abnormal_done == 0:
        return torch.from_numpy(np.array(idx)), current_id, current_id, abnormal_done

    idx[index] = update_id
    return torch.from_numpy(np.array(idx)), update_id, current_id, abnormal_done

In [None]:
def freeze_net(net, global_flag):
    """Freeze one direct child of ``net`` and make all other children trainable.

    Children are counted from 1 in ``net.children()`` order. With
    ``global_flag == True`` the 6th child is frozen (local policy network);
    otherwise the 5th child is frozen (global policy network).

    Args:
        net: module whose direct children are toggled.
        global_flag: selects which child is frozen.
    """
    frozen_slot = 6 if global_flag == True else 5
    for slot, child in enumerate(net.children(), start=1):
        trainable = slot != frozen_slot
        for param in child.parameters():
            param.requires_grad = trainable

In [None]:
def evaluate(model, test_keys, use_gpu):
    """Run the policy on every test video and report the mean best F-score.

    For each video the model is stepped ``NUM_STEPS`` times. Every step scores the
    DSN output against the user summaries, then applies the greedy (arg-max)
    global and move actions to the sampled frame indices and rebuilds the local
    feature input from the moved frames. The best F-score over the steps is kept
    per video.

    The dataset file is closed and the model is put back in training mode even if
    an error occurs.

    Args:
        model: the network; called as
            ``model(seq, global_feature, local_feature, hidden_state)``.
        test_keys: keys of the videos to evaluate.
        use_gpu: whether to move the model and its inputs to the GPU.

    Returns:
        Mean of the per-video best F-scores.
    """
    print("===> Evaluation")
    eval_metric = 'avg' if METRIC == 'tvsum' else 'max'
    # move action id -> function applying it
    move_fns = (determine_left_move_range, determine_right_move_range,
                determine_left_offset_range, determine_right_offset_range)
    fms = []

    try:
        with h5py.File(DATASET, 'r') as dataset, torch.no_grad():
            model.eval()
            if use_gpu: model = model.cuda()

            test_dataset = Charades_dataset(test_keys)
            testloader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = BATCH_SIZE, shuffle = True, num_workers = 0)

            for batch_idx, key in enumerate(testloader):
                # Per-video data does not change between steps, so read it once.
                video = dataset[key[0]]
                seq = video['features'][...]
                cps = video['change_points'][...]
                num_frames = video['n_frames'][()]
                nfps = video['n_frame_per_seg'][...].tolist()
                positions = video['picks'][...]
                user_summary = video['user_summary'][...]

                seq_furtrue = torch.from_numpy(seq).unsqueeze(0)
                global_feature_concate, idx, idx_list, five_unit, num_units = inital_data_process(seq)
                global_feature_concate = torch.from_numpy(global_feature_concate).unsqueeze(0)
                hidden_state = torch.zeros(BATCH_SIZE, Visual_Feature_Dim)
                current_feature_concate = global_feature_concate
                BEST_F_score = 0.0

                for step in range(NUM_STEPS):
                    if use_gpu:
                        hidden_state = hidden_state.cuda()
                        current_feature_concate = current_feature_concate.cuda()
                        global_feature_concate = global_feature_concate.cuda()
                        seq_furtrue = seq_furtrue.cuda()

                    hidden_state, global_policy, move_policy, iou_out2 = model(seq_furtrue, global_feature_concate, current_feature_concate, hidden_state)

                    probs = iou_out2.data.cpu().squeeze().numpy()
                    machine_summary = generate_summary(probs, cps, num_frames, nfps, positions)
                    fm, _, _ = evaluate_summary(machine_summary, user_summary, eval_metric)

                    if fm > BEST_F_score:
                        BEST_F_score = fm

                    global_policy_prob = F.softmax(global_policy, dim = 1)
                    global_policy_action = global_policy_prob.max(1, keepdim = True)[1].data.cpu().numpy()[:, 0]

                    update_idx = idx
                    local_abnormal_done = torch.ones(BATCH_SIZE)

                    for i in range(BATCH_SIZE):
                        # the global action says which frame slot to operate on
                        local_id = int(global_policy_action[i])
                        move_policy_prob = F.softmax(move_policy[i], dim = 0)
                        move_policy_action = int(move_policy_prob.max(0, keepdim=True)[1].data.cpu().numpy()[0])

                        if move_policy_action < len(move_fns):
                            update_idx, _, _, local_abnormal_done[i] = move_fns[move_policy_action](
                                idx, local_id, idx[local_id], five_unit, num_units)
                        else:
                            local_abnormal_done[i] = 0

                    # Rebuild the local input from the features of the ten moved frames.
                    moved_rows = [seq[update_idx[slot]] for slot in range(10)]
                    current_feature_concate = torch.from_numpy(np.concatenate(moved_rows, axis=0)).unsqueeze(0)

                fms.append(BEST_F_score)
    finally:
        model.train()

    mean_fm = np.mean(fms)
    print("Average F-Score {:.1%}".format(mean_fm))
    return mean_fm

In [None]:
def evaluate_model(model, use_gpu):
    """Evaluate ``model`` on the test keys of split ``SPLIT_ID``.

    Args:
        model: network passed on to ``evaluate``.
        use_gpu: ignored; the value is replaced by ``torch.cuda.is_available()``.

    Returns:
        The mean F-score returned by ``evaluate``.
    """
    all_splits = read_json(SPLIT)
    # the argument is overridden by the actual CUDA availability
    use_gpu = torch.cuda.is_available()
    assert SPLIT_ID < len(all_splits), "split_id (got {}) exceeds {}".format(SPLIT_ID, len(all_splits))
    chosen_split = all_splits[SPLIT_ID]
    return evaluate(model, chosen_split["test_keys"], use_gpu)

## Datasets

In [None]:
class Charades_dataset(torch.utils.data.Dataset):
    """Dataset that serves video keys; each item is one key from ``keys``."""

    def __init__(self, keys):
        # the length is captured once, at construction time
        self.keys, self.len = keys, len(keys)

    def __len__(self):
        """Number of keys recorded at construction."""
        return self.len

    def __getitem__(self, index):
        """Return the key stored at ``index``."""
        return self.keys[index]

## model

In [None]:
class DSN(nn.Module):
    """ Deep Summarization Network: bidirectional RNN followed by a per-step sigmoid score. """

    def __init__(self, in_dim = 1024, hid_dim = 256, num_layers = 1, cell = 'lstm'):
        """
        Args:
            in_dim: size of each input feature vector.
            hid_dim: hidden size of each RNN direction.
            num_layers: number of stacked RNN layers.
            cell: 'lstm' or 'gru'.
        """
        super().__init__()
        assert cell in ['lstm', 'gru'], "cell must be either 'lstm' or 'gru"

        rnn_cls = {'lstm': nn.LSTM, 'gru': nn.GRU}[cell]
        self.rnn = rnn_cls(in_dim, hid_dim, num_layers = num_layers, bidirectional = True, batch_first = True)
        # both directions are concatenated, hence hid_dim * 2 inputs
        self.fc = nn.Linear(hid_dim * 2, 1)

    def forward(self, x):
        """Return one score in (0, 1) per time step of the batch-first input ``x``."""
        hidden_seq = self.rnn(x)[0]
        return torch.sigmoid(self.fc(hidden_seq))

In [None]:
class PRL_VS(nn.Module):
    """Policy network combining a global (root) policy, a move policy and a DSN scorer.

    The order in which the sub-modules are registered matters: ``freeze_net``
    addresses ``global_policy`` and ``move_policy`` by their position (5th and 6th
    child).
    """

    def __init__(self):
        """Create the layers and initialise their weights."""
        super().__init__()
        self.visual_feature_dim = Visual_Feature_Dim

        # embeddings of the concatenated global / local frame features
        self.gobal_fc = nn.Linear(self.visual_feature_dim * Global_Seq_Len, 1024)
        self.local_fc = nn.Linear(self.visual_feature_dim * Local_Seq_Len, 1024)

        # fuses [gated, global, local] into the GRU input
        self.state_fc = nn.Linear(1024 * 3, 1024)

        self.gru = nn.GRUCell(1024, 1024)

        self.global_policy = nn.Linear(1024, Local_Seq_Len)  # 5th child
        self.move_policy = nn.Linear(1024, 4)  # 6th child
        self.DSN = DSN()

        # Initializing weights: generic init first, then the two policy heads
        self.apply(weights_init)
        for head in (self.global_policy, self.move_policy):
            head.weight.data = normalized_columns_initializer(head.weight.data, 0.01)
            head.bias.data.fill_(0)

    @staticmethod
    def _embed(fc, feature):
        """Project with ``fc``, L2-normalise along dim 1, then apply ReLU."""
        return F.relu(F.normalize(fc(feature), p = 2, dim = 1))

    def forward(self, seq, global_feature, local_feature, hidden_state):
        """Advance the recurrent state by one step and score the sequence.

        Args:
            seq: frame features passed to the DSN.
            global_feature: concatenated features of the initially sampled frames.
            local_feature: concatenated features of the currently selected frames.
            hidden_state: previous GRU hidden state.

        Returns:
            ``(hidden_state, global_policy, move_policy, iou_out2)``: the new hidden
            state, the logits of both policy heads and the DSN scores.
        """
        global_feature_norm = self._embed(self.gobal_fc, global_feature)  # [batch_size, 1024]
        local_feature_norm = self._embed(self.local_fc, local_feature)    # [batch_size, 1024]

        # gate-attention: the global embedding gates the local one element-wise
        assert local_feature_norm.size() == global_feature_norm.size()
        gated_feature = local_feature_norm * global_feature_norm  # [batch_size, 1024]

        fused = torch.cat([gated_feature, global_feature_norm, local_feature_norm], 1)  # [batch_size, 3072]
        fused = F.relu(self.state_fc(fused))  # [batch_size, 1024]

        hidden_state = self.gru(fused, hidden_state)  # [batch_size, 1024]

        global_policy = self.global_policy(hidden_state)
        move_policy = self.move_policy(hidden_state)
        iou_out2 = self.DSN(seq)

        return hidden_state, global_policy, move_policy, iou_out2

## train

In [None]:
def train():
    """Train ``PRL_VS`` on the train split selected by ``SPLIT`` / ``SPLIT_ID``.

    The run is driven entirely by the notebook-level configuration constants
    (``DATASET``, ``LR``, ``MAX_EPOCH``, ``NUM_STEPS``, ``RESUME``, ...).

    For every training video the model is called ``NUM_STEPS`` times. At each
    step a root action (which entry of ``idx`` to change) and a move action
    (which ``determine_*`` helper to apply) are either sampled or arg-maxed,
    depending on which policy is currently being trained, a reward is computed
    and one optimizer update is performed. The policy being trained is toggled
    every ``Switch_Iteration`` videos via ``freeze_net``.

    Every 5th epoch the model is scored with ``evaluate_model``; a checkpoint
    is written to ``SAVE_DIR`` whenever that F-score beats the best value seen
    so far in this run.
    """
    torch.manual_seed(SEED)
    use_gpu = torch.cuda.is_available()

    if use_gpu:
        print("Currently using GPU")
        cudnn.benchmark = True
        torch.cuda.manual_seed(SEED)
    else:
        print("Currently using CPU")

    print("Initialize dataset {}".format(DATASET))
    dataset = h5py.File(DATASET, 'r')
    num_videos = len(dataset.keys())
    splits = read_json(SPLIT)

    # NOTE(review): TEST is not defined in the visible part of the notebook. If it
    # is truthy, train_keys is never bound and the training setup below fails.
    if not TEST:
        assert SPLIT_ID < len(splits), "split_id (got {}) exceeds {}".format(SPLIT_ID, len(splits))
        split = splits[SPLIT_ID]
        train_keys = split["train_keys"]
        test_keys = split["test_keys"]
        print("# total videos {}. # train videos {}. # test videos {}.".format(num_videos, len(train_keys), len(test_keys)))

    model = PRL_VS()
    print("Initialize model")
    print("Model Size: {:.5f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))

    optimizer = torch.optim.Adam(model.parameters(), lr = LR, weight_decay = WEIGHT_DECAY)

    if STEP_SIZE > 0:
        # NOTE(review): the scheduler is built but scheduler.step() is never called,
        # so the learning rate stays at LR for the whole run. Left unchanged on
        # purpose because enabling the decay would alter training dynamics.
        scheduler = lr_scheduler.StepLR(optimizer, step_size = STEP_SIZE, gamma = GAMMA)
    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if RESUME:
        print("Retrain...")
        print("Loading checkpoint from '{}'".format(LOAD_DIR))
        checkpoint = torch.load(LOAD_DIR, map_location='cpu')
        # Under DataParallel the real network lives in model.module.
        if use_gpu:
            model.module.load_state_dict(checkpoint)
        else:
            model.load_state_dict(checkpoint)
        start_epoch = START_EPOCH
    else:
        start_epoch = 0

    print("===> Start training")
    model.train()
    iteration = 0
    global_flag = False

    train_dataset = Charades_dataset(train_keys)
    trainloader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = BATCH_SIZE, shuffle = True, num_workers = 0)

    # Best F-score of this run. It must live outside the epoch loop: resetting it
    # every epoch would make every positive F-score look like a new best.
    Max_F_score = 0

    for epoch in range(start_epoch, start_epoch + MAX_EPOCH):
        sum_loss = 0.0
        start_time = time.time()

        for batch_idx, key in enumerate(trainloader):
            seq = dataset[key[0]]['features'][...] # sequence of features, (seq_len, dim)
            seq_furtrue = torch.from_numpy(seq).unsqueeze(0)
            global_feature_concate, idx, idx_list, five_unit, num_units = inital_data_process(seq)
            global_feature_concate = torch.from_numpy(global_feature_concate).unsqueeze(0)
            # The first step uses the global features as the local input as well.
            current_feature_concate = global_feature_concate
            hidden_state = torch.zeros(BATCH_SIZE, Visual_Feature_Dim)

            # Alternate between training the root (global) and the leaf (move) policy.
            if iteration % Switch_Iteration == 0:
                global_flag = not global_flag
                freeze_net(model, global_flag)

            for step in range(NUM_STEPS):

                if use_gpu:
                    hidden_state = hidden_state.cuda()
                    current_feature_concate = current_feature_concate.cuda()
                    global_feature_concate = global_feature_concate.cuda()
                    seq_furtrue = seq_furtrue.cuda()

                # Model output
                hidden_state, global_policy, move_policy, iou_out2 = model(seq_furtrue, global_feature_concate, current_feature_concate, hidden_state)

                global_policy_prob = F.softmax(global_policy, dim = 1)

                # Sample the root action while the root policy is trained, otherwise act greedily.
                if global_flag:
                    global_policy_action = global_policy_prob.multinomial(num_samples = 1).data
                    global_policy_action = global_policy_action.cpu().numpy()[:, 0]
                else:
                    global_policy_action = global_policy_prob.max(1, keepdim = True)[1].data.cpu().numpy()[:, 0]

                update_idx = idx
                local_abnormal_done = torch.ones(BATCH_SIZE)
                # When training the root policy: the position each entry would finally move to.
                global_all_norm = torch.zeros(BATCH_SIZE, Local_Seq_Len, Local_Seq_Len)
                # When training the root policy: records abnormal moves.
                global_abnormal_done_all_norm = torch.ones(BATCH_SIZE, Local_Seq_Len)

                if use_gpu:
                    global_all_norm = global_all_norm.cuda()
                    global_abnormal_done_all_norm = global_abnormal_done_all_norm.cuda()

                for i in range(BATCH_SIZE):

                    if global_flag:
                        # Apply the greedy move action to every entry so the root
                        # reward can compare all candidate positions.
                        move_policy_prob = F.softmax(move_policy[i], dim = 0)
                        move_policy_action = move_policy_prob.max(0, keepdim=True)[1].data.cpu().numpy()

                        for local_id in range(Local_Seq_Len):
                            if move_policy_action == 0:
                                global_all_norm[i][local_id], _, _, global_abnormal_done_all_norm[i][local_id] = determine_left_move_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 1:
                                global_all_norm[i][local_id], _, _, global_abnormal_done_all_norm[i][local_id] = determine_right_move_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 2:
                                global_all_norm[i][local_id], _, _, global_abnormal_done_all_norm[i][local_id] = determine_left_offset_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 3:
                                global_all_norm[i][local_id], _, _, global_abnormal_done_all_norm[i][local_id] = determine_right_offset_range(idx, local_id, idx[local_id], five_unit, num_units)
                            else:
                                global_abnormal_done_all_norm[i][local_id] = 0

                    for local_id in range(Local_Seq_Len):
                        if global_policy_action[i] == local_id:      # the entry chosen by the root policy
                            move_policy_prob = F.softmax(move_policy[i], dim = 0)
                            if global_flag:  # training the root policy: the move policy acts greedily
                                move_policy_action = move_policy_prob.max(0, keepdim = True)[1].data.cpu().numpy()
                            else:            # training the move policy: sample its action
                                move_policy_action = move_policy_prob.multinomial(num_samples=1).data
                                move_policy_action = move_policy_action.cpu().numpy()[0]
                            if move_policy_action == 0:
                                update_idx, _, _, local_abnormal_done[i] = determine_left_move_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 1:
                                update_idx, _, _, local_abnormal_done[i] = determine_right_move_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 2:
                                update_idx, _, _, local_abnormal_done[i] = determine_left_offset_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 3:
                                update_idx, _, _, local_abnormal_done[i] = determine_right_offset_range(idx, local_id, idx[local_id], five_unit, num_units)
                            else:
                                local_abnormal_done[i] = 0

                    # Update the local input from the first 10 entries of update_idx.
                    # NOTE(review): the count 10 is hard-coded while Local_Seq_Len is
                    # configurable - confirm they are meant to be independent.
                    current_feature_concate = torch.from_numpy(
                        np.concatenate([seq[update_idx[k]] for k in range(10)], axis=0)
                    ).unsqueeze(0)

                    # Compute the reward
                    change_index = global_policy_action[i]
                    train_key = key[0]

                    if global_flag:
                        expect_reward = calculate_root_reward(change_index, update_idx, seq, global_all_norm, dataset, train_key, use_gpu)
                    else:
                        expect_reward = calculate_leaf_reward(change_index, update_idx, seq, dataset, train_key, use_gpu)

                    # REINFORCE-style term on the frame scores returned by the model.
                    m = Bernoulli(iou_out2)
                    actions = m.sample()
                    log_probs = m.log_prob(actions)
                    expect_reward = log_probs.mean() * (expect_reward)

                    # The BETA term pulls the mean frame score towards 0.5.
                    expect_loss = BETA * (iou_out2.mean() - 0.5) ** 2 - expect_reward

                    optimizer.zero_grad()
                    expect_loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
                    optimizer.step()
                    # Accumulate a plain float so the logged total does not keep
                    # every step's autograd graph alive.
                    sum_loss += expect_loss.item()

            iteration += 1

        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds = elapsed))
        print("epoch {}/{}\t Loss {:.8f}\t {}".format(epoch + 1, MAX_EPOCH, sum_loss, elapsed))

        if (epoch + 1) % 5 == 0:
            F_score = evaluate_model(model, use_gpu)
            if F_score > Max_F_score:
                model_state_dict = model.module.state_dict() if use_gpu else model.state_dict()
                model_save_path = os.path.join(SAVE_DIR, 'model_epoch' + str(epoch) + '_'+ str(F_score) + '_' + str(DATASET_NAME) + '.pth.tar')
                save_checkpoint(model_state_dict, model_save_path)
                print("Model saved to {}".format(model_save_path))
                Max_F_score = F_score

    dataset.close()

In [None]:
# Start training with the notebook-level configuration constants.
train()

# EVALUATE

## old evaluate method

In [None]:
# Build a fresh model, load a saved state dict onto the CPU and score it with
# evaluate_model. The checkpoint path is hard-coded for one specific run.
model = PRL_VS()
use_gpu = torch.cuda.is_available()
model_save_path = os.path.join('resualt/model_epoch104_0.40463972473017973_summe.pth.tar')
print("Loading checkpoint from '{}'".format(model_save_path))
# map_location='cpu' lets a checkpoint saved on a GPU machine load anywhere.
checkpoint = torch.load(model_save_path, map_location = 'cpu')
model.load_state_dict(checkpoint)
evaluate_model(model, use_gpu)

### random_summary

In [None]:
def change_point_to_segment(change_point):
    """Return the second element of every entry of ``change_point`` as a list."""
    return [change_point[pos][1] for pos in range(len(change_point))]

In [None]:
def get_summe_gssummary(dataset, test_keys):
    """Collect the ``user_summary`` entry of every test video.

    Returns:
        list[dict]: one ``{'gs_summary': ..., 'video': key}`` record per key,
        in the order of ``test_keys``. ``gs_summary`` is the full contents of
        ``dataset[key]['user_summary']`` (read with ``[...]``).
    """
    return [
        {
            'gs_summary': dataset[video_key]['user_summary'][...],
            'video': video_key,
        }
        for video_key in test_keys
    ]

In [None]:
def knapsack(items, maxweight):
    """Solve the 0/1 knapsack problem by dynamic programming.

    Args:
        items: sequence of ``(value, weight)`` pairs; weights are used as
            list indices and therefore have to be integers.
        maxweight: integer capacity of the knapsack.

    Returns:
        tuple: ``(best_value, indices)`` where ``indices`` are the positions
        in ``items`` of the selected entries, in ascending order.
    """
    item_count = len(items)

    # table[r][c]: best value using only the first r items with capacity c.
    table = [[0] * (maxweight + 1) for _ in range(item_count + 1)]

    for row, (value, weight) in enumerate(items, start=1):
        previous, current = table[row - 1], table[row]
        for capacity in range(maxweight + 1):
            without_item = previous[capacity]
            if weight > capacity:
                current[capacity] = without_item
            else:
                with_item = previous[capacity - weight] + value
                current[capacity] = max(without_item, with_item)

    # Walk the table backwards: a change between consecutive rows means the
    # item of that row was taken.
    chosen = []
    remaining = maxweight
    row = item_count
    while row > 0:
        if table[row][remaining] != table[row - 1][remaining]:
            chosen.append(row - 1)
            remaining -= items[row - 1][1]
        row -= 1

    chosen.reverse()

    return table[item_count][maxweight], chosen

def summarize(score, segment, capacity, use_sum=False):
    """Select whole segments with a knapsack and return a per-element 0/1 mask.

    ``score`` is flattened and cut at the indices in ``segment`` with
    ``np.split``; empty pieces are discarded. Each remaining piece becomes a
    knapsack item whose weight is its length and whose value is its summed
    (``use_sum=True``) or mean score. Elements of the selected pieces are set
    to 1 in the returned array, all others stay 0.
    """
    flat_scores = np.asarray(score).ravel()
    selection = np.zeros_like(flat_scores)

    # Cut scores and mask at the same positions and drop empty pieces from both.
    score_chunks = [chunk for chunk in np.split(flat_scores, segment) if chunk.size]
    mask_chunks = [chunk for chunk in np.split(selection, segment) if chunk.size]

    weights = [chunk.size for chunk in score_chunks]
    values = [chunk.sum() if use_sum else chunk.mean() for chunk in score_chunks]

    _, picked = knapsack(list(zip(values, weights)), capacity)
    for chunk_id in picked:
        mask_chunks[chunk_id][:] = 1

    return np.hstack(mask_chunks)
    
def get_random_summary(N, segment, budget):
    """Summarize ``N`` uniformly random scores under a budget of ``int(N * budget)``."""
    frame_budget = int(N * budget)
    return summarize(np.random.random((N,)), segment, frame_budget)

def evaluate_baseline(verbose = True):
    """Score one random summary per test video against every user summary.

    For each test video of split ``SPLIT_ID`` a random summary with a 15%
    budget is generated and compared to each row of ``user_summary`` with
    ``f1_score``. The minimum, mean and maximum F1 per video are averaged over
    all test videos.

    Args:
        verbose: when true, print one table row per video plus the average
            (values in percent).

    Returns:
        dict: ``{'method': 'Random', 'min': ..., 'avg': ..., 'max': ...}`` with
        the averaged F1 statistics as fractions (not percent).
    """
    splits = read_json(SPLIT)
    assert SPLIT_ID < len(splits), "split_id (got {}) exceeds {}".format(SPLIT_ID, len(splits ))
    test_keys = splits[SPLIT_ID]["test_keys"]
    b_score = []

    # The context manager closes the HDF5 file even if scoring raises; this
    # function is executed many times in parallel workers by run().
    with h5py.File(DATASET, 'r') as dataset:
        for item in get_summe_gssummary(dataset, test_keys):
            gs_summary = item['gs_summary']
            N = gs_summary.shape[1]
            segment = change_point_to_segment(dataset[item['video']]['change_points'])

            rand_summary = get_random_summary(N, segment, budget=0.15)

            f1_scores = [f1_score(x, rand_summary) for x in gs_summary]
            f1_min = min(f1_scores)
            f1_mean = sum(f1_scores) / len(f1_scores)
            f1_max = max(f1_scores)

            b_score.append((f1_min, f1_mean, f1_max))

            if verbose:
                print('%25s | %6.2f | %6.2f | %6.2f |' % (item['video'], f1_min * 100, f1_mean * 100, f1_max * 100))

    b_score = np.array(b_score)
    score_summary = b_score.mean(axis=0)

    if verbose:
        print('%25s | %6.2f | %6.2f | %6.2f |' % ('Avg.', score_summary[0] * 100, score_summary[1] * 100, score_summary[2] * 100))

    return {'method': 'Random',
            'min': score_summary[0],
            'avg': score_summary[1],
            'max': score_summary[2]}

In [None]:
# Run the random baseline once; with the default verbose=True it prints the
# per-video table and the cell shows the returned summary dict.
evaluate_baseline()

### user_summary

In [None]:
def get_rc_func(metric):
    """Return the rank-correlation function selected by ``metric``.

    Args:
        metric: ``'kendalltau'`` or ``'spearmanr'``.

    Returns:
        A callable ``f(x, y)`` that returns the SciPy result, whose first
        element is the correlation coefficient. For ``'kendalltau'`` both
        inputs are negated and ranked with ``rankdata`` first, so they must
        support unary minus (e.g. NumPy arrays).

    Raises:
        RuntimeError: if ``metric`` is not one of the supported names.
    """
    if metric == 'kendalltau':
        def rank_corr(x, y):
            return kendalltau(rankdata(-x), rankdata(-y))
    elif metric == 'spearmanr':
        def rank_corr(x, y):
            return spearmanr(x, y)
    else:
        # Name the offending value so a typo in a notebook cell is easy to spot.
        raise RuntimeError(
            "unknown rank-correlation metric {!r}; expected 'kendalltau' or 'spearmanr'".format(metric)
        )
    return rank_corr

In [None]:
def new_evaluate_method_F_score(summary, user_summary):
    """Compare ``summary`` with every user summary using ``f1_score``.

    Args:
        summary: the summary to evaluate, passed to ``f1_score`` as the
            second argument.
        user_summary: iterable of reference summaries (must be non-empty).

    Returns:
        list: ``[min, mean, max]`` of the F1 scores, in percent.
    """
    # The original body read the shape of an undefined name ``gs_summary`` into an
    # unused variable, which raised NameError; that dead line is removed.
    f1_scores = [f1_score(x, summary) for x in user_summary]
    f1_min = min(f1_scores)
    f1_mean = sum(f1_scores) / len(f1_scores)
    f1_max = max(f1_scores)
    return [f1_min * 100, f1_mean * 100, f1_max * 100]

In [None]:
def new_evaluate_method_metric(summary, user_summary, metric):
    """Compare ``summary`` with every user summary using a rank correlation.

    Args:
        summary: the summary to evaluate, passed to the correlation function
            as the second argument.
        user_summary: iterable of reference summaries (must be non-empty).
        metric: name understood by ``get_rc_func``.

    Returns:
        list: ``[min, mean, max]`` of the correlation coefficients, multiplied
        by 100.
    """
    # The original body read the shape of an undefined name ``gs_summary`` into an
    # unused variable, which raised NameError; that dead line is removed.
    rc_func = get_rc_func(metric)
    coefficients = [rc_func(x, summary)[0] for x in user_summary]
    coef_min = min(coefficients)
    coef_mean = sum(coefficients) / len(coefficients)
    coef_max = max(coefficients)
    return [coef_min * 100, coef_mean * 100, coef_max * 100]

In [None]:
def new_evaluate_model(model, use_gpu, metric):
    """Evaluate ``model`` on the test split with a rank-correlation metric.

    For every test video the model is called ``NUM_STEPS`` times with greedy
    actions. After each call the frame scores are turned into a summary with
    ``generate_summary`` and compared to the user summaries through
    ``new_evaluate_method_metric``. Per video the best min / mean / max value
    over all steps is kept; these are then averaged over the test videos.

    Args:
        model: the network to evaluate. It is switched to eval mode for the
            evaluation and back to train mode before returning.
        use_gpu: run on CUDA when true and CUDA is available.
        metric: name understood by ``get_rc_func``.

    Returns:
        dict: ``{'method': 'machine', 'min': ..., 'avg': ..., 'max': ...}``.
    """
    print("===> Evaluation")
    splits = read_json(SPLIT)
    # Honour the caller's choice, but never request CUDA when it is unavailable.
    use_gpu = bool(use_gpu) and torch.cuda.is_available()
    assert SPLIT_ID < len(splits), "split_id (got {}) exceeds {}".format(SPLIT_ID, len(splits ))
    test_keys = splits[SPLIT_ID]["test_keys"]
    score_list = []

    # The context manager closes the HDF5 file even when evaluation raises.
    with h5py.File(DATASET, 'r') as dataset, torch.no_grad():
        model.eval()
        if use_gpu:
            model = model.cuda()

        test_dataset = Charades_dataset(test_keys)
        testloader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = BATCH_SIZE, shuffle = True, num_workers = 0)
        for batch_idx, key in enumerate(testloader):
            video = dataset[key[0]]
            seq = video['features'][...]
            seq_furtrue = torch.from_numpy(seq).unsqueeze(0)
            global_feature_concate, idx, idx_list, five_unit, num_units = inital_data_process(seq)
            global_feature_concate = torch.from_numpy(global_feature_concate).unsqueeze(0)
            hidden_state = torch.zeros(BATCH_SIZE, Visual_Feature_Dim)
            current_feature_concate = global_feature_concate

            # Per-video data does not change between steps, so read it once.
            cps = video['change_points'][...]
            num_frames = video['n_frames'][()]
            nfps = video['n_frame_per_seg'][...].tolist()
            positions = video['picks'][...]
            user_summary = video['user_summary'][...]

            # Scores are correlations scaled by 100, so -100 is their lower bound.
            # Starting from 0 would silently clamp negative correlations.
            score = [-100.0, -100.0, -100.0]

            for step in range(NUM_STEPS):
                if use_gpu:
                    hidden_state = hidden_state.cuda()
                    current_feature_concate = current_feature_concate.cuda()
                    global_feature_concate = global_feature_concate.cuda()
                    # The frame sequence must be on the same device as the model.
                    seq_furtrue = seq_furtrue.cuda()

                hidden_state, global_policy, move_policy, iou_out2 = model(seq_furtrue, global_feature_concate, current_feature_concate, hidden_state)

                probs = iou_out2.data.cpu().squeeze().numpy()

                machine_summary = generate_summary(probs, cps, num_frames, nfps, positions)
                new_score = new_evaluate_method_metric(machine_summary, user_summary, metric)

                score[0] = max(score[0], new_score[0])
                score[1] = max(score[1], new_score[1])
                score[2] = max(score[2], new_score[2])

                # Greedy root action: which entry of idx to change.
                global_policy_prob = F.softmax(global_policy, dim = 1)
                global_policy_action = global_policy_prob.max(1, keepdim = True)[1].data.cpu().numpy()[:, 0]

                update_idx = idx
                local_abnormal_done = torch.ones(BATCH_SIZE)

                for i in range(BATCH_SIZE):

                    for local_id in range(Local_Seq_Len):
                        if global_policy_action[i] == local_id:      # the entry chosen by the root policy
                            move_policy_prob = F.softmax(move_policy[i], dim = 0)
                            move_policy_action = move_policy_prob.max(0, keepdim=True)[1].data.cpu().numpy()

                            if move_policy_action == 0:
                                update_idx, _, _, local_abnormal_done[i] = determine_left_move_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 1:
                                update_idx, _, _, local_abnormal_done[i] = determine_right_move_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 2:
                                update_idx, _, _, local_abnormal_done[i] = determine_left_offset_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 3:
                                update_idx, _, _, local_abnormal_done[i] = determine_right_offset_range(idx, local_id, idx[local_id], five_unit, num_units)
                            else:
                                local_abnormal_done[i] = 0

                # Next local input: features at the first 10 entries of update_idx.
                current_feature_concate = torch.from_numpy(
                    np.concatenate([seq[update_idx[k]] for k in range(10)], axis=0)
                ).unsqueeze(0)

            score_list.append(score)
        score_list = np.array(score_list)
        score_summary = score_list.mean(axis = 0)
    model.train()
    return {'method': 'machine',
            'min': score_summary[0],
            'avg': score_summary[1],
            'max': score_summary[2]}

### F_score_ run

In [None]:
def run():
    """Compare a saved model against 100 random baselines on the 'spearmanr' metric.

    Loads a hard-coded checkpoint into a fresh ``PRL_VS``, evaluates it with
    ``new_evaluate_model``, runs ``evaluate_baseline`` 100 times in parallel,
    and prints summary statistics of the random rows.

    Returns:
        pandas.DataFrame: one row per baseline run followed by one row for
        the model.
    """
    net = PRL_VS()
    gpu_available = torch.cuda.is_available()
    ckpt_path = os.path.join('./resualt/best_resualt/tvsum_62.0/model_epoch4_0.62016888935977_tvsum.pth.tar')
    print("Loading checkpoint from '{}'".format(ckpt_path))
    state = torch.load(ckpt_path, map_location = 'cpu')
    net.load_state_dict(state)

    # Alternatives accepted by get_rc_func: 'spearmanr' or 'kendalltau'.
    machine_row = new_evaluate_model(net, gpu_available, 'spearmanr')

    print('evaluating baseline scores')
    baseline_jobs = [delayed(evaluate_baseline)(verbose = False) for _ in range(100)]
    rows = Parallel(n_jobs = -1)(baseline_jobs)
    rows.append(machine_row)

    df = pd.DataFrame(rows)
    random_rows = df[df.method == 'Random']
    print(random_rows[['min', 'avg', 'max']].describe())
    return df

In [None]:
# Evaluate the saved model against the random baselines; the returned DataFrame
# is displayed as the cell output.
run()

## New evaluation method ('spearmanr' or 'kendalltau')

### Compute rank order statistics

In [None]:
def get_rc_func(metric):
    """Return the rank-correlation function selected by ``metric``.

    Args:
        metric: ``'kendalltau'`` or ``'spearmanr'``.

    Returns:
        A callable ``f(x, y)`` that returns the SciPy result, whose first
        element is the correlation coefficient. For ``'kendalltau'`` both
        inputs are negated and ranked with ``rankdata`` first, so they must
        support unary minus (e.g. NumPy arrays).

    Raises:
        RuntimeError: if ``metric`` is not one of the supported names.
    """
    if metric == 'kendalltau':
        def rank_corr(x, y):
            return kendalltau(rankdata(-x), rankdata(-y))
    elif metric == 'spearmanr':
        def rank_corr(x, y):
            return spearmanr(x, y)
    else:
        # Name the offending value so a typo in a notebook cell is easy to spot.
        raise RuntimeError(
            "unknown rank-correlation metric {!r}; expected 'kendalltau' or 'spearmanr'".format(metric)
        )
    return rank_corr

In [None]:
def new_evaluate_metric(model, use_gpu, metric):
    """Correlate the model's frame scores with the user annotations.

    For every test video the model is called ``NUM_STEPS`` times with greedy
    actions. After each call the frame scores are correlated with every row
    of the video's ``user_anno`` and the coefficients are averaged; the best
    average over all steps is kept per video.

    Args:
        model: the network to evaluate. It is switched to eval mode for the
            evaluation and back to train mode before returning.
        use_gpu: run on CUDA when true and CUDA is available.
        metric: name understood by ``get_rc_func``.

    Returns:
        The mean over the test videos of the per-video best score (also
        printed).
    """
    print("===> Evaluation ===> Metric:" + metric)
    splits = read_json(SPLIT)
    # Honour the caller's choice, but never request CUDA when it is unavailable.
    use_gpu = bool(use_gpu) and torch.cuda.is_available()
    assert SPLIT_ID < len(splits), "split_id (got {}) exceeds {}".format(SPLIT_ID, len(splits ))
    test_keys = splits[SPLIT_ID]["test_keys"]
    # The correlation function depends only on ``metric``; build it once.
    rc_func = get_rc_func(metric)
    score_list = []

    # Context managers close both HDF5 files even when evaluation raises.
    with h5py.File(DATASET, 'r') as dataset, \
            h5py.File('./datasets/new_summe_user_anno.h5', 'r') as user_anno_datasets, \
            torch.no_grad():
        model.eval()
        if use_gpu:
            model = model.cuda()

        test_dataset = Charades_dataset(test_keys)
        testloader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = BATCH_SIZE, shuffle = True, num_workers = 0)
        for batch_idx, key in enumerate(testloader):
            seq = dataset[key[0]]['features'][...]
            seq_furtrue = torch.from_numpy(seq).unsqueeze(0)
            global_feature_concate, idx, idx_list, five_unit, num_units = inital_data_process(seq)
            global_feature_concate = torch.from_numpy(global_feature_concate).unsqueeze(0)
            hidden_state = torch.zeros(BATCH_SIZE, Visual_Feature_Dim)
            current_feature_concate = global_feature_concate
            # The annotations do not change between steps, so read them once.
            user_anno = user_anno_datasets[key[0]]['user_anno'][...]
            Best_score = - 100.0

            for step in range(NUM_STEPS):
                if use_gpu:
                    hidden_state = hidden_state.cuda()
                    current_feature_concate = current_feature_concate.cuda()
                    global_feature_concate = global_feature_concate.cuda()
                    seq_furtrue = seq_furtrue.cuda()

                hidden_state, global_policy, move_policy, iou_out2 = model(seq_furtrue, global_feature_concate, current_feature_concate, hidden_state)

                probs = iou_out2.data.cpu().squeeze().numpy()
                # One score per annotated position is required.
                assert probs.shape[0] == user_anno.shape[1]

                D = [rc_func(probs, x)[0] for x in user_anno]
                t = np.mean(D)

                if t > Best_score:
                    Best_score = t

                # Greedy root action: which entry of idx to change.
                global_policy_prob = F.softmax(global_policy, dim = 1)
                global_policy_action = global_policy_prob.max(1, keepdim = True)[1].data.cpu().numpy()[:, 0]

                update_idx = idx
                local_abnormal_done = torch.ones(BATCH_SIZE)

                for i in range(BATCH_SIZE):

                    for local_id in range(Local_Seq_Len):
                        if global_policy_action[i] == local_id:      # the entry chosen by the root policy
                            move_policy_prob = F.softmax(move_policy[i], dim = 0)
                            move_policy_action = move_policy_prob.max(0, keepdim=True)[1].data.cpu().numpy()

                            if move_policy_action == 0:
                                update_idx, _, _, local_abnormal_done[i] = determine_left_move_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 1:
                                update_idx, _, _, local_abnormal_done[i] = determine_right_move_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 2:
                                update_idx, _, _, local_abnormal_done[i] = determine_left_offset_range(idx, local_id, idx[local_id], five_unit, num_units)
                            elif move_policy_action == 3:
                                update_idx, _, _, local_abnormal_done[i] = determine_right_offset_range(idx, local_id, idx[local_id], five_unit, num_units)
                            else:
                                local_abnormal_done[i] = 0

                # Next local input: features at the first 10 entries of update_idx.
                current_feature_concate = torch.from_numpy(
                    np.concatenate([seq[update_idx[k]] for k in range(10)], axis=0)
                ).unsqueeze(0)

            score_list.append(Best_score)
        mean_score = np.mean(score_list)
        print(mean_score)
    model.train()
    return mean_score

In [None]:
# Load a saved SumMe checkpoint onto the CPU and report its rank correlation
# with the user annotations. The checkpoint path is hard-coded for one run.
model = PRL_VS()
use_gpu = torch.cuda.is_available()
model_save_path = os.path.join('./resualt/best_resualt/summe/model_epoch4_0.454531550077155_summe.pth.tar')
print("Loading checkpoint from '{}'".format(model_save_path))
checkpoint = torch.load(model_save_path, map_location = 'cpu')
model.load_state_dict(checkpoint)

# Switch to 'spearmanr' here to report the other coefficient.
metric = 'kendalltau'#'spearmanr' or 'kendalltau'
score = new_evaluate_metric(model, use_gpu, metric)

### Human

In [None]:
# Open the user-annotation file read-only. The evaluator classes below read this
# module-level handle, so it has to stay open until they have been run.
data = h5py.File('./datasets/summe_user_anno.h5', 'r')

In [None]:
def get_rc_func(metric):
    """Return the rank-correlation function selected by ``metric``.

    Args:
        metric: ``'kendalltau'`` or ``'spearmanr'``.

    Returns:
        A callable ``f(x, y)`` that returns the SciPy result, whose first
        element is the correlation coefficient. For ``'kendalltau'`` both
        inputs are negated and ranked with ``rankdata`` first, so they must
        support unary minus (e.g. NumPy arrays).

    Raises:
        RuntimeError: if ``metric`` is not one of the supported names.
    """
    if metric == 'kendalltau':
        def rank_corr(x, y):
            return kendalltau(rankdata(-x), rankdata(-y))
    elif metric == 'spearmanr':
        def rank_corr(x, y):
            return spearmanr(x, y)
    else:
        # Name the offending value so a typo in a notebook cell is easy to spot.
        raise RuntimeError(
            "unknown rank-correlation metric {!r}; expected 'kendalltau' or 'spearmanr'".format(metric)
        )
    return rank_corr

class RankCorrelationEvaluator(object):
    """Base class that correlates per-video scores with every annotation row.

    Subclasses must provide ``self.rc_func`` (a callable ``f(x, y)`` whose
    result's first element is the coefficient) and ``get_score(video_id)``.
    """

    def __call__(self, dataset=None):
        """Evaluate every video of ``dataset``.

        Args:
            dataset: mapping from video id to an entry whose ``'user_anno'``
                item is iterated row by row. Defaults to the notebook-level
                ``data`` handle, which must then still be open.

        Returns:
            list[dict]: one record per video with the keys ``'video'``,
            ``'mean'``, ``'min'``, ``'max'`` (statistics of the coefficients
            over the annotation rows) and ``'cc'`` (all coefficients as an
            array).
        """
        if dataset is None:
            # Backward-compatible fallback to the module-level handle.
            dataset = data

        res = []
        for d in dataset:
            user_anno = dataset[d]['user_anno'][...]

            pred_x = self.get_score(d)
            D = [self.rc_func(x, pred_x)[0] for x in user_anno]

            res.append({'video': d,
                        'mean': np.mean(D),
                        'min': np.min(D),
                        'max': np.max(D),
                        'cc': np.asarray(D)
                        })
        return res

class HumanEvaluator(RankCorrelationEvaluator):
    """Measures how well each annotation row correlates with all the other rows."""

    def __init__(self, metric):
        self.rc_func = get_rc_func(metric)

    def __call__(self, dataset=None):
        """Compute leave-one-out correlations between annotation rows.

        For each video every row of ``'user_anno'`` is correlated with every
        other row. At least two rows per video are required, because the
        per-row maximum and minimum are taken over the other rows.

        Args:
            dataset: mapping from video id to an entry with a ``'user_anno'``
                item. Defaults to the notebook-level ``data`` handle, which
                must then still be open.

        Returns:
            list[dict]: one record per video with the keys ``'video'``,
            ``'mean'``, ``'min'``, ``'max'`` (per-row mean / min / max
            coefficient, averaged over the rows) and ``'cc'`` (all pairwise
            coefficients as an array).
        """
        if dataset is None:
            # Backward-compatible fallback to the module-level handle.
            dataset = data

        res = []
        for d in dataset:
            user_anno = dataset[d]['user_anno'][...]

            max_rc = []
            min_rc = []
            avr_rc = []
            rc = []
            for i, x in enumerate(user_anno):
                # Coefficients of row i against every other row.
                R = [self.rc_func(x, other)[0] for j, other in enumerate(user_anno) if j != i]

                max_rc.append(max(R))
                min_rc.append(min(R))
                avr_rc.append(np.mean(R))
                rc += R

            res.append({'video': d,
                        'mean': np.mean(avr_rc),
                        'min': np.mean(min_rc),
                        'max': np.mean(max_rc),
                        'cc': np.asarray(rc)
                        })
        return res
    
class RandomEvaluator(RankCorrelationEvaluator):
    """Evaluator whose per-video scores are uniform random numbers."""

    def __init__(self, metric):
        self.rc_func = get_rc_func(metric)

        # One random vector per video of the module-level ``data`` handle, as
        # long as the second dimension of that video's ``user_anno``.
        self.rand_scores = {
            video: np.random.random((data[video]['user_anno'][...].shape[1],))
            for video in data
        }

    def get_score(self, v_id):
        """Return the random score vector drawn for ``v_id`` at construction."""
        return self.rand_scores[v_id]


In [None]:
# Annotator agreement under Spearman's rho: average the per-video 'mean'
# values returned by HumanEvaluator.
metric = 'spearmanr'
human_res = HumanEvaluator(metric)()
mean_arr = np.asarray([x['mean'] for x in human_res])
print('human'+': mean %.3f'%(np.mean(mean_arr)))

In [None]:
# Same computation as the previous cell, using Kendall's tau.
# Note: this overwrites metric, human_res and mean_arr from that cell.
metric = 'kendalltau'
human_res = HumanEvaluator(metric)()
mean_arr = np.asarray([x['mean'] for x in human_res])
print('human'+': mean %.3f'%(np.mean(mean_arr)))

In [None]:
# Close the annotation file. The evaluator classes above read this module-level
# handle, so they cannot be run again until the file is reopened.
data.close()

### Importance score correlations visualization

In [None]:
%matplotlib inline
import numpy as np
from sklearn.metrics import auc
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

##### Generate the model's pred file

In [None]:
def generate_pred(model, use_gpu, pred_path='./datasets/summe_pred_47.35.h5'):
    """Run the model over every video in DATASET and save its best frame scores.

    For each video the model is unrolled for NUM_STEPS steps. After every step
    the predicted scores are converted to a summary with ``generate_summary``
    and scored with ``evaluate_summary``; the scores of the best-scoring step
    are stored as ``<pred_path>[<video key>]['probs']``.

    Args:
        model: network called as
            ``model(seq, global_feat, current_feat, hidden_state)`` and
            returning ``(hidden_state, global_policy, move_policy, scores)``.
        use_gpu: when true, the model and its inputs are moved to CUDA.
        pred_path: HDF5 file to write; an existing file is overwritten.

    Returns:
        0, always (kept for compatibility with existing callers).
    """
    print("===> generate pred h5 file")
    eval_metric = 'avg' if METRIC == 'tvsum' else 'max'
    # Number of frame features concatenated into the "current" model input.
    # NOTE(review): hard-coded to 10 in the original code although
    # Local_Seq_Len is configurable - confirm this matches the model input size.
    num_current_feats = 10
    # Move-policy action index -> function computing the updated key-frame indices.
    move_fns = (determine_left_move_range, determine_right_move_range,
                determine_left_offset_range, determine_right_offset_range)

    # Context managers guarantee both files are closed even if a step raises.
    with h5py.File(DATASET, 'r') as dataset, h5py.File(pred_path, 'w') as h5_file:
        vlist = list(dataset.keys())
        test_dataset = Charades_dataset(vlist)
        # Videos are processed independently, so shuffling is not needed here.
        testloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                                                 shuffle=False, num_workers=0)

        model.eval()
        if use_gpu: model = model.cuda()
        try:
            with torch.no_grad():
                for key in testloader:
                    video_key = key[0]
                    video = dataset[video_key]

                    # Per-video data does not change between steps: read it once.
                    seq = video['features'][...]
                    cps = video['change_points'][...]
                    num_frames = video['n_frames'][()]
                    nfps = video['n_frame_per_seg'][...].tolist()
                    positions = video['picks'][...]
                    user_summary = video['user_summary'][...]

                    seq_furtrue = torch.from_numpy(seq).unsqueeze(0)
                    global_feature_concate, idx, _, five_unit, num_units = inital_data_process(seq)
                    global_feature_concate = torch.from_numpy(global_feature_concate).unsqueeze(0)
                    hidden_state = torch.zeros(BATCH_SIZE, Visual_Feature_Dim)
                    current_feature_concate = global_feature_concate
                    if use_gpu:
                        seq_furtrue = seq_furtrue.cuda()
                        global_feature_concate = global_feature_concate.cuda()
                        hidden_state = hidden_state.cuda()

                    best_f_score = 0.0
                    best_probs = None

                    for step in range(NUM_STEPS):
                        if use_gpu:
                            # Rebuilt on the CPU at the end of every step.
                            current_feature_concate = current_feature_concate.cuda()

                        hidden_state, global_policy, move_policy, iou_out2 = model(
                            seq_furtrue, global_feature_concate, current_feature_concate, hidden_state)

                        probs = iou_out2.data.cpu().squeeze().numpy()
                        machine_summary = generate_summary(probs, cps, num_frames, nfps, positions)
                        fm, _, _ = evaluate_summary(machine_summary, user_summary, eval_metric)

                        # Remember the best step instead of writing immediately:
                        # assigning to an existing HDF5 name raises, and a video
                        # whose F-score never exceeds 0 must still get a 'probs' entry.
                        if best_probs is None or fm > best_f_score:
                            best_f_score = fm
                            best_probs = probs.copy()

                        global_policy_prob = F.softmax(global_policy, dim=1)
                        global_policy_action = global_policy_prob.max(1, keepdim=True)[1].data.cpu().numpy()[:, 0]

                        # NOTE(review): the update always starts from the initial
                        # `idx`; `idx` itself is never advanced between steps.
                        # Kept as in the original - confirm it matches training.
                        update_idx = idx
                        for i in range(BATCH_SIZE):
                            # Index of the key frame the global policy chose to move.
                            local_id = int(global_policy_action[i])
                            if local_id >= Local_Seq_Len:
                                continue
                            move_policy_prob = F.softmax(move_policy[i], dim=0)
                            move_policy_action = int(move_policy_prob.max(0, keepdim=True)[1].item())
                            # Any action outside the four moves leaves the indices unchanged.
                            if move_policy_action < len(move_fns):
                                update_idx, _, _, _ = move_fns[move_policy_action](
                                    idx, local_id, idx[local_id], five_unit, num_units)

                        current_feature_concate = torch.from_numpy(
                            np.concatenate([seq[update_idx[k]] for k in range(num_current_feats)], axis=0)
                        ).unsqueeze(0)

                    if best_probs is not None:
                        # One group per video actually present in the dataset.
                        h5_file.require_group(video_key)['probs'] = best_probs
        finally:
            # Always hand the model back in training mode.
            model.train()

    print("pred h5 file is finished")
    return 0

In [None]:
# model = PRL_VS()
# use_gpu = torch.cuda.is_available()
# model_save_path = os.path.join('./resualt/best_resualt/summe/model_epoch19_0.47265147229948906_summe.pth.tar')
# print("Loading checkpoint from '{}'".format(model_save_path))
# checkpoint = torch.load(model_save_path, map_location = 'cpu')
# model.load_state_dict(checkpoint)
# generate_pred(model, use_gpu)

In [None]:
def accum_eval(pred, gt):
    """Cumulative ground-truth score collected when columns are taken by descending ``pred``.

    Args:
        pred: 1-D array of predicted scores, one per column of ``gt``.
        gt: 2-D array; each column's contribution is its mean over the rows.

    Returns:
        1-D array with one entry per element of ``pred``: the running sum of
        column means in ranked order, divided by the sum of all column means.
    """
    total = gt.mean(axis=0).sum()
    ranking = np.argsort(pred)[::-1]

    running = 0
    curve = []
    for col in ranking:
        running = running + gt[:, col].mean()
        curve.append(running)

    return np.asarray(curve) / total

def best_curve(gt):
    """Upper-bound curve: columns of ``gt`` taken from highest to lowest column mean.

    Args:
        gt: 2-D array; each column's contribution is its mean over the rows.

    Returns:
        1-D array with the running sum of column means in that order,
        divided by the sum of all column means.
    """
    total = gt.mean(axis=0).sum()
    ranking = np.argsort(gt.mean(axis=0))[::-1]

    running = 0
    curve = []
    for col in ranking:
        running = running + gt[:, col].mean()
        curve.append(running)

    return np.asarray(curve) / total

def worst_curve(gt):
    """Lower-bound curve: columns of ``gt`` taken from lowest to highest column mean.

    Args:
        gt: 2-D array; each column's contribution is its mean over the rows.

    Returns:
        1-D array with the running sum of column means in that order,
        divided by the sum of all column means.
    """
    total = gt.mean(axis=0).sum()
    ranking = np.argsort(gt.mean(axis=0))

    running = 0
    curve = []
    for col in ranking:
        running = running + gt[:, col].mean()
        curve.append(running)

    return np.asarray(curve) / total

def plot_user_scores(user_anno):
    """Open a new figure and draw the best/worst envelope plus one curve per annotator.

    Each row of ``user_anno`` is used in turn as the prediction and evaluated
    with ``accum_eval`` against all remaining rows.

    Args:
        user_anno: 2-D array of annotation scores, one row per annotator.

    Returns:
        ``(mean_auc, best_auc, worst_auc, p0)`` where ``mean_auc`` is the *sum*
        of the per-annotator AUCs (it is not divided by the number of rows
        here), ``best_auc``/``worst_auc`` are the AUCs of the envelope curves,
        and ``p0`` is the ``plt.plot`` result of the last annotator curve.
    """
    scores = (user_anno - 0)/18.
    n_rows = len(scores)

    plt.figure(figsize=(5,5))

    # Envelope: upper bound first, then lower bound.
    upper = best_curve(scores)
    best_auc = auc(np.linspace(0, 1, upper.size), upper)
    lower = worst_curve(scores)
    worst_auc = auc(np.linspace(0, 1, lower.size), lower)

    plt.fill_between(range(len(upper)), lower, upper, color='lightblue', alpha=.5)

    mean_auc = 0
    for row in range(n_rows):
        # Leave-one-out: every row except the one used as the prediction.
        others = np.delete(scores, row, axis=0)
        curve = accum_eval(scores[row], others)
        mean_auc += auc(np.linspace(0, 1, curve.size), curve)
        p0 = plt.plot(curve, color='lightcoral', alpha=.5)

    return mean_auc, best_auc, worst_auc, p0

def plot_curve_rlvsumm(anno_path='./datasets/summe_user_anno.h5',
                       pred_path='./datasets/summe_pred_47.35.h5',
                       img_dir='./imgs/summe/'):
    """Plot human, random and model accumulation curves for every annotated video.

    For each video in ``anno_path`` a figure is drawn with the human envelope
    and per-annotator curves (``plot_user_scores``), a curve for uniformly
    random scores and a curve for the model scores stored under
    ``pred_path[<video>]['probs']``. The figure is saved as
    ``<img_dir>/<video>.png`` and shown. Finally the average AUC and the
    average relative AUC (position between worst and best, in percent) are
    printed for the random, human and model curves.

    Args:
        anno_path: HDF5 file holding a ``'user_anno'`` array per video.
        pred_path: HDF5 file holding a ``'probs'`` array per video.
        img_dir: directory for the PNG files; created if it does not exist.

    Raises:
        AssertionError: if a model prediction does not have one score per
            column of the corresponding ``'user_anno'`` array.
    """
    human_auc_summary = []
    random_auc_summary = []
    model_auc_summary = []
    rel_human_auc = []
    rel_random_auc = []
    rel_model_auc = []

    # savefig does not create missing directories.
    os.makedirs(img_dir, exist_ok=True)

    # Context managers close both files even if plotting a video fails.
    with h5py.File(anno_path, 'r') as anno_data, h5py.File(pred_path, 'r') as pred_h5:
        for gt in anno_data:
            user_anno = anno_data[gt]['user_anno'][...]
            n_fr = user_anno.shape[1]
            N = len(user_anno)

            human_mean_auc, best_auc, worst_auc, p0 = plot_user_scores(user_anno)
            # plot_user_scores returns the summed AUC; divide to get the mean.
            human_auc_summary.append(human_mean_auc / N)
            rel_human_auc.append((human_auc_summary[-1]-worst_auc) / (best_auc-worst_auc)*100)

            # plot curve by random scoring
            pred = np.random.random((n_fr))
            y = accum_eval(pred, user_anno)
            p1 = plt.plot(y, color='k', linestyle='--')
            random_auc = auc(np.linspace(0, 1, y.size), y)
            random_auc_summary.append(random_auc)
            rel_random_auc.append((random_auc-worst_auc) / (best_auc-worst_auc)*100)

            # plot curve by model scoring; validate the *model* prediction
            # (a shorter vector would silently evaluate only part of the video).
            pred = pred_h5[gt]['probs'][...]
            assert len(pred) == n_fr, \
                "prediction for {} has {} scores, expected {}".format(gt, len(pred), n_fr)
            y = accum_eval(pred, user_anno)
            p2 = plt.plot(y, color='royalblue', linestyle='--')
            model_auc = auc(np.linspace(0, 1, y.size), y)
            model_auc_summary.append(model_auc)
            rel_model_auc.append((model_auc-worst_auc) / (best_auc-worst_auc)*100)

            plt.legend((p0[0], p1[0], p2[0]), ('Humans', 'Random', 'model'))
            plt.title(gt)
            plt.savefig(os.path.join(img_dir, gt + '.png'))
            plt.show()
            plt.close('all')

    print('random:', sum(random_auc_summary)/len(random_auc_summary), sum(rel_random_auc)/len(rel_random_auc), '\n',
         'human:', sum(human_auc_summary)/len(human_auc_summary), sum(rel_human_auc)/len(rel_human_auc),'\n',
         'model',sum(model_auc_summary)/len(model_auc_summary), sum(rel_model_auc)/len(rel_model_auc))

In [None]:
# Enlarge seaborn fonts for the figures, then draw and save the per-video curves
# and print the AUC summary (reads the annotation and prediction HDF5 files).
sns.set_context('notebook', font_scale=1.3)
plot_curve_rlvsumm()