## Trirachical Tranining

**Packages**

In [1]:
import h5py
import math
import random
import numpy as np
import os
import os.path as osp
import json
from tqdm import tqdm

import import_ipynb
from Model import Trirar

from ortools.algorithms import pywrapknapsack_solver

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.autograd import Variable

importing Jupyter notebook from Model.ipynb


### Util modules

In [2]:
def mkdir_if_missing(directory):
    if not osp.exists(directory):
        try:
            os.makedirs(directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

def write_json(obj, fpath):
    mkdir_if_missing(osp.dirname(fpath))
    with open(fpath, 'w') as f:
        json.dump(obj, f, indent=4, separators=(',', ': '))


In [3]:
def split_random(keys, num_videos, num_train):
    """Random split"""
    train_keys, test_keys = [], []
    rnd_idxs = np.random.choice(range(num_videos), size=num_train, replace=False)
    for key_idx, key in enumerate(keys):
        if key_idx in rnd_idxs:
            train_keys.append(key)
        else:
            test_keys.append(key)

    assert len(set(train_keys) & set(test_keys)) == 0, "Error: train_keys and test_keys overlap"

    return train_keys, test_keys

In [4]:
def print_table(table, cell_width=[3,10,13,13]):
    slen=sum(cell_width)+len(cell_width)*2+2
    print('-'*slen)
    header = table.pop(0)
    for i, head in enumerate(header):
        print('  {name: <{alignment}}'.format(name=head, alignment=cell_width[i]), end='')

    print('')
    print('='*slen)
    for row in table:
        for i, val in enumerate(row):
            print('  {val: <{alignment}}'.format(val=val, alignment=cell_width[i]), end='')
        print('')
    print('-'*slen)

In [5]:
def parse_splits_filename(splits_filename):
    # Parse split file and count number of k_folds
    spath, sfname = os.path.split(splits_filename)
    sfname, _ = os.path.splitext(sfname)
    dataset_name = sfname.split('_')[0]  # Get dataset name e.g. tvsum
    dataset_type = sfname.split('_')[1]  # augmentation type e.g. aug

    # The keyword 'splits' is used as the filename fields terminator from historical reasons.
    if dataset_type == 'splits':
        # Split type is not present
        dataset_type = ''

    # Get number of discrete splits within each split json file
    with open(splits_filename, 'r') as sf:
        splits = json.load(sf)

    return dataset_name, dataset_type, splits


In [6]:
def get_frame_probs(shot_probs, cps, n_frames, device):
    if len(shot_probs) != len(cps):
        print('no. of shots does not match:', len(shot_probs),len(cps))
        return
    frame_probs = torch.zeros(n_frames, dtype=torch.float32, device = device)
    n_segs = cps.shape[0]
    for seg_idx in range(n_segs):
        first, last = cps[seg_idx]
        first, last =  int(first.item()), int(last.item())
        frame_probs[first:last + 1] = shot_probs[seg_idx]
        
    return frame_probs

In [7]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname == 'Linear':
        init.xavier_uniform_(m.weight, gain=np.sqrt(2.0))
        if m.bias is not None:
            init.constant_(m.bias, 0.1)

In [8]:
def knapsack_ortools(values, weights, items, capacity ):
    scale = 1000
    values = np.array(values)
    weights = np.array(weights)
    values = (values * scale).astype(np.int32)
    weights = (weights).astype(np.int32)
    capacity = capacity
    osolver = pywrapknapsack_solver.KnapsackSolver(pywrapknapsack_solver.KnapsackSolver.KNAPSACK_DYNAMIC_PROGRAMMING_SOLVER,'test')
    osolver.Init(values.tolist(), [weights.tolist()], [capacity])
    computed_value = osolver.Solve()
    packed_items = [x for x in range(0, len(weights))
                    if osolver.BestSolutionContains(x)]

    return packed_items

In [9]:
def generate_summary(ypred, cps, n_frames, nfps, positions, proportion=0.15, method='knapsack'):
    """Generate keyshot-based video summary i.e. a binary vector.
    Args:
    ---------------------------------------------
    - ypred: predicted importance scores.
    - cps: change points, 2D matrix, each row contains a segment.
    - n_frames: original number of frames.
    - nfps: number of frames per segment.
    - positions: positions of subsampled frames in the original video.
    - proportion: length of video summary (compared to original video length).
    - method: defines how shots are selected, ['knapsack', 'rank'].
    """
    n_segs = cps.shape[0]
    if n_segs!=len(ypred):
        print('Error')
        return
    # frame_scores = np.zeros((n_frames), dtype=np.float32)
    # if positions.dtype != int:
    #     positions = positions.astype(np.int32)
    # if positions[-1] != n_frames:
    # #     positions = np.concatenate([positions, [n_frames]])
    # for i in range(len(positions) - 1):
    #     pos_left, pos_right = positions[i], positions[i+1]
    #     if i == len(ypred):
    #         frame_scores[pos_left:pos_right] = 0
    #     else:
    #         frame_scores[pos_left:pos_right] = ypred[i]

#     seg_score = []
#     for seg_idx in range(n_segs):
#         start, end = int(cps[seg_idx,0]), int(cps[seg_idx,1]+1)
#         scores = frame_scores[start:end]
#         seg_score.append(float(scores.mean()))

    seg_score = ypred

    limits = int(math.floor(n_frames * proportion))

    if method == 'knapsack':
        #picks = knapsack_dp(seg_score, nfps, n_segs, limits)
        picks = knapsack_ortools(seg_score, nfps, n_segs, limits)
    elif method == 'rank':
        order = np.argsort(seg_score)[::-1].tolist()
        picks = []
        total_len = 0
        for i in order:
            if total_len + nfps[i] < limits:
                picks.append(i)
                total_len += nfps[i]
    else:
        raise KeyError("Unknown method {}".format(method))

    summary = np.zeros((1), dtype=np.float32) # this element should be deleted
    for seg_idx in range(n_segs):
        nf = nfps[seg_idx]
        if seg_idx in picks:
            tmp = np.ones((nf), dtype=np.float32)
        else:
            tmp = np.zeros((nf), dtype=np.float32)
        summary = np.concatenate((summary, tmp))

    summary = np.delete(summary, 0) # delete the first element
    return summary


def evaluate_usersummary(machine_summary, user_summary, eval_metric='avg'):
    """Compare machine summary with user summary (keyshot-based).
    Args:
    --------------------------------
    machine_summary and user_summary should be binary vectors of ndarray type.
    eval_metric = {'avg', 'max'}
    'avg' averages results of comparing multiple human summaries.
    'max' takes the maximum (best) out of multiple comparisons.
    """
    machine_summary = machine_summary.astype(np.float32)
    user_summary = user_summary.astype(np.float32)
    n_users,n_frames = user_summary.shape

    # binarization
    machine_summary[machine_summary > 0] = 1
    user_summary[user_summary > 0] = 1

    if len(machine_summary) > n_frames:
        machine_summary = machine_summary[:n_frames]
    elif len(machine_summary) < n_frames:
        zero_padding = np.zeros((n_frames - len(machine_summary)))
        machine_summary = np.concatenate([machine_summary, zero_padding])

    f_scores = []
    prec_arr = []
    rec_arr = []

    for user_idx in range(n_users):
        gt_summary = user_summary[user_idx,:]
        overlap_duration = (machine_summary * gt_summary).sum()
        precision = overlap_duration / (machine_summary.sum() + 1e-8)
        recall = overlap_duration / (gt_summary.sum() + 1e-8)
        if precision == 0 and recall == 0:
            f_score = 0.
        else:
            f_score = (2 * precision * recall) / (precision + recall)
        f_scores.append(f_score)
        prec_arr.append(precision)
        rec_arr.append(recall)

    if eval_metric == 'avg':
        final_f_score = np.mean(f_scores)
        final_prec = np.mean(prec_arr)
        final_rec = np.mean(rec_arr)
    elif eval_metric == 'max':
        final_f_score = np.max(f_scores)
        max_idx = np.argmax(f_scores)
        final_prec = prec_arr[max_idx]
        final_rec = rec_arr[max_idx]
    
    return final_f_score, final_prec, final_rec


In [10]:
def evaluate_gtsummary(machine_summary, gt_summary):
    
    machine_summary = machine_summary.astype(np.float32)
    user_summary = gt_summary

    # binarization
    machine_summary[machine_summary > 0] = 1
    gt_summary[gt_summary > 0] = 1
    n_frames = gt_summary.shape[0]

    if len(machine_summary) > n_frames:
        machine_summary = machine_summary[:n_frames]
    elif len(machine_summary) < n_frames:
        zero_padding = np.zeros((n_frames - len(machine_summary)))
        machine_summary = np.concatenate([machine_summary, zero_padding])

    overlap_duration = (machine_summary * gt_summary).sum()
    precision = overlap_duration / (machine_summary.sum() + 1e-8)
    recall = overlap_duration / (gt_summary.sum() + 1e-8)
    if precision == 0 and recall == 0:
        f_score = 0.
    else:
        f_score = (2 * precision * recall) / (precision + recall)

    return f_score, precision, recall


### Training Hyper Parameters

In [112]:
class HParameters:
        
    def __init__(self, args):
        
        self.verbose = args['verbose']
        self.use_cuda = args['use_cuda']

        if self.use_cuda:
            self.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
        else: 
            self.device = torch.device('cpu')

        self.max_summary_length = args['max_summary_length']

        self.l2_req = 0.00001
        self.lr_epochs = [0]
        self.lr = [0.00005]
        self.epochs_max = args['epochs']
        self.train_batch_size = args['train_batch_size']

        self.dataset=args['dataset']
        self.scene_segs = args['Scene_segments']
        self.results_path = args['results_path']
        self.num_splits = args['num_splits']
        self.split_file = args['split_file']
        self.train_percent = args['train_percent']
        self.model_folder = args['model_folder']+'/'
        
        if 'model_path' in args:
            self.model_path = args['model_path']
        else:
            self.model_path = None
        
        with open(self.results_path, "w") as f:
            f.write('Epoch\tTrainLoss\tValLoss\n')
        
        return


    def create_split(self):
        print("Loading dataset from {}".format(self.dataset))
        
        with h5py.File(self.dataset, 'r') as dataset:
            keys = dataset.keys()
            num_videos = len(keys)
            num_train = int(math.ceil(num_videos * self.train_percent))
            num_test = num_videos - num_train

            print("Split breakdown: # total videos {}. # train videos {}. # test videos {}".format(num_videos, num_train, num_test))
            splits = []

            for split_idx in range(self.num_splits):
                train_keys, test_keys = split_random(keys, num_videos, num_train)
                splits.append({
                    'train_keys': train_keys,
                    'test_keys': test_keys,
                    })

            # saveto = osp.join(self.split_file)
            write_json(splits, self.split_file)
            print("Splits saved to {}".format(self.split_file))

        
    def __str__(self):
        vars = [attr for attr in dir(self) if not callable(getattr(self,attr)) and not (attr.startswith("__") or attr.startswith("_"))]

        info_str = ''
        for i, var in enumerate(vars):
            val = getattr(self, var)
            if isinstance(val, Variable):
                val = val.data.cpu().numpy().tolist()[0]
            info_str += '['+str(i)+'] '+var+': '+str(val)+'\n'

        return info_str
        


### Trainer

In [108]:
class Trainer:
    def __init__(self, hps: HParameters):
        print("Initializing HMT model and optimizer...")
        self.hps = hps
        self.model = Trirar()
        self.verbose = True
        self.criterion = nn.MSELoss().to(self.hps.device)
        self.show_every = 1
        self.init_model()
        

        
    def init_model(self):
        if self.hps.model_path:
            self.model.load_state_dict(torch.load(self.hps.model_path, map_location=lambda storage, loc: storage))
            print('loading pretrained model from', self.hps.model_path)
        else:
            self.model.eval()
            self.model.apply(weights_init)

        self.model.to(self.hps.device)
   
    def train(self, train_keys):
        losses=[]
        
        pbar = tqdm(total=len(train_keys), position=0, leave=True)
        
        for i, key in enumerate(train_keys[:2]):
            with h5py.File(self.hps.dataset) as d, h5py.File(self.hps.scene_segs) as scnseg:
                vid_feats= d[key]['features'][...]
                aud_feats= d[key]['aud_feats'][...]
                boundaries = d[key]['fchange_points'][...]
                scn_boundaries = scnseg[key]['scene_points'][...]
                n_frames = d[key]['n_frames'][()]
                target = d[key]['gt_score'][...]
                
                # target = target.astype(float)
                
            vid_feats = torch.from_numpy(vid_feats).unsqueeze(0).float().to(self.hps.device)
            aud_feats = torch.from_numpy(aud_feats).unsqueeze(0).float().to(self.hps.device)
            target = torch.tensor(target).float().to(self.hps.device)
            boundaries = torch.tensor(boundaries).to(self.hps.device)
            scn_boundaries = torch.tensor(scn_boundaries).to(self.hps.device)

            # Min-Max Normalize frame scores
            target -= target.min()
            target /= target.max()


            # if self.hps.use_cuda:
            #     vid_feats, aud_feats, target, boundaries  = vid_feats, aud_feats.float().to(device), target.float().to(device), boundaries.to(device)

            # seq_len = seq.shape[1]
            # print('Video key:',key, 'video and audio feat shape:', vid_feats.shape, aud_feats.shape)
            batch_size = torch.tensor(self.hps.train_batch_size).to(self.hps.device)
            P = self.model(vid_feats,aud_feats, boundaries, scn_boundaries, batch_size)
            P = P.reshape(-1)
            # print(P)
            P_frames = get_frame_probs(P, boundaries, n_frames, self.hps.device)

            loss_att = 0
            loss = self.criterion(P_frames[:len(target)], target)
            # print('Loss:', loss)
            loss = loss + loss_att
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            losses.append(float(loss))
            
            pbar.update(1)
            # pbar.set_description(f"Loss:{loss.item()}")
            pbar.set_postfix({'Loss': loss.item()})
            del(boundaries)
            del(batch_size)
            
            
            
        return np.mean(np.array(losses))

    
    def video_fscore(self, machine_summary_activations, test_keys, metric='tvsum', att_vecs=None):
        eval_metric = 'avg' if metric == 'tvsum' else 'max'

        # if results_filename is not None:
        #     h5_res = h5py.File(results_filename, 'w')

        fus,fgs = [],[]
        video_scores = []
        for key_idx, key in enumerate(test_keys):
            
            probs = machine_summary_activations[key]


            with h5py.File(self.hps.dataset,'r') as d:
                cps = d[key]['change_points'][...]
                num_frames = d[key]['n_frames'][()]
                nfps = d[key]['n_frame_per_seg'][...].tolist()
                positions = d[key]['picks'][...]
                user_summary = d[key]['user_summary'][...]
                gt_summary = d[key]['gt_summary'][...]

            machine_summary = generate_summary(probs, cps, num_frames, nfps, positions)
            fu, _, _ = evaluate_usersummary(machine_summary, user_summary, eval_metric)
            fg, _, _ = evaluate_gtsummary(machine_summary, gt_summary)
            fus.append(fu)
            fgs.append(fg)

            # Reporting & logging
            video_scores.append([key_idx + 1, key, "{:.1%}".format(fu), "{:.1%}".format(fg)])
            # video_gt_scores.append([key_idx + 1, key, "{:.1%}".format(fg)])
            
        mean_fu = np.mean(fus)
        mean_fg = np.mean(fgs)
        
        return (mean_fu+mean_fg)/2, video_scores

    def validate(self, test_keys, show=None):
        self.model.eval()
        summary = {}
        valLoss = []
        with torch.no_grad():
            for i, key in enumerate(test_keys):
                with h5py.File(self.hps.dataset) as d, h5py.File(self.hps.scene_segs) as scnseg:
                    vid_feats = d[key]['features'][...]
                    aud_feats= d[key]['aud_feats'][...]
                    boundaries = d[key]['fchange_points'][...]
                    scn_boundaries = scnseg[key]['scene_points'][...]
                    n_frames = d[key]['n_frames'][()]
                    target = d[key]['gt_score'][...]
                    
                vid_feats = torch.from_numpy(vid_feats).unsqueeze(0).float().to(self.hps.device)
                aud_feats = torch.from_numpy(aud_feats).unsqueeze(0).float().to(self.hps.device)
                boundaries = torch.tensor(boundaries).to(self.hps.device)
                scn_boundaries = torch.tensor(scn_boundaries).to(self.hps.device)
                target = torch.tensor(target).float().to(self.hps.device)

                batch_size = torch.tensor(self.hps.train_batch_size).to(self.hps.device)
                P = self.model(vid_feats,aud_feats, boundaries, scn_boundaries, batch_size)
                P = P.reshape(-1)
                P_frames = get_frame_probs(P, boundaries, n_frames, self.hps.device)
                
                # Min-Max Normalize frame scores
                target -= target.min()
                target /= target.max()
                valLoss.append(self.criterion(P_frames[:len(target)], target))
                
                summary[key] = P.detach().cpu().numpy()
                del(boundaries)
                del(batch_size)


        f_score, video_scores  = self.video_fscore(summary, test_keys)
        
        if show:
            print('Average F-score: ', f_score)
            scores = [["No.", "Video", "User F-score", "GT F-Score"]] + video_scores
            print_table(scores)
            return
            
        return f_score, video_scores, np.mean(valLoss)
        
        
    def run(self):
        self.model.train()
        
        parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        self.optimizer = torch.optim.Adam(parameters, lr=self.hps.lr[0], weight_decay=self.hps.l2_req)
        
        lr = self.hps.lr[0] 
        
        f = open(hps.split_file)
        splits = json.load(f)
        n_folds = len(splits)
        
        print("Starting training...")
        for split in splits:
            max_val_fscore = 0
            max_val_fscore_epoch = 0
            train_keys = split['train_keys']
            test_keys = split['test_keys']

            epoch_losses=[]
            for epoch in range(self.hps.epochs_max):

                print("Epoch: {0:6}".format(str(epoch)+"/"+str(self.hps.epochs_max)), end='')
                with open(self.hps.results_path, "a") as f:
                    f.write(str(epoch)+'\t')
                    
                self.model.train()

                random.shuffle(train_keys) 
                loss = self.train(train_keys)
                epoch_losses.append(np.mean(loss))
                
                print(f'Epoch:{epoch}, Loss:{loss}')
                with open(self.hps.results_path, "a") as f:
                    f.write(str(loss)+'\t')
                
                
                
                # Evaluate train/test dataset
                val_fscore, video_scores, valLoss = self.validate(test_keys)
                with open(self.hps.results_path, "a") as f:
                    f.write(str(valLoss)+'\n')
                
        
                if max_val_fscore < val_fscore:
                    max_val_fscore = val_fscore
                    max_val_fscore_epoch = epoch

                self.save_model(self.hps.model_folder, f'model@epc{epoch%6}')


            avg_loss = np.array(epoch_losses)
            print("   Avg. Train loss: {0:.05f}".format(np.mean(np.array(epoch_losses))), end='')
            print('   Test F-score avg/max: {0:0.5}/{1:0.5}'.format(val_fscore, max_val_fscore  ))

            
            if self.verbose:
                video_scores = [["No.", "Video", "User F-score", "GT F-Score"]] + video_scores
                print_table(video_scores)
             

        # return max_val_fscore, max_val_fscore_epoch
        return
    
    def save_model(self, path, name):
        # Save model weights
        filename = name+'.pt.tar'
        torch.save(self.model.state_dict(), os.path.join(path, filename))
        

**Train**

In [109]:
args={
    'results_path':'results/training_results.txt',
    'num_splits':5,
    'train_batch_size':5,
    'split_file':'splits/test_split1.json',
    'dataset': '../../Preprocessing/extracted_features/normal/TVSum05s.h5',
    'Scene_segments': '../../Segmentation/Transnet/transnet_segments/tvsumSegs05s.h5',
    'model_folder': 'models',
    # 'model_path':'models/model@epc0.pt.tar',
    'train_batch_size':5,
    'epochs': 50,
    'train_percent':0.8,
    'verbose':True,
    'use_cuda' : False,
    'cuda_device': None,
    'max_summary_length': 0.15
    
}


In [110]:
hps = HParameters(args)
# hps.load_from_args(args.__dict__)
hps.create_split()

Loading dataset from ../../Preprocessing/extracted_features/normal/TVSum05s.h5
Split breakdown: # total videos 50. # train videos 40. # test videos 10
Splits saved to splits/test_split1.json


In [111]:
trainer = Trainer(hps)
trainer.run()

Initializing HMT model and optimizer...
Starting training...
Epoch: 0/50  

  5%|▌         | 2/40 [00:02<00:49,  1.30s/it, Loss=0.165]


Epoch:0, Loss:0.18216563016176224
Epoch: 1/50  

  5%|▌         | 2/40 [00:02<00:48,  1.26s/it, Loss=0.177]


Epoch:1, Loss:0.1481345035135746
Epoch: 2/50  

  5%|▌         | 2/40 [00:01<00:24,  1.52it/s, Loss=0.169]


Epoch:2, Loss:0.216939277946949
Epoch: 3/50  

  0%|          | 0/40 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
f = open(hps.split_file)
splits = json.load(f)
n_folds = len(splits)
test_keys = splits[0]['test_keys']
trainer.validate(test_keys)

In [None]:
f = open(hps.split_file)
splits = json.load(f)
n_folds = len(splits)
test_keys = splits[0]['test_keys']
trainer.validate(test_keys, show=True)

In [None]:
trainer.save_model()

---------------------------

In [None]:
torch.tensor([[1,2,5,3], [1,2,5,3]])

In [92]:
with open("results/myfile3.txt", "a"):
    pass