# Make a summary

## Use the model predictions on the original video to select frames and concatenate them into a playable summary. Use heuristics to make the concatenation smoother and easier to watch.

### steps to create summary

1. select dataset
2. get predictions; prediction, video, frame_nr
3. select predictions above a certain threshold
4. concatenate predicted frames and save as summary
5. copy video from remote to local
6. watch video

### steps to get ground truth summary

7. select dataset, for videosum just copy trailers from remote to local, otherwise:
8. get ground truth data
9. concatenate ground truth frames and save as summary
10. copy video from remote to local
11. watch video

### improve predictions by
12. create torch dataset per video in dataset, and run model through whole video to make sure no frames are missed

In [1]:
import abc
import h5py
import pickle
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 1000)
import sys
sys.path.append('/home/emma/summary_evaluation/score_evaluators')
from prediction_data import PredictionData 
from operator import add

### 1. Select dataset

* 'MovieSum'
* 'SumMe'
* 'TVSum'

In [126]:
class VideosumDataset(PredictionData):
    """Prediction data of one video-summarisation dataset, plus frame timing.

    On top of what ``PredictionData`` provides, this class builds:

    * ``meta_data``  - the unpickled ``<name>_ds.pickle`` metadata,
    * ``fps_df``     - one row per video with its frames-per-second,
    * ``data_frame`` - one row per predicted frame (prediction, label,
      video id, frame number, timestamp, padded prediction),
    * ``segments``   - per video, the timestamps of the selected frames.
    """

    def __init__(self, name, num_classes, local=True):
        """Load predictions and metadata for the dataset called ``name``.

        Args:
            name: Dataset name, e.g. ``'MovieSum'``. The lower-cased form is
                used for the model name and the metadata file name.
            num_classes: ``1`` selects the ``'<name>_one'`` model, any other
                value the ``'<name>_two'`` model.
            local: Forwarded unchanged to ``PredictionData.__init__``.
        """
        self.repr_name = name
        self.name = name.lower()
        suffix = '_one' if num_classes == 1 else '_two'
        self.model_name = self.name + suffix
        print(self.model_name)
        super().__init__(self.name, num_classes, self.model_name, local)

        # Order matters: the data frame needs the fps table, and the
        # segments need the data frame.
        self.meta_data = self.get_meta_data()
        self.fps_df = self.get_fps()

        self.data_frame = self.get_data_set()
        self.segments = self.get_segments()

    def get_meta_data(self):
        """Return the unpickled content of ``<name>_ds.pickle``.

        The file is looked up relative to the current working directory.
        """
        file_path = self.name + '_ds.pickle'
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        with open(file_path, 'rb') as file:
            return pickle.load(file)

    def get_fps(self):
        """Return a data frame with columns ``vid`` and ``fps``, one row per video.

        For MovieSum the video id is the integer before the first ``'_'`` in
        the metadata key; for the other datasets the key is used as is.
        """
        vids = []
        fps = []
        for movie_name, data in self.meta_data.items():
            if self.name == 'moviesum':
                vid = int(movie_name.split('_')[0])
            else:
                vid = movie_name
            vids.append(vid)
            fps.append(data['fps'])

        return pd.DataFrame({'vid': vids, 'fps': fps})

    def get_frame_second(self, fn, fps):
        """Convert frame number ``fn`` to a timestamp in seconds.

        Args:
            fn: Frame number as stored with the predictions.
            fps: Frames per second of the video the frame belongs to.

        Returns:
            ``fn / fps``, multiplied by 25 for MovieSum.
        """
        # Compare against the dataset *name*; the original compared against
        # the bare identifier ``moviesum``, which is not a string.
        if self.name == 'moviesum':
            # NOTE(review): presumably MovieSum frame numbers index every
            # 25th frame of the source video - confirm against the extractor.
            frame_time = (fn / fps) * 25
        else:
            frame_time = fn / fps
        return frame_time

    def get_data_set(self):
        """Assemble the per-frame data frame, sorted by video and time.

        Reads ``y_pred``, ``y_true``, ``y_prob``, ``vids`` and ``fns`` from
        the instance (expected to be set by ``PredictionData.__init__``).

        Returns:
            Data frame with columns ``y_pred``, ``y_true``, optionally
            ``y_prob``, ``vid``, ``fns``, ``fps``, ``f_time`` and
            ``concat_pred``.
        """
        data_frame = pd.DataFrame()
        data_frame['y_pred'] = self.y_pred
        data_frame['y_true'] = self.y_true
        if self.y_prob is not None:
            data_frame['y_prob'] = self.y_prob
        data_frame['vid'] = self.vids
        data_frame['fns'] = self.fns

        # Left join keeps every predicted frame even if a video has no fps
        # entry (its fps, and therefore f_time, is then NaN).
        data_frame = pd.merge(data_frame, self.fps_df, how='left', on='vid')
        data_frame['f_time'] = data_frame.apply(
            lambda row: self.get_frame_second(row.fns, row.fps), axis=1)
        data_frame = data_frame.sort_values(by=['vid', 'f_time'], ascending=True)

        # NOTE(review): the padding runs over the whole sorted column, so a
        # positive frame at the end of one video also marks the first frame
        # of the next video - confirm whether that is acceptable.
        data_frame['concat_pred'] = self.calc_concatenated_predictions(
            data_frame.y_pred.values)
        return data_frame

    def calc_concatenated_predictions(self, y_pred):
        """Pad every positive prediction by one frame on each side.

        Gaps of one or two zeros between two ones are thereby closed, which
        makes the concatenated summary smoother. Implemented with shifted
        copies so that no explicit neighbour-by-neighbour loop is needed.

        Args:
            y_pred: Sequence of 0/1 predictions in playback order.

        Returns:
            Array of the same length holding the padded predictions.
        """
        binary = [int(x) for x in y_pred]
        shifted_right = [0] + binary[:-1]  # each frame sees its predecessor
        shifted_left = binary[1:] + [0]    # each frame sees its successor

        neighbours = np.maximum(np.array(shifted_right), np.array(shifted_left))
        return np.maximum(neighbours, y_pred)

    def get_segments(self):
        """Return ``{vid: [f_time, ...]}`` for frames with ``concat_pred == 1``."""
        segments = {}
        for video_name, video_data in self.data_frame.groupby(by='vid'):
            selected = video_data[video_data.concat_pred == 1]
            segments[video_name] = list(selected.f_time.values)
        return segments

    



### 2. get predictions; prediction, video, frame_nr

In [130]:
# Build one dataset wrapper per benchmark. Each constructor prints the model
# name it selects: num_classes=1 -> '<name>_one', otherwise '<name>_two'.
moviesum = VideosumDataset(name='MovieSum', num_classes=2)
tvsum = VideosumDataset(name='TVSum', num_classes= 1)
summe = VideosumDataset(name='SumMe', num_classes= 1)


moviesum_two
probabilities of predictions of moviesum not saved
tvsum_one
summe_one


### 3. select predictions above a certain threshold
0.5 is standard (pred_data.y_pred)

In [22]:
# Probability threshold used to re-derive the binary predictions.
t = 0.4
# MovieSum is skipped here - presumably because its prediction probabilities
# were not saved (see the constructor output); confirm before re-enabling.
# NOTE(review): the commented attribute below is spelled `y_thresh`, while the
# executed lines read `y_thres`.
# moviesum.get_y(threshold=t, random=False)
# moviesum.y_thresh
# get_y is not defined in the visible code (presumably inherited from
# PredictionData); it appears to populate `y_thres` - verify there.
tvsum.get_y(threshold=t, random=False)
tvsum.y_thres
summe.get_y(threshold=t, random=False)
# Last expression of the cell: its value is what the notebook displays.
summe.y_thres

array([0., 0., 0., ..., 0., 0., 0.])

### 4. concatenate predicted frames or seconds and save as summary
#### without audio:
ffmpeg -i <input_video> -pattern_type glob -framerate 1 -i '</path/to/overlays/*.png>' -filter_complex overlay <output_video>
#### with audio:
ffmpeg -i input_file.mp4 -ss 00:00:00 -t 00:10:00 -c:v copy -an -f mp4 ep1_cut7m.mp4
(note: `-an` drops the audio track; remove it to keep the audio)

cat mylist.txt
file '/path/to/file1'
file '/path/to/file2'
file '/path/to/file3'

ffmpeg -f concat -i mylist.txt -c copy output.mp4

In [142]:
# Compute the segment times of every movie and keep them per movie, instead
# of letting each iteration overwrite the previous movie's result.
segment_times_by_movie = {}
for movie_name, f_times in moviesum.segments.items():
    # get_segment_times is defined outside the visible cells; it receives the
    # list of selected frame timestamps of one movie.
    segment_times = get_segment_times(f_times)
    segment_times_by_movie[movie_name] = segment_times
# `segment_times` still holds the result for the last movie, so cells that
# inspect it keep working.

In [143]:
# Display the segment times left over from the last movie processed above.
segment_times

[(1.0427093760427095, 7784.868201534869),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 7785.910910910911),
 (0, 778

[[74.03229166666667,
  74.03229166666667,
  74.03229166666667,
  75.07499999999999,
  87.58749999999999],
 [88.63020833333333,
  88.63020833333333,
  89.67291666666667,
  89.67291666666667,
  90.71562499999999,
  106.35624999999999],
 [108.44166666666666, 109.48437499999999, 325.325],
 [387.8875,
  388.9302083333333,
  388.9302083333333,
  389.97291666666666,
  389.97291666666666,
  389.97291666666666,
  391.015625,
  391.015625,
  391.015625,
  397.27187499999997],
 [398.3145833333333,
  398.3145833333333,
  399.35729166666664,
  399.35729166666664,
  400.4,
  452.53541666666666],
 [454.6208333333333, 455.6635416666666, 467.1333333333333],
 [531.78125,
  532.8239583333333,
  532.8239583333333,
  533.8666666666667,
  595.3864583333333],
 [596.4291666666667,
  596.4291666666667,
  597.471875,
  597.471875,
  597.471875,
  598.5145833333332,
  598.5145833333332,
  611.0270833333333],
 [611.0270833333333,
  612.0697916666667,
  612.0697916666667,
  612.0697916666667,
  613.1125,
  613.112

In [84]:
# Inspect the selected frame timestamps per video id (MovieSum shown; swap in
# one of the commented lines to look at another dataset).
moviesum.segments
# tvsum.segments
# summe.segments

{55: {'f_time': array([  72.98958333,   72.98958333,   74.03229167,   74.03229167,
           74.03229167,   75.075     ,   87.5875    ,   87.5875    ,
           88.63020833,   88.63020833,   89.67291667,   89.67291667,
           90.715625  ,  106.35625   ,  107.39895833,  108.44166667,
          109.484375  ,  325.325     ,  386.84479167,  387.8875    ,
          387.8875    ,  388.93020833,  388.93020833,  389.97291667,
          389.97291667,  389.97291667,  391.015625  ,  391.015625  ,
          391.015625  ,  397.271875  ,  397.271875  ,  398.31458333,
          398.31458333,  399.35729167,  399.35729167,  400.4       ,
          452.53541667,  453.578125  ,  454.62083333,  455.66354167,
          467.13333333,  529.69583333,  530.73854167,  531.78125   ,
          532.82395833,  532.82395833,  533.86666667,  595.38645833,
          596.42916667,  596.42916667,  596.42916667,  597.471875  ,
          597.471875  ,  597.471875  ,  598.51458333,  598.51458333,
          611.027083

In [33]:
# ffmpeg template for cutting one clip. Placeholders, in order: input video,
# start time (-ss), duration (-t), output file. `-c:v copy` copies the video
# stream without re-encoding, `-an` drops audio, `-f mp4` forces MP4 output.
command = 'ffmpeg -i {} -ss {} -t {} -c:v copy -an -f mp4 {}'

In [22]:
# Fill in the template. input_video, start_time, duration and output_video are
# not defined in the visible cells - presumably set elsewhere in the notebook.
# NOTE(review): values are inserted unquoted; paths containing spaces would
# need quoting before the string is handed to a shell.
command.format(input_video, start_time, duration, output_video)