## Transnet

**installations**

In [9]:
!pip install ffmpeg
!pip install ffmpeg-python pillow



**packages**

In [2]:
import sys
import argparse
import os

import h5py
import numpy as np
import pandas as pd

import tensorflow as tf

import ffmpeg


#### )) Paths

In [3]:
datasets_path='../../data'
public_dataset_path=datasets_path+'/Public datasets'
custom_data = datasets_path+'/Custom data'

tvsum_data = public_dataset_path+'/ydata-tvsum50-v1_1'
summe_data = public_dataset_path+'/SUMMe'

result_path= 'transnet_segments/'

#### )) Transnet

In [9]:
class TransNetV2:
    

    def __init__(self):
        model_dir = "transnetv2-weights/"
        self._input_size = (27, 48, 3)
        try:
            self._model = tf.saved_model.load(model_dir)
        except OSError as exc:
            raise IOError(f"[TransNetV2] It seems that files in {model_dir} are corrupted or missing. "
                          f"Re-download them manually and retry. For more info, see: "
                          f"https://github.com/soCzech/TransNetV2/issues/1#issuecomment-647357796") from exc

    def predict_raw(self, frames: np.ndarray):
        assert len(frames.shape) == 5 and frames.shape[2:] == self._input_size, \
            "[TransNetV2] Input shape must be [batch, frames, height, width, 3]."
        frames = tf.cast(frames, tf.float32)
        logits, dict_ = self._model(frames)
        single_frame_pred = tf.sigmoid(logits)
        # all_frames_pred = tf.sigmoid(dict_["many_hot"])
        
        return single_frame_pred
    # , all_frames_pred

    def predict_frames(self, frames: np.ndarray):
        assert len(frames.shape) == 4 and frames.shape[1:] == self._input_size, "[TransNetV2] Input shape must be [frames, height, width, 3]."

        def input_iterator():
            # return windows of size 100 where the first/last 25 frames are from the previous/next batch
            # the first and last window must be padded by copies of the first and last frame of the video
            no_padded_frames_start = 25
            no_padded_frames_end = 25 + 50 - (len(frames) % 50 if len(frames) % 50 != 0 else 50)  # 25 - 74

            start_frame = np.expand_dims(frames[0], 0)
            end_frame = np.expand_dims(frames[-1], 0)
            padded_inputs = np.concatenate(
                [start_frame] * no_padded_frames_start + [frames] + [end_frame] * no_padded_frames_end, 0
            )

            ptr = 0
            while ptr + 100 <= len(padded_inputs):
                out = padded_inputs[ptr:ptr + 100]
                ptr += 50
                yield out[np.newaxis]

        predictions = []

        for inp in input_iterator():
            # single_frame_pred, all_frames_pred = self.predict_raw(inp)
            single_frame_pred = self.predict_raw(inp)
            predictions.append((single_frame_pred.numpy()[0, 25:75, 0]))

            print("\r[TransNetV2] Processing video frames {}/{}".format(
                min(len(predictions) * 50, len(frames)), len(frames)
            ), end="")
        print("")

        single_frame_pred = np.concatenate([single_ for single_ in predictions])
        # all_frames_pred = np.concatenate([all_ for single_, all_ in predictions])

        # return single_frame_pred[:len(frames)], all_frames_pred[:len(frames)]  # remove extra padded frames
        return single_frame_pred[:len(frames)]

    def predict_video(self, video_fn: str):

        print("[TransNetV2] Extracting frames from {}".format(video_fn))
        video_stream, err = ffmpeg.input(video_fn).output(
            "pipe:", format="rawvideo", pix_fmt="rgb24", s="48x27"
        ).run(capture_stdout=True, capture_stderr=True)

        video = np.frombuffer(video_stream, np.uint8).reshape([-1, 27, 48, 3])
        return self.predict_frames(video)

    @staticmethod
    def predictions_to_scenes(predictions: np.ndarray, threshold: float = 0.5):
        predictions = (predictions > threshold).astype(np.uint8)

        scenes = []
        t, t_prev, start = -1, 0, 0
        for i, t in enumerate(predictions):
            if t_prev == 1 and t == 0:
                start = i
            if t_prev == 0 and t == 1 and i != 0:
                scenes.append([start, i])
            t_prev = t
        if t == 0:
            scenes.append([start, i])

        # just fix if all predictions are 1
        if len(scenes) == 0:
            return np.array([[0, len(predictions) - 1]], dtype=np.int32)

        return np.array(scenes, dtype=np.int32)


def main(args, mappings=None):
    model = TransNetV2()
    video_folder = args['videoFolder_path']+'/'
    files = os.listdir(video_folder)
    c=1
    
    
    with h5py.File(args['cpsH5_path'],'a') as d:
        for file in files:
            if mappings!=None:
                 name = 'video_'+str(mappings.index(file.split('.')[0])+1)
            else:
                name = 'video_'+str(c)
                
            if name in d:
                c+=1
                continue
                
            # video_frames, 
            single_frame_predictions= model.predict_video(video_folder+file)
            scenes = model.predictions_to_scenes(single_frame_predictions)

            c=c+1
            print(str(c)+'. '+file+' as '+name+'; no.of shots='+str(len(scenes)))
            d.create_dataset(name+'/change_points', data=scenes )
            d.create_dataset(name+'/video_name', data=file )
        





#### )) TVSum segmentations

In [4]:
## Arguments
args={
    'videoFolder_path': tvsum_data,
    'cpsH5_path': result_path+'/tvsumSegs.h5',    
}

In [14]:
## video name mappings
tvsum_info=pd.read_csv(tvsum_data+'/data/ydata-tvsum50-info.tsv',sep='\t')
vnames = tvsum_info['video_id'].tolist()

In [19]:
## generate segments
main(args, vnames)

[TransNetV2] Extracting frames from ../../data/Public datasets/ydata-tvsum50-v1_1/video/Bhxk-O1Y7Ho.mp4
[TransNetV2] Processing video frames 13511/13511
11. Bhxk-O1Y7Ho.mp4 as video_12; no.of shots=43
[TransNetV2] Extracting frames from ../../data/Public datasets/ydata-tvsum50-v1_1/video/byxOvuiIJV0.mp4
[TransNetV2] Processing video frames 3705/3705
12. byxOvuiIJV0.mp4 as video_34; no.of shots=53
[TransNetV2] Extracting frames from ../../data/Public datasets/ydata-tvsum50-v1_1/video/cjibtmSLxQ4.mp4
[TransNetV2] Processing video frames 19406/19406
13. cjibtmSLxQ4.mp4 as video_21; no.of shots=157
[TransNetV2] Extracting frames from ../../data/Public datasets/ydata-tvsum50-v1_1/video/E11zDS9XGzg.mp4
[TransNetV2] Processing video frames 15307/15307
14. E11zDS9XGzg.mp4 as video_46; no.of shots=13
[TransNetV2] Extracting frames from ../../data/Public datasets/ydata-tvsum50-v1_1/video/EE-bNr36nyA.mp4
[TransNetV2] Processing video frames 2941/2941
15. EE-bNr36nyA.mp4 as video_38; no.of shots=1

## )) Custom Dataset

In [10]:
## Arguments
args={
    'videoFolder_path': '../../data/Custom dataset',
    'cpsH5_path': result_path+'/CustomSegs.h5',    
}

main(args)

[TransNetV2] Extracting frames from ../../data/Custom dataset/0tmA_C6XwfM.mp4
[TransNetV2] Processing video frames 3532/3532
1. 0tmA_C6XwfM.mp4 as video_0; no.of shots=21
[TransNetV2] Extracting frames from ../../data/Custom dataset/3eYKfiOEJNs.mp4
[TransNetV2] Processing video frames 4853/4853
2. 3eYKfiOEJNs.mp4 as video_1; no.of shots=41
[TransNetV2] Extracting frames from ../../data/Custom dataset/ehsaas.mp4
[TransNetV2] Processing video frames 146247/146247
3. ehsaas.mp4 as video_2; no.of shots=845
[TransNetV2] Extracting frames from ../../data/Custom dataset/test1.mp4
[TransNetV2] Processing video frames 13260/13260
4. test1.mp4 as video_3; no.of shots=112
[TransNetV2] Extracting frames from ../../data/Custom dataset/video_2.mp4
[TransNetV2] Processing video frames 5320/5320
5. video_2.mp4 as video_4; no.of shots=43


#### )) Generate Scene segmentations

In [11]:
args={
    'TSegs': result_path+'/TVSumSegOri.h5',
    'Shot_cps_path': '../../Preprocessing/extracted_features/normal'+'/TVSum.h5'
}

In [1]:
def gen_scn_bndrs(args):
    with h5py.File(args['Shot_cps_path'], 'r') as tvsum, h5py.File(args['TSegs'], 'a') as d:
        for key in d.keys():
            print('key:', key)
            shots = tvsum[key]['change_points'][()]
            scenes = d[key]['change_points'][()]
            
            lngth = len(shots)
            
            scene_points=[]
            l=r=0
            for start, end in scenes:
                while r<=lngth-2 and shots[r][1]<end:
                    r+=1
                
                
                m = (shots[r][0]+shots[r][1])//2
                
                if(end<=m):
                    if l<=r-1:
                        scene_points.append([l, r-1])
                        l=r
                else:
                    scene_points.append([l, r])
                    l=r+1
                    
                r+=1
                if r==lngth:
                    break
                
                
            scene_points[-1][1]=lngth-1
            d.create_dataset(key+'/scene_points', data=scene_points)
            # d[key+'/scene_points'][...] = scene_points
            print(scene_points)
            
            print('no. of shots:', len(shots))
            print('no. of scenes:', len(scene_points))
            
            

In [13]:
gen_scn_bndrs(args)

key: video_1
[[0, 0], [1, 1], [2, 2], [3, 9], [10, 10], [11, 13], [14, 14], [15, 15], [16, 16], [17, 25], [26, 38], [39, 62], [63, 71], [72, 80], [81, 82], [83, 97], [98, 107], [108, 117], [118, 136], [137, 149], [150, 176], [177, 177], [178, 180], [181, 183]]
no. of shots: 184
no. of scenes: 24
key: video_10
[[0, 3], [4, 6], [7, 8], [9, 10], [11, 12], [13, 13], [14, 16], [17, 17], [18, 19], [20, 27], [28, 28], [29, 29], [30, 31], [32, 32], [33, 33], [34, 35], [36, 36], [37, 38], [39, 40], [41, 44], [45, 46], [47, 48], [49, 55], [56, 56], [57, 57], [58, 60], [61, 62], [63, 64], [65, 66], [67, 69]]
no. of shots: 70
no. of scenes: 30
key: video_11
[[0, 3], [4, 6], [7, 16], [17, 34], [35, 44], [45, 46], [47, 49], [50, 51], [52, 52], [53, 54], [55, 57], [58, 60], [61, 65], [66, 72], [73, 82]]
no. of shots: 83
no. of scenes: 15
key: video_12
[[0, 1], [2, 3], [4, 6], [7, 9], [10, 16], [17, 17], [18, 18], [19, 19], [20, 35], [36, 36], [37, 42], [43, 48], [49, 49], [50, 52], [53, 54], [55, 55]

In [27]:
result_path+'BigSegs.h5'

'transnet_segments/BigSegs.h5'

In [4]:
args={
    'TSegs': result_path+'CustomSegs.h5',
    'Shot_cps_path': '../../Preprocessing/extracted_features/normal'+'/customSet.h5'
}

gen_scn_bndrs(args)

key: video_1
[[0, 2], [3, 4], [5, 7], [8, 9], [10, 11], [12, 15], [16, 16], [17, 18], [19, 20], [21, 21], [22, 23], [24, 33], [34, 48], [49, 51], [52, 59], [60, 60], [61, 61], [62, 62], [63, 67], [68, 68], [69, 71]]
no. of shots: 72
no. of scenes: 21
key: video_2
[[0, 2], [3, 4], [5, 6], [7, 7], [8, 11], [12, 14], [15, 15], [16, 17], [18, 20], [21, 21], [22, 25], [26, 27], [28, 29], [30, 30], [31, 32], [33, 34], [35, 35], [36, 36], [37, 37], [38, 38], [39, 39], [40, 40], [41, 43], [44, 47], [48, 54], [55, 60], [61, 67], [68, 71], [72, 78], [79, 80], [81, 81], [82, 83], [84, 85], [86, 86], [87, 87], [88, 90], [91, 94], [95, 95], [96, 98]]
no. of shots: 99
no. of scenes: 39
key: video_3
[[0, 0], [1, 1], [2, 2], [3, 6], [7, 7], [8, 9], [10, 18], [19, 19], [20, 21], [22, 24], [25, 25], [26, 26], [27, 27], [28, 28], [29, 29], [30, 30], [31, 31], [32, 32], [33, 33], [34, 36], [37, 37], [38, 39], [40, 41], [42, 44], [45, 48], [49, 49], [50, 51], [52, 52], [53, 53], [54, 54], [55, 55], [56, 56

In [29]:
with h5py.File(result_path+'BigSegs.h5') as g:
    for key in g.keys():
        print(key)

video_1
video_10
video_11
video_12
video_13
video_14
video_15
video_16
video_17
video_18
video_19
video_2
video_20
video_21
video_22
video_23
video_24
video_25
video_26
video_27
video_28
video_29
video_3
video_30
video_31
video_32
video_33
video_34
video_35
video_36
video_37
video_38
video_39
video_4
video_40
video_41
video_42
video_43
video_44
video_45
video_46
video_47
video_48
video_49
video_5
video_50
video_6
video_7
video_8
video_9


'ls' is not recognized as an internal or external command,
operable program or batch file.
