# A. Prepare the Environment

In [None]:
# Mount Google Drive at /content/drive so that later cells can read from and
# write to paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1. Clone the Code

In [None]:
# Start from a clean checkout: delete any previous clone, fetch the evaluation
# repository into /content/eval, then list its contents as a quick sanity check.
%cd /content
%rm -rf eval
# NOTE(review): no branch, tag or commit is pinned, so a rerun may pick up
# newer code than the run that produced the saved outputs.
!git clone https://github.com/hcmus-thesis-gulu/human-centric-evaluation.git eval
%cd /content/eval
%ls -sla

/content
Cloning into 'eval'...
remote: Enumerating objects: 297, done.[K
remote: Counting objects: 100% (176/176), done.[K
remote: Compressing objects: 100% (118/118), done.[K
remote: Total 297 (delta 110), reused 121 (delta 58), pack-reused 121[K
Receiving objects: 100% (297/297), 43.29 MiB | 25.03 MiB/s, done.
Resolving deltas: 100% (170/170), done.
/content/eval
total 540
  4 drwxr-xr-x 6 root root   4096 Jun 15 07:09  [0m[01;34m.[0m/
  4 drwxr-xr-x 1 root root   4096 Jun 15 07:09  [01;34m..[0m/
  4 drwxr-xr-x 4 root root   4096 Jun 15 07:09  [01;34mclassic[0m/
  4 drwxr-xr-x 8 root root   4096 Jun 15 07:09  [01;34m.git[0m/
  4 -rw-r--r-- 1 root root   3085 Jun 15 07:09  .gitignore
  4 drwxr-xr-x 2 root root   4096 Jun 15 07:09  [01;34mhuman-centric[0m/
  4 drwxr-xr-x 2 root root   4096 Jun 15 07:09  [01;34mpreprocess[0m/
  4 -rw-r--r-- 1 root root     26 Jun 15 07:09  README.md
  4 -rw-r--r-- 1 root root    759 Jun 15 07:09  requirements.txt
504 -rw-r--r-- 1 root 

## 2. Install Required Libraries

In [None]:
# Install the dependencies listed in the cloned repository's requirements.txt.
# `%pip` (rather than `!pip`) targets the environment of the running kernel.
%cd /content/eval
%pip install -r requirements.txt

/content/eval
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting altair==5.0.1 (from -r requirements.txt (line 1))
  Downloading altair-5.0.1-py3-none-any.whl (471 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.5/471.5 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
Collecting blinker==1.6.2 (from -r requirements.txt (line 3))
  Downloading blinker-1.6.2-py3-none-any.whl (13 kB)
Collecting cachetools==5.3.1 (from -r requirements.txt (line 4))
  Downloading cachetools-5.3.1-py3-none-any.whl (9.3 kB)
Collecting certifi==2023.5.7 (from -r requirements.txt (line 5))
  Downloading certifi-2023.5.7-py3-none-any.whl (156 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m157.0/157.0 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting charset-normalizer==3.1.0 (from -r requirements.txt (line 6))
  Downloading charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64

# B. Segmentation Exploration

In [None]:
import h5py
import numpy as np

## 1. Explore numbers

In [None]:
# Open the SumMe HDF5 file located inside the cloned repository.
# The mode is passed explicitly as read-only: without it, older h5py releases
# open the file in append mode, which makes the dataset writable and silently
# creates an empty file if the path is wrong instead of raising an error.
# The handle is deliberately left open because the following cells keep
# reading from `data`.
data = h5py.File('/content/eval/classic/data/eccv16_dataset_summe_google_pool5.h5', 'r')
# Top-level keys are the per-video groups ('video_1', 'video_2', ...).
print(data.keys())

<KeysViewHDF5 ['video_1', 'video_10', 'video_11', 'video_12', 'video_13', 'video_14', 'video_15', 'video_16', 'video_17', 'video_18', 'video_19', 'video_2', 'video_20', 'video_21', 'video_22', 'video_23', 'video_24', 'video_25', 'video_3', 'video_4', 'video_5', 'video_6', 'video_7', 'video_8', 'video_9']>


In [None]:
# Show which datasets are stored for a single video (video_1 used as a sample).
data['video_1'].keys()

<KeysViewHDF5 ['change_points', 'features', 'gtscore', 'gtsummary', 'n_frame_per_seg', 'n_frames', 'n_steps', 'picks', 'user_summary', 'video_name']>

In [None]:
# Load the change points of video_1 as an array with one row per segment.
# Judging by the displayed values, each row is an inclusive
# [first_frame, last_frame] pair and consecutive rows are contiguous.
np.array(data['video_1/change_points'])

array([[   0,  244],
       [ 245,  473],
       [ 474,  827],
       [ 828, 1081],
       [1082, 1233],
       [1234, 1438],
       [1439, 1530],
       [1531, 1642],
       [1643, 1796],
       [1797, 1973],
       [1974, 1998],
       [1999, 2110],
       [2111, 2403],
       [2404, 2539],
       [2540, 2664],
       [2665, 2908],
       [2909, 3160],
       [3161, 3370],
       [3371, 3521],
       [3522, 3649],
       [3650, 3899],
       [3900, 3940],
       [3941, 3990],
       [3991, 4034],
       [4035, 4079],
       [4080, 4138],
       [4139, 4228],
       [4229, 4285],
       [4286, 4342],
       [4343, 4493]])

In [None]:
# Number of frames contained in each segment of video_1.
# Note: despite its name, `fps` holds frames-per-segment counts,
# not a frames-per-second rate.
fps = np.array(data['video_1/n_frame_per_seg'])
print(fps)

[245 229 354 254 152 205  92 112 154 177  25 112 293 136 125 244 252 210
 151 128 250  41  50  44  45  59  90  57  57 151]


In [None]:
# Sanity check: the per-segment frame counts should add up to the total
# number of frames recorded for video_1. Both values are printed side by side.
segment_frame_total = fps.sum()
declared_frame_total = np.array(data['video_1/n_frames'])
print(segment_frame_total, declared_frame_total)

4494 4494


In [None]:
# Read the stored video name once and print it two ways: via the string form
# of the 0-d array and via the Python scalar extracted with .item().
video_name_arr = np.array(data[f'video_1/video_name']).astype(str)
print(str(video_name_arr))
print(video_name_arr.item())

Air_Force_One
b'Air_Force_One'


## 2. Convert to JSON

In [None]:
# Accumulator for the conversion below: maps each HDF5 video key
# (e.g. 'video_1') to a dict holding the video name and its list of segments.
segmentations = {}

In [None]:
# Build one JSON-serialisable record per video stored in the HDF5 file and
# register it in `segmentations` under the video's HDF5 key:
#   {'video_name': str,
#    'segments': [{'idx', 'start', 'end', 'num_frames'}, ...]}
for video_key in data:
    # Decode the stored name explicitly. Calling `.astype(str)` on an
    # object-dtype array that holds `bytes` produces the literal text
    # "b'...'" instead of the name, so bytes and str values are handled
    # separately here.
    raw_name = np.array(data[f'{video_key}/video_name']).item()
    video_name = raw_name.decode('utf-8') if isinstance(raw_name, bytes) else str(raw_name)
    print(f'Processing video with key {video_key} which is {video_name}')

    # Each change-point row is a [start, end] frame pair describing one
    # segment; n_frame_per_seg holds the matching per-segment frame counts.
    change_points = np.array(data[f'{video_key}/change_points'])
    frames_per_segment = np.array(data[f'{video_key}/n_frame_per_seg'])

    print(f'Total {len(change_points)} segments!')

    segmentations[video_key] = {
        'video_name': video_name,
        'segments': [
            {
                'idx': idx,
                # NumPy integers are cast to built-in int because the json
                # module cannot serialise NumPy scalar types.
                'start': int(start),
                'end': int(end),
                'num_frames': int(num_frames),
            }
            for idx, ((start, end), num_frames)
            in enumerate(zip(change_points, frames_per_segment))
        ],
    }

Processing video with key video_1 which is Air_Force_One
Total 30 segments!
Processing video with key video_10 which is Excavators river crossing
Total 65 segments!
Processing video with key video_11 which is Fire Domino
Total 11 segments!
Processing video with key video_12 which is Jumps
Total 7 segments!
Processing video with key video_13 which is Kids_playing_in_leaves
Total 22 segments!
Processing video with key video_14 which is Notre_Dame
Total 31 segments!
Processing video with key video_15 which is Paintball
Total 41 segments!
Processing video with key video_16 which is Playing_on_water_slide
Total 21 segments!
Processing video with key video_17 which is Saving dolphines
Total 45 segments!
Processing video with key video_18 which is Scuba
Total 15 segments!
Processing video with key video_19 which is St Maarten Landing
Total 12 segments!
Processing video with key video_2 which is Base jumping
Total 32 segments!
Processing video with key video_20 which is Statue of Liberty
Total

In [None]:
import json

# Serialise the collected segmentations to a single JSON document and write
# it to the Colab working area (/content), encoded as UTF-8.
serialized_segmentations = json.dumps(segmentations)
with open('/content/segmentations.json', 'w', encoding='utf-8') as json_file:
    json_file.write(serialized_segmentations)

# C. Visualize Results

In [None]:
# Switch to the working directory used by the visualisation cells below and
# recreate an empty demo output folder on Google Drive.
# NOTE(review): /content/context is not created by any cell shown above (the
# clone goes to /content/eval) — confirm where this directory comes from.
%cd /content/context
# WARNING: this permanently deletes any previous demo output stored on Drive.
%rm -rf "/content/drive/MyDrive/HCMUS/Y4/Thesis/Codes/summe/clustering/dino/demo"
%mkdir "/content/drive/MyDrive/HCMUS/Y4/Thesis/Codes/summe/clustering/dino/demo"

## 1. Visualize latent space

In [None]:
# Run visualizer.py from the current working directory for the "Paintball"
# video, reading embeddings and clustering results from Drive and writing to
# the demo folder. This invocation passes --show-image.
# NOTE(review): the meaning of each flag is defined by visualizer.py, which is
# not shown here — confirm against that script.
%run visualizer.py \
--video-folder data/videos \
--embedding-folder "/content/drive/MyDrive/HCMUS/Y4/Thesis/Codes/summe/clustering/dino/embeddings" \
--clustering-folder "/content/drive/MyDrive/HCMUS/Y4/Thesis/Codes/summe/clustering/dino/clustering" \
--demo-folder "/content/drive/MyDrive/HCMUS/Y4/Thesis/Codes/summe/clustering/dino/demo" \
--video-name Paintball \
--visual-type cluster \
--show-image \
--output-fps 4 \
--color-value label

## 2. Visualize temporal frames

In [None]:
# Run visualizer.py again for the "Paintball" video with the same folders and
# options as the latent-space cell, but without --show-image.
# NOTE(review): --visual-type is still `cluster` here; the only difference
# from the previous invocation is the missing --show-image flag — confirm
# this is the intended way to produce the temporal-frame view.
%run visualizer.py \
--video-folder data/videos \
--embedding-folder "/content/drive/MyDrive/HCMUS/Y4/Thesis/Codes/summe/clustering/dino/embeddings" \
--clustering-folder "/content/drive/MyDrive/HCMUS/Y4/Thesis/Codes/summe/clustering/dino/clustering" \
--demo-folder "/content/drive/MyDrive/HCMUS/Y4/Thesis/Codes/summe/clustering/dino/demo" \
--video-name Paintball \
--visual-type cluster \
--output-fps 4 \
--color-value label