## *0. Library & Defined functions* ##

In [1]:
import fiftyone as fo
import clip
import torch
import json
import pandas as pd
import os
import requests

In [16]:
# Root folder that contains the `video/` and `keyframes/` sub-folders.
# Set the AIC2022_DATA_ROOT environment variable to point somewhere else
# instead of editing this machine-specific default.
DATA_ROOT = os.environ.get(
    'AIC2022_DATA_ROOT', '/run/media/zephy_manjaro/Crucial X6/AIC2022/data')

# The empty last component keeps the trailing slash these constants always had.
PATH_VIDEO = os.path.join(DATA_ROOT, 'video', '')
PATH_KEYFRAMES = os.path.join(DATA_ROOT, 'keyframes', '')
# Per-video metadata JSON files, relative to the notebook's working directory.
PATH_METADATA = 'data/metadata'

In [3]:
def combine_df(bonus_df, main_df):
    """Put the rows of ``bonus_df`` ahead of ``main_df`` without growing the result.

    Rows are dropped from the tail of ``main_df`` to make room for the bonus
    rows, so the result has ``max(len(main_df), len(bonus_df))`` rows.

    Args:
        bonus_df: DataFrame with ``video`` and ``frameid`` columns; its rows
            come first in the result.
        main_df: DataFrame with the same columns; only its leading rows that
            still fit are kept.
    Returns:
        A new DataFrame with a fresh 0..n-1 index and ``frameid`` cast to int.
    """
    # Clamp at zero: DataFrame.head(-k) means "all but the last k rows", so a
    # negative count would let main_df rows leak in when bonus_df is longer.
    remain_len_for_main_df = max(len(main_df) - len(bonus_df), 0)
    remain_df = main_df.head(remain_len_for_main_df)

    final_df = pd.concat([bonus_df, remain_df], ignore_index=True)

    # Concatenating with an empty frame can leave frameid as object/float.
    final_df['frameid'] = final_df['frameid'].astype(int)

    return final_df

def add_confident_samples(dataset, selected_samples, df):
    """Prepend the selected samples to the submission table.

    Args:
        dataset: collection indexable by sample id; each sample exposes the
            ``video`` and ``frameid`` fields.
        selected_samples: iterable of sample ids, in the order they should
            appear in the result.
        df: current submission DataFrame (``video`` / ``frameid`` columns).
    Returns:
        The DataFrame produced by ``combine_df(bonus_df, df)``, where
        ``bonus_df`` holds one row per selected sample.
    """
    # Look every selected sample up by its id.
    picked = [dataset[sample_id] for sample_id in selected_samples]

    bonus_df = pd.DataFrame({
        'video': [sample['video'] for sample in picked],
        'frameid': [int(sample['frameid']) for sample in picked],
    })

    return combine_df(bonus_df, df)


def get_neighbor_frames(video, frameid, delta=100, path_frames=None):
    """Return the paths of the keyframes around ``frameid`` in ``video``.

    The keyframe file names of the video are sorted, the first name that is
    ``>= frameid`` is located, and the files in the half-open window
    ``[index - delta, index + delta)`` are returned (clipped to the list).

    Args:
        video: name of the video's folder inside ``path_frames``.
        frameid: keyframe file name to search for (e.g. ``'005966.jpg'``);
            it is compared as a string against the directory listing.
        delta: number of list positions taken on each side of the match.
        path_frames: folder holding one sub-folder per video; defaults to
            the notebook-level ``PATH_KEYFRAMES``.
    Returns:
        List of file paths, in sorted order.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from bisect import bisect_left

    if path_frames is None:
        path_frames = PATH_KEYFRAMES

    path_frames_video = os.path.join(path_frames, video)
    frameid_list = sorted(os.listdir(path_frames_video))

    # bisect_left gives the first position whose name is >= frameid. When
    # every name sorts before frameid it returns len(frameid_list), so the
    # window is anchored at the END of the video (the previous hand-written
    # search returned -1 there and produced a window at the start).
    index = bisect_left(frameid_list, frameid)
    left = max(0, index - delta)
    right = min(len(frameid_list), index + delta)

    return [os.path.join(path_frames_video, file)
            for file in frameid_list[left:right]]

def get_fps(video_name):
    """Read the ``fps`` entry from the metadata JSON of ``video_name``.

    Args:
        video_name: video name without extension; the file
            ``<PATH_METADATA>/<video_name>.json`` is opened.
    Returns:
        The value stored under the ``'fps'`` key.
    """
    meta_file = os.path.join(PATH_METADATA, video_name + '.json')

    with open(meta_file, 'r') as handle:
        metadata = json.load(handle)

    return metadata['fps']


def get_url(video_name):
    """Read the ``watch_url`` entry from the metadata JSON of ``video_name``.

    Args:
        video_name: video name without extension; the file
            ``<PATH_METADATA>/<video_name>.json`` is opened.
    Returns:
        The value stored under the ``'watch_url'`` key.
    """
    meta_file = os.path.join(PATH_METADATA, video_name + '.json')

    with open(meta_file, 'r') as handle:
        metadata = json.load(handle)

    return metadata['watch_url']

def get_timestamp(fps, frame_id):
    '''
    Convert a frame index into a (hour, minute, second) position in the video.
    Args:
        fps: frames per second of the video
        frame_id: index of the frame
    Returns:
        timestamp: tuple of (hour, minute, second), each truncated to an int
    '''
    # Whole hours first, then work on the frames left over after them.
    hour = int(frame_id / (fps * 3600))
    frames_after_hours = frame_id - hour * fps * 3600

    minute = int(frames_after_hours / (fps * 60))
    frames_after_minutes = frames_after_hours - minute * fps * 60

    second = int(frames_after_minutes / fps)
    return (hour, minute, second)

def get_frame_id(video_name, fps, timestamp=(0,0,0), path_frames=PATH_KEYFRAMES):
    '''
    Convert a timestamp into the frame index at that position.

    The result is computed from `fps` and `timestamp` only; it is NOT snapped
    to a keyframe that exists on disk.
    Args:
        video_name: name of the video (currently unused)
        fps: frames per second of the video
        timestamp: tuple of (hour, minute, second)
        path_frames: path to the keyframes folder (currently unused)
    Returns:
        frame_id: hour * fps * 3600 + minute * fps * 60 + second * fps
    '''
    hours, minutes, seconds = timestamp[0], timestamp[1], timestamp[2]
    return hours * fps * 3600 + minutes * fps * 60 + seconds * fps


def submit_result(item, frame, session, timeout=30):
    """Submit one (video, frame) answer to the event-retrieval server.

    Args:
        item: name of the video being submitted.
        frame: frame id inside that video.
        session: session token sent with the submission.
        timeout: seconds to wait for the server before ``requests`` raises a
            timeout error, so an unreachable server cannot block the kernel
            indefinitely.
    Returns:
        The decoded JSON body of the server's response.
    """
    url = 'https://eventretrieval.one/api/v1/submit'
    params = {
        'item': item,
        'frame': frame,
        'session': session,
    }

    response = requests.get(url, params=params, timeout=timeout)
    return response.json()




## *1. Load top samples with highest score from inference notebook* 

In [None]:
# Number that selects which `submission/<id>_top_k_images` export is loaded
# (presumably the query number). Note that this name shadows the builtin `id`.
id = 2

submission_dir = f'submission/{id}_top_k_images'
dataset = fo.Dataset.from_dir(
    dataset_dir=submission_dir,
    dataset_type=fo.types.FiftyOneDataset,
)


## *2. Adjust Submission*

### 2.1 Launch App

In [58]:
# Start the FiftyOne App for the loaded dataset. With auto=False the App is
# not displayed right away; use session.show() or session.open_tab() to view it.
session = fo.launch_app(dataset, auto=False)


Session launched. Run `session.show()` to open the App in a cell output.


### 2.2 Finalize Result 

In [27]:
# Empty submission table with the two columns used throughout: video, frameid
df_submission = pd.DataFrame({'video': [], 'frameid': []})

#### 2.2.1 Select the first 100 samples


In [28]:
# Select
head_100_samples_id = [sample['id'] for sample in dataset][:100]
session.select_samples(head_100_samples_id)

In [29]:
# Get selected samples id
selected_samples = session.selected
# Add to submission
df_submission = add_confident_samples(dataset, selected_samples, df_submission)
# Clear selected samples
session.clear_selected()

#### 2.2.2 Modify result

Select (tick) the samples that you consider reliable. <br />
<span style="color:red">**Notice**:</span> *Samples are added to the submission dataframe in the same order in which the user selected them.*

In [55]:
# Select GUI above
session.open_tab()

<IPython.core.display.Javascript object>

In [10]:
# Get selected samples id
selected_samples = session.selected
# Add to submission
df_submission = add_confident_samples(dataset, selected_samples, df_submission)
# Clear selected samples
session.clear_selected()


*Optional*: Use the steps below if no suitable sample could be found, but some samples look related to the answer. Otherwise, skip them and move on to **Section 3**.
- **1st alternative method**: 2.2.2.1 Get the neighbor frames around a suspected frame -> add them to the submission above
- **2nd alternative method**: 2.2.2.2 Use a timestamp in the video that matches the query -> add it to the submission above

##### **2.2.2.1 Select neighbor frames**

- Input video and frameid of suspected sample

In [60]:
# Name of the video's folder inside PATH_KEYFRAMES
video = 'C00_V0221'
# Keyframe file name (extension included), as it appears in that folder's listing
frameid = '005966.jpg'

- Export neighbor frames of suspected sample

In [61]:
neighbor_frameid_list = get_neighbor_frames(video=video, frameid=frameid, delta=100)
neighbor_dataset = fo.Dataset.from_images(neighbor_frameid_list)

# Fill the `video` and `frameid` fields from the last two components of each
# file path, after dropping its last four characters (the file extension).
for sample in neighbor_dataset:
    path_without_ext = sample['filepath'][:-4]
    _, video_part, frameid_part = path_without_ext.rsplit('/', 2)
    sample['video'] = video_part
    sample['frameid'] = frameid_part
    sample.save()

# Open a second App session dedicated to the neighbor frames
neighbor_session = fo.launch_app(neighbor_dataset, auto=False)
neighbor_session.show()




 100% |█████████████████| 200/200 [48.3ms elapsed, 0s remaining, 4.1K samples/s]   
Session launched. Run `session.show()` to open the App in a cell output.


Visualize the neighbor frames and select the ones that match the query

In [18]:
#Select opened GUI

In [30]:
# Get selected samples id
selected_samples = neighbor_session.selected
# Add to submission
df_submission = add_confident_samples(neighbor_dataset, selected_samples, df_submission)
# Clear selected samples
neighbor_session.clear_selected()

##### **2.2.2.2 Get frames based on timestamp**

In [41]:
video_name = 'C00_V0000.mp4'
timestamp  = (0, 12, 30)
fps = 25

frame_id = get_frame_id(video_name, fps, timestamp)

# Every other row of the submission names the video without a file extension,
# so drop '.mp4' here to keep the `video` column consistent.
video_id = os.path.splitext(video_name)[0]
df_bonus_add = pd.DataFrame({'video': [video_id], 'frameid': [frame_id]})

# Put the timestamp-derived frame at the top of the submission table
df_submission = combine_df(df_bonus_add, df_submission)



## 3. Validate by viewing video

In [56]:
video_name = 'C02_V0381'
frame_id = '001280'

h,m,s = get_timestamp(get_fps(video_name), int(frame_id))

total_second = h * 3600 + m * 60 + s
video_path = os.path.join('/run/media/zephy_manjaro/Crucial\ X6/AIC2022/data/video/', video_name + '.mp4')

# Play video
!vlc --start-time=$total_second  $video_path 


VLC media player 3.0.17.4 Vetinari (revision 3.0.13-8-g41878ff4f2)
[[32;1m0000561d2ec5bab0[0m] main libvlc: [0;1mRunning vlc with the default interface. Use 'cvlc' to use vlc without interface.[0m
[[32;1m00007fdf2c007a00[0m] gl gl: [0;1mInitialized libplacebo v4.208.0 (API v208)[0m
[[32;1m00007fdf2c007a00[0m] glconv_vaapi_x11 gl error: [31;1mvaCreateSurfaces: attribute not supported[0m
[[32;1m00007fdf280563c0[0m] main video output error: [31;1mvideo output creation failed[0m
[[32;1m00007fdf3cc12f80[0m] main decoder error: [31;1mfailed to create video output[0m
[[32;1m00007fdf2c007a00[0m] gl gl: [0;1mInitialized libplacebo v4.208.0 (API v208)[0m
[[32;1m00007fdf3cc12f80[0m] avcodec decoder: [0;1mUsing NVIDIA VDPAU Driver Shared Library  515.76  Mon Sep 12 19:09:07 UTC 2022 for hardware decoding[0m
[[32;1m00007fdf3cc12f80[0m] main decoder error: [31;1mTimestamp conversion failed for 52080001: no reference clock[0m
[[32;1m00007fdf3cc12f80[0m] main decoder 

## 4. Export submission 

In [31]:
# Check submission format
df_submission

Unnamed: 0,video,frameid
0,C00_V0047,22446
1,C00_V0041,2753
2,C00_V0047,21054
3,C00_V0047,21088
4,C00_V0047,21280
...,...,...
95,C01_V0228,12982
96,C01_V0231,9659
97,C01_V0232,10323
98,C01_V0232,10934


In [27]:
# Submit the first row of the submission table
item = df_submission['video'][0]
frame = df_submission['frameid'][0]

# Keep the token under its own name: rebinding `session` would replace the
# FiftyOne App session created in section 2.1, which the selection cells
# still call (`session.selected`, `session.clear_selected()`).
# NOTE(review): the token is a credential - set AIC2022_SESSION_ID in the
# environment rather than relying on the fallback literal below.
submission_session_id = os.environ.get(
    'AIC2022_SESSION_ID', 'node0lr605kqgqel0ztycevox5s5k21')

submit_result(item, frame, submission_session_id)

{'description': "Media item 'C00_V0041 (collection = UID(7e96588b-761d-45db-a86c-c73d8638c0f4))' could not be found.",
 'status': False}