## *0. Library & Defined functions* ##

In [19]:
import bisect
import json
import os

import clip
import fiftyone as fo
import pandas as pd
import torch

In [None]:
# Root folders of the local data, given as relative paths.
# NOTE(review): PATH_VIDEO is not referenced by any cell visible in this notebook.
PATH_VIDEO = 'data/video'
# get_neighbor_frames lists the sub-folder PATH_KEYFRAMES/<video> to find frame files.
PATH_KEYFRAMES = 'data/keyframes'


In [24]:
def combine_df(bonus_df, main_df):
    """Stack ``bonus_df`` on top of the head of ``main_df``.

    All rows of ``bonus_df`` come first, in order. They are followed by the
    leading rows of ``main_df`` that still fit in a budget of
    ``len(main_df) - len(bonus_df)`` rows; the tail of ``main_df`` is dropped
    to make room. The result therefore has
    ``max(len(bonus_df), len(main_df))`` rows.

    Args:
        bonus_df: Rows to prioritise; every one of them is kept.
        main_df: Existing rows; only its head is kept.

    Returns:
        A new DataFrame with a fresh ``0..n-1`` index. Neither input is
        modified.
    """
    # Clamp at zero: DataFrame.head(-k) means "all rows except the last k",
    # so a negative budget would re-append part of main_df instead of nothing.
    remain_len_for_main_df = max(0, len(main_df) - len(bonus_df))
    remain_df = main_df.head(remain_len_for_main_df)

    return pd.concat([bonus_df, remain_df], ignore_index=True)

def add_confident_samples(dataset, selected_samples, df): # concat above
    """Prepend the chosen samples to the submission table.

    Args:
        dataset: Container indexable by sample id; each item must expose the
            ``'video'`` and ``'frameid'`` fields.
        selected_samples: Iterable of sample ids, in the order they should
            appear in the result.
        df: Current submission DataFrame.

    Returns:
        The result of ``combine_df`` applied to the newly built
        ``video``/``frameid`` frame and ``df``.
    """
    # Look up each sample and read both fields before moving to the next id.
    picked = [
        (entry['video'], entry['frameid'])
        for entry in (dataset[sample_id] for sample_id in selected_samples)
    ]

    # Column lists (rather than row records) keep both columns present even
    # when nothing was selected.
    bonus_df = pd.DataFrame({
        'video': [video_name for video_name, _ in picked],
        'frameid': [frame_name for _, frame_name in picked],
    })

    return combine_df(bonus_df, df)


def get_neighbor_frames(video, frameid, delta=100):
    """Return the paths of the keyframes surrounding ``frameid`` in ``video``.

    The files in ``PATH_KEYFRAMES/<video>`` are listed and sorted by name.
    The anchor is the position of the first file whose name is ``>= frameid``.
    If every file name is smaller than ``frameid`` the anchor is the end of
    the list, so the frames closest to it (the last ``delta`` ones) are
    returned.

    Args:
        video: Name of the sub-folder of ``PATH_KEYFRAMES`` to search.
        frameid: File name to locate, e.g. ``'007006.jpg'``. It is compared
            as a string against the directory listing.
        delta: Number of list positions to include before the anchor. The
            window is half-open, ``[anchor - delta, anchor + delta)``, so it
            holds the anchor itself plus ``delta - 1`` entries after it.

    Returns:
        List of ``PATH_KEYFRAMES/<video>/<file>`` paths in sorted order,
        clipped to the bounds of the listing.

    Raises:
        OSError: If the video folder cannot be listed.
    """
    path_frames_video = os.path.join(PATH_KEYFRAMES, video)
    frameid_list = sorted(os.listdir(path_frames_video))

    # Leftmost insertion point == index of the first name >= frameid, or
    # len(frameid_list) when no such name exists.
    # NOTE(review): the comparison is lexicographic; this assumes file names
    # are zero-padded to equal width so name order matches frame order.
    index = bisect.bisect_left(frameid_list, frameid)

    left = max(0, index - delta)
    right = min(len(frameid_list), index + delta)

    return [os.path.join(path_frames_video, file)
            for file in frameid_list[left:right]]


## *1. Load top samples with highest score from inference notebook* 

In [2]:
# Query number; later cells format it into the input folder name and the
# exported CSV file name.
# NOTE(review): this rebinds the built-in `id()` for the rest of the notebook.
id = 86

In [3]:
# Import the FiftyOne-format dataset stored in the '<id>_top_k_images' folder
# for the query number set above.
dataset = fo.Dataset.from_dir(
    dataset_dir='data/result-v3/kaggle/working/{}_top_k_images'.format(id),
    dataset_type=fo.types.FiftyOneDataset
)


Importing samples...
 100% |█████████████████| 500/500 [11.4ms elapsed, 0s remaining, 43.7K samples/s]      
Import complete


## *2. Adjust Submission*

### 2.1 Launch App

In [4]:
# Start an App session on the loaded dataset. With auto=False the App is not
# shown in this cell's output (see the launch message printed below).
session = fo.launch_app(dataset, auto=False)
# Presumably opens the App in a separate browser tab - TODO confirm.
session.open_tab()


Session launched. Run `session.show()` to open the App in a cell output.


<IPython.core.display.Javascript object>

In [5]:
# Open the App in this cell's output, as suggested by the launch message.
session.show()


### 2.2 Finalize Result 

In [11]:
# Start from an empty submission table with the two columns every later step
# fills in: 'video' and 'frameid'.
df_submission = pd.DataFrame({'video': [], 'frameid': []})

#### 2.2.1 Select 100 first samples


In [13]:
# Select
# Pre-select the first 100 samples in the App. Dataset.head() fetches only
# those samples instead of materialising every sample and slicing afterwards.
head_100_samples_id = [sample['id'] for sample in dataset.head(100)]
session.select_samples(head_100_samples_id)


In [14]:
# Read the ids of the samples currently selected in the App
selected_samples = session.selected
# Put them at the top of the submission; combine_df keeps only as many
# leading rows of the previous submission as still fit
df_submission = add_confident_samples(dataset, selected_samples, df_submission)
# Reset the selection so the next pick starts from nothing
session.clear_selected()


#### 2.2.2 Modify result

Select (tick) the samples that are reliable <br />
<span style="color:red">**Notice**:</span> *Samples are added to the submission dataframe in the same order in which the user selected them*

In [39]:
# Select GUI above

In [16]:
# Read the ids of the samples ticked in the App during the manual review
selected_samples = session.selected
# Put them at the top of the submission; combine_df keeps only as many
# leading rows of the previous submission as still fit
df_submission = add_confident_samples(dataset, selected_samples, df_submission)
# Reset the selection so the next pick starts from nothing
session.clear_selected()


*Optional*: If no desirable samples can be found but some samples look related to the answer, use one of the alternative methods below. Otherwise, skip the rest of this section and move on to **Section 3**.
- **1st alternative method**: 2.2.2.1 Get the neighbor frames around a suspected frame -> append them to the submission above
- **2nd alternative method**: 2.2.2.2 Use a video timestamp that matches the query -> append the corresponding frames to the submission above

##### 2.2.2.1 Select neighbor frames

- Input video and frameid of suspected sample

In [1]:
# Folder name of the suspected sample's video under PATH_KEYFRAMES
video = 'C00_V0000'
# File name of the suspected keyframe, including its extension, because
# get_neighbor_frames compares it against the directory listing
frameid = '007006.jpg'

- Export neighbor frames of suspected sample

In [None]:
# Collect the keyframe files around the suspected frame.
neighbor_frameid_list = get_neighbor_frames(
    video=video, frameid=frameid, delta=100)

# Build a throwaway dataset from those image paths so they can be browsed.
neighbor_dataset = fo.Dataset.from_images(
    neighbor_frameid_list
)

# Fill in the 'video' and 'frameid' fields that add_confident_samples reads.
# Each filepath ends in <video>/<frameid>.<ext>; os.path is used so the split
# does not depend on the path separator or on the extension length.
for sample in neighbor_dataset:
    video_dir, frame_file = os.path.split(sample['filepath'])
    sample['video'] = os.path.basename(video_dir)
    sample['frameid'] = os.path.splitext(frame_file)[0]
    sample.save()


# Browse the neighbor frames in their own App session.
neighbor_session = fo.launch_app(neighbor_dataset, auto=False)
neighbor_session.show()


Visualize the neighbor frames and select those that match the query

In [None]:
# Select


In [None]:
# Read the ids of the samples currently selected in the neighbor App session
selected_samples = neighbor_session.selected
# Look the ids up in neighbor_dataset (not the main dataset) and put the
# resulting rows at the top of the submission
df_submission = add_confident_samples(neighbor_dataset, selected_samples, df_submission)
# Reset the selection so the next pick starts from nothing
neighbor_session.clear_selected()

##### 2.2.2.2 Get frames based on timestamp

In [None]:
# Video and timestamp that match the query
VIDEO = 'C00_V01'
TIMESTAMP = '1:01:01'

# NOTE(review): get_frame_based_on_timestamp is not defined in any cell visible
# in this notebook and is called without arguments; presumably it is meant to
# read VIDEO / TIMESTAMP from the globals above - TODO confirm or implement
# before running this cell.
df_bonus_add = get_frame_based_on_timestamp()
# Put the returned rows at the top of the submission.
df_submission = combine_df(df_bonus_add, df_submission)



## 3. Validate by viewing video

In [None]:
# Frame and video to validate (left blank; fill in before running)
FRAME = ''
VIDEO = ''

# Per the original author's note: get_url=True returns a YouTube URL as the
# location; otherwise a location on the local machine is returned.
# NOTE(review): get_location_timestamp is not defined in any cell visible in
# this notebook, and FRAME / VIDEO are not passed to it - TODO confirm how it
# receives them, or implement it before running this cell.
location, timestamp = get_location_timestamp(get_url = False)



## 4. Export submission 

In [15]:
# Display the final submission table for a last visual check before export.
df_submission

Unnamed: 0,video,frameid
0,C00_V0000,004126
1,C00_V0000,004241
2,C00_V0001,020942
3,C00_V0003,008117
4,C00_V0003,021520
...,...,...
95,C00_V0075,017468
96,C00_V0077,025628
97,C00_V0078,020663
98,C00_V0079,018961


In [81]:
# Export the submission for the current query as a CSV without a header row
# and without the index column.
submission_path = 'data/result-v3/submission/query-{}.csv'.format(id)
# to_csv does not create missing folders, so make sure the target one exists.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
df_submission.to_csv(submission_path, index=False, header=False)
