In [2]:

import os
import json
import numpy as np
import pandas as pd
from collections import OrderedDict, defaultdict

import torch
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader

from la_detr.config import TestOptions
from la_detr.model import build_model
from la_detr.span_utils import span_cxw_to_xx
from la_detr.start_end_dataset import StartEndDataset, start_end_collate, prepare_batch_inputs
from la_detr.start_end_dataset_audio import \
    StartEndDataset_audio, start_end_collate_audio, prepare_batch_inputs_audio
from la_detr.postprocessing_qd_detr import PostProcessorDETR
from standalone_eval.eval import *
from utils.basic_utils import save_jsonl, save_json, load_jsonl
from utils.temporal_nms import temporal_nms
from la_detr.inference import *

In [3]:
# Relative path to a metrics .jsonl file.
# NOTE(review): `file_name` is not referenced by any cell visible in this
# notebook excerpt — confirm it is still needed before keeping it.
file_name = 'results/ours/best_hl_val_preds_metrics.jsonl'

In [17]:
def return_qid2ap(submission, moment_len_idx, ground_truth=None,
                  ground_truth_path='/workspace/qd-la-crop/data/highlight_val_release.jsonl'):
    """Compute per-query AP lists and mAP for one moment-length range.

    Args:
        submission: list of prediction dicts. Each dict is read via its
            ``"qid"`` and ``"pred_relevant_windows"`` keys; every window is
            indexed as ``(start, end, score)``.
        moment_len_idx: index into the fixed range table
            ``0 -> short [0, 10]``, ``1 -> middle [10, 30]``,
            ``2 -> long [30, 150]``, ``3 -> full [0, 150]``.
        ground_truth: optional, already-loaded list of ground-truth dicts
            (read via ``"qid"`` and ``"relevant_windows"``). Pass it to avoid
            re-reading the file on every call.
        ground_truth_path: file loaded with ``load_jsonl`` when
            ``ground_truth`` is None. Defaults to the path that used to be
            hard-coded, so existing two-argument calls behave as before.

    Returns:
        A tuple ``(metrics, iou_thd2ap, qid2ap_list)``:
            * ``metrics`` - whatever ``eval_submission`` returns for the whole
              (unfiltered) submission.
            * ``iou_thd2ap`` - dict mapping each IoU threshold (as a string,
              ``"0.5"`` ... ``"0.95"``) plus ``"average"`` to the mean AP over
              the scored queries, as a percentage rounded to two decimals.
            * ``qid2ap_list`` - dict mapping qid to the per-threshold scores
              returned by ``compute_average_precision_detection_wrapper``.

    Raises:
        IndexError: if ``moment_len_idx`` is outside the range table.
        ValueError: if no query in the selected range has any predicted
            window, so no mAP can be computed.
    """
    if ground_truth is None:
        ground_truth = load_jsonl(ground_truth_path)

    # Metrics over the full submission; independent of `moment_len_idx`.
    metrics = eval_submission(
        submission, ground_truth,
        verbose=False, match_number=True
    )

    l_ranges = [[0, 10], [10, 30], [30, 150], [0, 150]]
    names = ["short", "middle", "long", "full"]
    l_range = l_ranges[moment_len_idx]
    name = names[moment_len_idx]

    # NOTE(review): get_data_by_range is assumed to keep only the data whose
    # ground-truth window length falls in `l_range` — confirm in standalone_eval.
    _submission, _ground_truth = get_data_by_range(submission, ground_truth, l_range)
    print(f"{name}: {l_range}, {len(_ground_truth)}/{len(ground_truth)}="
                f"{100*len(_ground_truth)/len(ground_truth):.2f} examples.")

    # Round to two decimals so the thresholds give clean dict keys ("0.55").
    iou_thds = [float(f"{e:.2f}") for e in np.linspace(0.5, 0.95, 10)]
    max_gt_windows = None     # None -> keep every ground-truth window
    max_pred_windows = 10     # only the first 10 predicted windows are scored

    pred_qid2data = defaultdict(list)
    for d in _submission:
        qid = d["qid"]
        # Slicing with None keeps the whole list, so no separate branch is needed.
        for w in d["pred_relevant_windows"][:max_pred_windows]:
            pred_qid2data[qid].append({
                "video-id": qid,  # in order to use the API
                "t-start": w[0],
                "t-end": w[1],
                "score": w[2]
            })

    gt_qid2data = defaultdict(list)
    for d in _ground_truth:
        qid = d["qid"]
        for w in d["relevant_windows"][:max_gt_windows]:
            gt_qid2data[qid].append({
                "video-id": qid,
                "t-start": w[0],
                "t-end": w[1]
            })

    # Only qids that have at least one predicted window are scored; a qid
    # present solely in the ground truth never enters this loop.
    qid2ap_list = {}
    for qid in pred_qid2data:
        data_triple = [qid, gt_qid2data[qid], pred_qid2data[qid]]
        scored_qid, scores = compute_average_precision_detection_wrapper(
            data_triple, tiou_thresholds=iou_thds)
        qid2ap_list[scored_qid] = scores

    if not qid2ap_list:
        # Previously this fell through to zip() over a scalar NaN and died
        # with an unrelated-looking TypeError.
        raise ValueError(
            f"No predicted windows for the '{name}' range {l_range}; "
            "cannot compute mAP.")

    ap_array = np.array(list(qid2ap_list.values()))  # (#queries, #thd)
    ap_thds = ap_array.mean(0)  # mAP at different IoU thresholds.
    iou_thd2ap = dict(zip([str(e) for e in iou_thds], ap_thds))
    iou_thd2ap["average"] = np.mean(ap_thds)
    # formatting: percentage with two decimals
    iou_thd2ap = {k: float(f"{100 * v:.2f}") for k, v in iou_thd2ap.items()}

    return metrics, iou_thd2ap, qid2ap_list

In [18]:
# IoU thresholds 0.50, 0.55, ..., 0.95. Rounded to two decimals so that
# str(threshold) yields the same keys ("0.5", "0.55", ...) that
# return_qid2ap produces after its own two-decimal normalisation.
iou_thds = np.round(np.linspace(0.5, 0.95, 10), 2)

In [33]:
# Validation predictions of the `qd_*` runs: original, crop, crop_adv
# (variant names follow the result directory names).
qd_org, qd_crop, qd_crop_adv = (
    load_jsonl(pred_path)
    for pred_path in (
        '/workspace/qd_org/results/video_checkpoint/best_hl_val_preds.jsonl',
        '/workspace/qd-la-crop/results/hl-video_tef-base_crop-2024_07_26_05_01_02/best_hl_val_preds.jsonl',
        '/workspace/qd-la-crop/results/hl-video_tef-base_crop_adv-2024_07_31_08_51_52/best_hl_val_preds.jsonl',
    )
)

# Validation predictions of the `la_*` runs, same three variants.
la_org, la_crop, la_crop_adv = (
    load_jsonl(pred_path)
    for pred_path in (
        '/workspace/qd-la-crop/results/ours/best_hl_val_preds.jsonl',
        '/workspace/qd-la-crop/results/hl-video_tef-tgt_cc_crop-2024_07_26_05_00_43/best_hl_val_preds.jsonl',
        '/workspace/qd-la-crop/results/hl-video_tef-tgt_cc_crop_adv-2024_07_31_08_51_40/best_hl_val_preds.jsonl',
    )
)

In [34]:
# Ground-truth annotations (presumably the validation split, judging by the
# file name). Later cells read each record's 'qid' and 'relevant_windows'.
ground_truth = load_jsonl('/workspace/qd-la-crop/data/highlight_val_release.jsonl')

In [36]:
# Score every run on range index 3, i.e. the "full" [0, 150] entry of the
# range table inside return_qid2ap. Each call yields
# (metrics, iou_thd2ap, qid2ap_list).
(
    (qd_org_metrics, qd_org_iou_thd2ap, qd_org_qid2ap_list),
    (qd_crop_metrics, qd_crop_iou_thd2ap, qd_crop_qid2ap_list),
    (qd_crop_adv_metrics, qd_crop_adv_iou_thd2ap, qd_crop_adv_qid2ap_list),
) = [return_qid2ap(preds, 3) for preds in (qd_org, qd_crop, qd_crop_adv)]

(
    (la_org_metrics, la_org_iou_thd2ap, la_org_qid2ap_list),
    (la_crop_metrics, la_crop_iou_thd2ap, la_crop_qid2ap_list),
    (la_crop_adv_metrics, la_crop_adv_iou_thd2ap, la_crop_adv_qid2ap_list),
) = [return_qid2ap(preds, 3) for preds in (la_org, la_crop, la_crop_adv)]

short: [0, 10], 429/1550=27.68 examples.
middle: [10, 30], 957/1550=61.74 examples.
long: [30, 70], 432/1550=27.87 examples.
very long: [70, 150], 155/1550=10.00 examples.
full: [0, 150], 1550/1550=100.00 examples.
full: [0, 150], 1550/1550=100.00 examples.
short: [0, 10], 429/1550=27.68 examples.
middle: [10, 30], 957/1550=61.74 examples.
long: [30, 70], 432/1550=27.87 examples.
very long: [70, 150], 155/1550=10.00 examples.
full: [0, 150], 1550/1550=100.00 examples.
full: [0, 150], 1550/1550=100.00 examples.
short: [0, 10], 429/1550=27.68 examples.
middle: [10, 30], 957/1550=61.74 examples.
long: [30, 70], 432/1550=27.87 examples.
very long: [70, 150], 155/1550=10.00 examples.
full: [0, 150], 1550/1550=100.00 examples.
full: [0, 150], 1550/1550=100.00 examples.
short: [0, 10], 429/1550=27.68 examples.
middle: [10, 30], 957/1550=61.74 examples.
long: [30, 70], 432/1550=27.87 examples.
very long: [70, 150], 155/1550=10.00 examples.
full: [0, 150], 1550/1550=100.00 examples.
full: [0, 1

In [40]:
# qids of the ground-truth queries that own at least one relevant window
# whose length (end - start) lies in (0, 10]. Each qid appears once, in
# ground-truth order.
short_qid = [
    gt_item['qid']
    for gt_item in ground_truth
    if any(0 < (win[1] - win[0]) <= 10 for win in gt_item['relevant_windows'])
]

In [37]:
# Per-query AP rows restricted to the short-moment qids, one list per run.
# Rows follow the order of `short_qid`; a qid missing from a run's
# qid2ap_list raises KeyError.
short_qd_org_ap = [qd_org_qid2ap_list[qid] for qid in short_qid]
short_qd_crop_ap = [qd_crop_qid2ap_list[qid] for qid in short_qid]
short_qd_crop_adv_ap = [qd_crop_adv_qid2ap_list[qid] for qid in short_qid]

short_la_org_ap = [la_org_qid2ap_list[qid] for qid in short_qid]
short_la_crop_ap = [la_crop_qid2ap_list[qid] for qid in short_qid]
short_la_crop_adv_ap = [la_crop_adv_qid2ap_list[qid] for qid in short_qid]

In [38]:
import collections

# threshold label -> list of mean APs, one entry per run, appended in the
# order the runs are listed below; "average" holds the mean over thresholds.
iou_thd2ap = collections.defaultdict(list)

runs_short_ap = (
    short_qd_org_ap,
    short_qd_crop_ap,
    short_qd_crop_adv_ap,
    short_la_org_ap,
    short_la_crop_ap,
    short_la_crop_adv_ap,
)
for run_ap_rows in runs_short_ap:
    # Rows are queries, columns are IoU thresholds; average over queries.
    mean_ap_per_thd = np.array(run_ap_rows).mean(axis=0)
    for thd_label, thd_ap in zip(map(str, iou_thds), mean_ap_per_thd):
        iou_thd2ap[thd_label].append(thd_ap)
    iou_thd2ap["average"].append(np.mean(mean_ap_per_thd))

In [39]:
# Display the short-moment mAP table. Every list holds six values in the
# order qd_org, qd_crop, qd_crop_adv, la_org, la_crop, la_crop_adv.
iou_thd2ap

defaultdict(list,
            {'0.5': [0.3919698935733879,
              0.36615441629159534,
              0.3698598419428977,
              0.4263280664029622,
              0.4197236838421226,
              0.41077827741430684],
             '0.55': [0.3228458914120434,
              0.29880310962814244,
              0.3067412423472293,
              0.36902409506165335,
              0.3580255219610266,
              0.3403769663379761],
             '0.6': [0.30294455580557966,
              0.2792851271635232,
              0.2823841796568333,
              0.34658181441861513,
              0.3372249056301075,
              0.31949913275180924],
             '0.65': [0.26018497601948476,
              0.2508727308161452,
              0.22721874629175653,
              0.3029425844652825,
              0.30474138317777927,
              0.28685979836033837],
             '0.7': [0.2155275612625862,
              0.21192131381525328,
              0.1893668695329536,
           