In [1]:
import json
import ast
import re

In [3]:
def save_json(content, save_path):
    """Serialize ``content`` to JSON and write it to ``save_path``.

    The object is serialized *before* the file is opened. If ``content`` is
    not JSON-serializable the exception is therefore raised without
    truncating a file that already exists at ``save_path``.

    Args:
        content: any object accepted by ``json.dumps``.
        save_path: destination path; overwritten if it already exists.
    Raises:
        TypeError: if ``content`` contains an object ``json.dumps`` cannot encode.
    """
    serialized = json.dumps(content)
    with open(save_path, 'w') as f:
        f.write(serialized)
def load_jsonl(filename):
    """Read a JSON Lines file and return its records as a list.

    Every non-blank line is parsed with ``json.loads``. Blank or
    whitespace-only lines (for example an empty line at the end of the file)
    are skipped instead of raising ``json.JSONDecodeError``.

    Args:
        filename: path of the JSON Lines file to read.
    Returns:
        list: one decoded object per non-blank line, in file order.
    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(filename, "r") as f:
        # Iterate the file object directly instead of materialising readlines().
        return [json.loads(line) for line in f if line.strip()]
def load_json(filename):
    """Read the file at ``filename`` and return the decoded JSON document."""
    with open(filename, "r") as src:
        raw_text = src.read()
        return json.loads(raw_text)
def save_jsonl(content, save_path):
    """Write every item of ``content`` to ``save_path`` as one JSON document per line.

    The file is opened in text write mode, so existing contents are replaced.
    Each line, including the last one, is terminated by a newline.
    """
    with open(save_path, 'w') as sink:
        sink.writelines(json.dumps(record) + "\n" for record in content)


In [4]:
def moment_str_to_list(m):
    """Convert a string of moments to a list of moments.

    If the predicted string is not a well-formed nested list, the model has
    not yet learned to predict the right format. In that case [[-1, -1]] is
    returned to represent an error, which will then lead to an IoU of 0.

    Args:
        m (str): a string of moments, e.g. "[[0, 1], [4, 7]]"
    Returns:
        list: a list of moments, e.g. [[0, 1], [4, 7]]. Every entry that is
            not a pair of numbers (wrong length, not a sequence, non-numeric
            or boolean members) is replaced with [-1, -1]; valid pairs are
            always returned as lists.
    Raises:
        TypeError: if ``m`` is not a str (raised by ``re.match``).
    """
    if m == "[[-1, -1]]":
        return [[-1, -1]]

    # Cheap shape check before parsing: the text has to start with "[[" and
    # contain a closing "]]", i.e. look like [[0, 1], [4, 7], ...].
    if not re.match(r"\[\[.*\]\]", m):
        return [[-1, -1]]

    try:
        parsed = ast.literal_eval(m)
    except Exception:
        # Malformed text makes literal_eval raise (ValueError, SyntaxError, ...).
        # Catching Exception instead of using a bare `except` lets
        # KeyboardInterrupt and SystemExit propagate.
        return [[-1, -1]]

    # Text such as "[[0, 1]], [[2, 3]]" passes the regex but evaluates to a
    # tuple, not a list of windows: treat it as a format error.
    if not isinstance(parsed, list):
        return [[-1, -1]]

    def _is_window(candidate):
        # A window is a two-element list/tuple of real numbers. Checking the
        # container type first avoids calling len() on scalars such as 3.
        return (
            isinstance(candidate, (list, tuple))
            and len(candidate) == 2
            and all(
                isinstance(value, (int, float)) and not isinstance(value, bool)
                for value in candidate
            )
        )

    # Malformed entries are replaced one by one so the remaining windows survive.
    return [list(w) if _is_window(w) else [-1, -1] for w in parsed]

In [7]:
# QVH Gated Cross Attention
# Test split: `data` is loaded from the prediction file and `meta_data` from
# the release metadata file. Both names are reused by the cells below.
data_file = 'test_epochbest.json'
meta_file = 'highlight_test_release.jsonl'

data = load_json(data_file)
meta_data = load_jsonl(meta_file)

In [9]:
# Peek at the first loaded record to see which fields it carries.
data[0]

{'qid': 'QVHighlight_3158_0',
 'raw_prediction': '[[0, 110]]',
 'prediction': '[[0, 110]]',
 'target': '[[0, 150]]',
 'duration': 150.0}

In [11]:
for d in data:
    # Only parse entries that are still raw strings. This keeps the cell safe
    # to re-run: an already-parsed entry is a list, and passing a list to
    # moment_str_to_list (which expects a str) raises TypeError.
    if isinstance(d['prediction'], str):
        # add a dummy confidence score to every parsed [start, end] window
        d['prediction'] = [m + [1.0] for m in moment_str_to_list(d['prediction'])]

In [13]:
# Check one record after parsing: 'prediction' should now be a list of windows.
data[2]

{'qid': 'QVHighlight_998_2',
 'raw_prediction': '[[24, 54]]',
 'prediction': [[24, 54, 1.0]],
 'target': '[[0, 150]]',
 'duration': 150.0}

In [15]:
# Index the metadata records by their qid so they can be looked up per prediction.
new_meta_data = {}
for meta_d in meta_data:
    new_meta_data[meta_d['qid']] = meta_d

# Equal counts mean no two metadata records share a qid.
print(len(meta_data), len(new_meta_data))
# Show a single (qid, record) pair instead of dumping the whole mapping into
# the cell output; prints None when there is no metadata at all.
print(next(iter(new_meta_data.items()), None))

1542 1542


Expected format of each submission entry (one JSON object per line of the output file):
```json
{
  "qid": 2579,
  "query": "A girl and her mother cooked while talking with each other on facetime.",
  "vid": "NUsG9BgSes0_210.0_360.0",
  "pred_relevant_windows": [
    [0, 70, 0.9986],
    [78, 146, 0.4138],
    [0, 146, 0.0444],
    ...
  ],  
  "pred_saliency_scores": [-0.2452, -0.3779, -0.4746, ...]
}
```


In [22]:
def get_submission(data, meta_data):
    """Build one submission entry per prediction record.

    Args:
        data: iterable of records with a 'qid' string and a 'prediction' list
            of windows. The second underscore-separated field of 'qid' is
            converted to int and used as the lookup key.
        meta_data: mapping from that integer key to a record providing
            'query' and 'vid'.
    Returns:
        list: dicts with the keys 'qid', 'query', 'vid',
            'pred_relevant_windows' (every value cast to float) and
            'pred_saliency_scores'.
    Raises:
        KeyError: if a parsed qid is missing from ``meta_data``.
    """
    def _build_entry(record):
        numeric_qid = int(record['qid'].split('_')[1])
        entry = {"qid": numeric_qid}
        entry["query"] = meta_data[numeric_qid]["query"]
        entry["vid"] = meta_data[numeric_qid]["vid"]
        entry["pred_relevant_windows"] = [
            [float(value) for value in window] for window in record["prediction"]
        ]
        # NOTE(review): one dummy score of 1.0 is emitted per predicted window;
        # confirm this is the length the downstream evaluator expects.
        entry["pred_saliency_scores"] = [1.0] * len(record["prediction"])
        return entry

    return [_build_entry(record) for record in data]

In [24]:
# Build the submission entries for the test split and inspect one of them.
# NOTE(review): `test_sumission` is a misspelling of "submission"; the name is
# kept because later cells reference it exactly.
test_sumission = get_submission(data, new_meta_data)
test_sumission[2]

{'qid': 998,
 'query': 'Video game toys are on display.',
 'vid': 'xtmc4rgoxU4_60.0_210.0',
 'pred_relevant_windows': [[24.0, 54.0, 1.0]],
 'pred_saliency_scores': [1.0]}

In [26]:
# Sanity check: the windows of an entry are held in a plain Python list.
type(test_sumission[2]["pred_relevant_windows"])

list

In [28]:
# Save submission_1
# Writes the test-split entries with save_jsonl, i.e. one JSON object per line.
save_jsonl(test_sumission, 'hl_test_submission_float.jsonl')

In [30]:
# Validation split: load, parse and index in one cell.
# NOTE(review): this repeats the steps of the test-split cells above with
# different file names; a shared helper taking the two paths would remove the
# duplication.
data_file = 'val_epoch11.json'
meta_file = 'highlight_val_release.jsonl'

data = load_json(data_file)
meta_data = load_jsonl(meta_file)

# First record before parsing: 'prediction' is still a string.
print(data[0])

# `data` is reloaded at the top of this cell, so every entry is a raw string
# here and the cell can be re-run safely.
for d in data:
    d['prediction'] = moment_str_to_list(d['prediction'])
    # add a dummy confidence score to every window
    d['prediction'] = [m + [1.0] for m in d['prediction']]

# Same record after parsing: 'prediction' is now a list of windows.
print(data[0])

# Index the metadata records by their qid.
new_meta_data = {}
for meta_d in meta_data:
    new_meta_data[meta_d['qid']] = meta_d

# Equal counts mean no two metadata records share a qid.
print(len(meta_data), len(new_meta_data))
# Show a single (qid, record) pair instead of dumping the whole mapping into
# the cell output; prints None when there is no metadata at all.
print(next(iter(new_meta_data.items()), None))

{'qid': 'QVHighlight_2579_0', 'raw_prediction': '[[96, 150]]', 'prediction': '[[96, 150]]', 'target': '[[82, 150]]', 'duration': 150}
{'qid': 'QVHighlight_2579_0', 'raw_prediction': '[[96, 150]]', 'prediction': [[96, 150, 1.0]], 'target': '[[82, 150]]', 'duration': 150}
1550 1550
{2579: {'qid': 2579, 'query': 'A girl and her mother cooked while talking with each other on facetime.', 'duration': 150, 'vid': 'NUsG9BgSes0_210.0_360.0', 'relevant_clip_ids': [41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74], 'saliency_scores': [[1, 1, 2], [1, 1, 3], [1, 1, 3], [1, 1, 3], [1, 1, 3], [1, 1, 2], [1, 1, 3], [2, 1, 4], [2, 1, 4], [2, 1, 4], [2, 2, 4], [2, 2, 3], [3, 2, 2], [4, 2, 2], [4, 2, 2], [4, 2, 2], [3, 2, 2], [3, 2, 3], [3, 2, 3], [4, 2, 3], [3, 2, 3], [3, 2, 3], [3, 2, 3], [3, 2, 3], [3, 2, 2], [3, 2, 2], [3, 2, 2], [3, 2, 3], [3, 2, 4], [3, 2, 3], [3, 2, 4], [3, 2, 4], [3, 1, 3], [3, 1, 4]], 'relevant

In [32]:
# Build the submission entries for the validation split and inspect one of them.
# NOTE(review): `val_sumission` is a misspelling of "submission"; the name is
# kept because the save cell below references it exactly.
val_sumission = get_submission(data, new_meta_data)
val_sumission[2]

{'qid': 2321,
 'query': 'A girl opening post office mails in a car',
 'vid': 'r7A-cfBq2Xw_210.0_360.0',
 'pred_relevant_windows': [[84.0, 120.0, 1.0], [122.0, 150.0, 1.0]],
 'pred_saliency_scores': [1.0, 1.0]}

In [34]:
# Write the validation-split entries with save_jsonl, i.e. one JSON object per line.
save_jsonl(val_sumission, 'hl_val_submission_float.jsonl')