In [1]:
import jsonlines
import numpy as np
from collections import defaultdict

In [2]:
# Root directory of the saved model predictions. Later cells read
# f'{basedir}/{model}/{model}_test.jsonl' and f'{basedir}/{model}/{model}_dev.jsonl'.
basedir = "../outputs/VSR_random"

# Model identifier -> LaTeX-formatted display name.
# The key is used as both the sub-directory name and the file-name prefix when
# loading results; the value is printed as the row label of each table row.
# Dict order is the row order of the printed tables.
models = {
    "albef_4m": "ALBEF$_{4M}$",
    "albef_14m": "ALBEF$_{14M}$",
    "blip_14m": "BLIP$_{14M}$",
    "blip_129m": "BLIP$_{129M}$",
    "blip_capfiltl_129m": "BLIP$_{129M}$+CapFilt-L",
    "blip_vitl_129m": "BLIP-ViT/L$_{129M}$",
    "pevl_pretraining": "PEVL$_{pretrain}$",
    "pevl_grounding": "PEVL$_{grounding}$",
    "pevl_vrd": "PEVL$_{vrd}$",
    "x-vlm_4m": "X-VLM$_{4M}$",
    "x-vlm_16m": "X-VLM$_{16M}$",
    "blip2_itm_pretrain": "BLIP-2",
    }

In [4]:
# Raw relation lists, one comma-separated string per category. Line breaks and
# surrounding spaces inside the strings are stripped when the lists are parsed below.
adj_rels = """
adjacent to, alongside, at the side of, at the right side of, at the left side of,
 attached to, at the back of, ahead of, against, at the edge of
"""

dir_rels = """
off, past, toward, down, away from, along, around, into, across, across from, down from
"""

ori_rels = """
facing, facing away from, parallel to, perpendicular to
"""

proj_rels = """
on top of, beneath, beside, behind, left of, right of, under, in front of, below, above, over, in the middle of
"""

prox_rels = """
by, close to, near, far from, far away from
"""

topo_rels = """
connected to, detached from, has as a part, part of, contains, within, at, on, in, with, surrounding, among,
consists of, out of, between, inside, outside, touching
"""

unalloc_rels = """
beyond, next to, opposite to, among, enclosed by
"""

# Category name -> list of relation phrases. Insertion order here is the
# column order of every table printed further down.
cat2relation = {
    category: [phrase.strip() for phrase in raw_list.split(',')]
    for category, raw_list in (
        ('adjacency', adj_rels),
        ('directional', dir_rels),
        ('orientation', ori_rels),
        ('projective', proj_rels),
        ('proximity', prox_rels),
        ('topological', topo_rels),
        ('unallocated', unalloc_rels),
    )
}

# Inverse lookup: relation phrase -> category name.
# NOTE(review): 'among' is listed in both topo_rels and unalloc_rels. Later
# entries overwrite earlier ones, so relation2cat['among'] == 'unallocated' and
# 'among' examples never count towards the 'topological' column. Confirm that
# this is the intended assignment.
relation2cat = {
    phrase: category
    for category, phrases in cat2relation.items()
    for phrase in phrases
}

In [5]:
# Emit the LaTeX header cells: one bold, capitalised cell per relation category,
# each followed by ' & '. No newline or row terminator is printed.
header_row = ''.join('\\textbf{%s} & ' % name.capitalize() for name in cat2relation)
print(header_row, end='')

\textbf{Adjacency} & \textbf{Directional} & \textbf{Orientation} & \textbf{Projective} & \textbf{Proximity} & \textbf{Topological} & \textbf{Unallocated} & 

## Test

In [3]:
# Sanity check: load the test-split predictions of the first configured model
# and display the first two records.
model = list(models)[0]
test_path = f'{basedir}/{model}/{model}_test.jsonl'
with jsonlines.open(test_path) as reader:
    lines = list(reader)

lines[:2]

[{'label': 1,
  'score': 0.34567856788635254,
  'relation': 'in front of',
  'id': 0},
 {'label': 1, 'score': 0.08471976220607758, 'relation': 'behind', 'id': 1}]

In [6]:
# Print one LaTeX table row per model: test-split accuracy (in %) for each
# relation category, followed by the overall accuracy.
for model in models:
    with jsonlines.open(f'{basedir}/{model}/{model}_test.jsonl') as reader:
        lines = list(reader)

    # Correctness flags keyed by relation category, plus an 'overall' bucket.
    res_dict = defaultdict(list)
    for record in lines:
        # Positive examples are correct when score >= 0.5, negatives when score < 0.5.
        correct = record['score'] >= 0.5 if record['label'] else record['score'] < 0.5
        res_dict['overall'].append(correct)
        res_dict[relation2cat[record['relation']]].append(correct)

    # Category cells follow cat2relation order; the overall figure is the last cell.
    cells = [f'{np.mean(res_dict[category])*100:.1f}' for category in cat2relation.keys()]
    cells.append(f'{np.mean(res_dict["overall"])*100:.1f}')
    print(f'{models[model]} & ' + ' & '.join(cells) + ' \\\\')

ALBEF$_{4M}$ & 51.1 & 42.2 & 58.0 & 60.2 & 55.3 & 59.2 & 56.9 & 57.3 \\
ALBEF$_{14M}$ & 54.2 & 40.0 & 58.0 & 62.6 & 52.0 & 58.9 & 58.8 & 58.3 \\
BLIP$_{14M}$ & 49.3 & 50.0 & 47.3 & 49.3 & 48.0 & 51.8 & 41.2 & 49.7 \\
BLIP$_{129M}$ & 41.2 & 52.2 & 53.6 & 45.4 & 49.6 & 49.7 & 37.3 & 46.9 \\
BLIP$_{129M}$+CapFilt-L & 49.3 & 57.8 & 53.6 & 45.5 & 47.2 & 51.1 & 41.2 & 48.7 \\
BLIP-ViT/L$_{129M}$ & 51.8 & 58.9 & 52.7 & 48.5 & 43.9 & 51.8 & 47.1 & 50.3 \\
PEVL$_{pretrain}$ & 55.3 & 48.9 & 56.2 & 60.8 & 48.8 & 57.4 & 58.8 & 57.5 \\
PEVL$_{grounding}$ & 53.5 & 50.0 & 52.7 & 59.4 & 54.5 & 60.2 & 58.8 & 57.7 \\
PEVL$_{vrd}$ & 55.6 & 52.2 & 53.6 & 60.4 & 54.5 & 63.1 & 64.7 & 59.5 \\
X-VLM$_{4M}$ & 57.7 & 43.3 & 52.7 & 66.1 & 54.5 & 68.4 & 62.7 & 63.0 \\
X-VLM$_{16M}$ & 58.5 & 46.7 & 58.0 & 67.7 & 52.0 & 68.7 & 68.6 & 64.3 \\
BLIP-2 & 54.9 & 43.3 & 57.1 & 63.6 & 51.2 & 67.0 & 66.7 & 61.5 \\


## Dev

In [7]:
# Sanity check: load the dev-split predictions of the first configured model
# and display the first two records.
model = list(models)[0]
dev_path = f'{basedir}/{model}/{model}_dev.jsonl'
with jsonlines.open(dev_path) as reader:
    lines = list(reader)

lines[:2]

[{'label': 1, 'score': 0.9958904385566711, 'relation': 'beside', 'id': 0},
 {'label': 0, 'score': 0.0012446464970707893, 'relation': 'touching', 'id': 1}]

In [8]:
# Print one LaTeX table row per model: dev-split accuracy (in %) for each
# relation category, followed by the overall accuracy.
for model in models:
    with jsonlines.open(f'{basedir}/{model}/{model}_dev.jsonl') as reader:
        lines = list(reader)

    # Correctness flags keyed by relation category, plus an 'overall' bucket.
    res_dict = defaultdict(list)
    for example in lines:
        if example['label']:
            is_right = example['score'] >= 0.5   # positive pair: must score at/above 0.5
        else:
            is_right = example['score'] < 0.5    # negative pair: must score below 0.5
        for bucket in ('overall', relation2cat[example['relation']]):
            res_dict[bucket].append(is_right)

    # Row label first, then the category columns in cat2relation order, then overall.
    row = [f'{np.mean(res_dict[name])*100:.1f}' for name in cat2relation.keys()]
    row.append(f'{np.mean(res_dict["overall"])*100:.1f}')
    print(f'{models[model]} & ' + ' & '.join(row) + ' \\\\')

ALBEF$_{4M}$ & 52.3 & 38.6 & 55.9 & 61.7 & 56.2 & 58.6 & 65.6 & 58.0 \\
ALBEF$_{14M}$ & 52.3 & 59.1 & 55.9 & 59.8 & 46.9 & 66.8 & 71.9 & 60.2 \\
BLIP$_{14M}$ & 56.8 & 56.8 & 57.6 & 42.5 & 51.6 & 45.1 & 50.0 & 47.4 \\
BLIP$_{129M}$ & 44.7 & 43.2 & 52.5 & 53.6 & 53.1 & 50.2 & 40.6 & 50.5 \\
BLIP$_{129M}$+CapFilt-L & 57.6 & 36.4 & 47.5 & 45.9 & 48.4 & 48.5 & 37.5 & 47.7 \\
BLIP-ViT/L$_{129M}$ & 56.1 & 29.5 & 49.2 & 46.9 & 53.1 & 49.8 & 46.9 & 48.7 \\
PEVL$_{pretrain}$ & 47.0 & 56.8 & 57.6 & 61.9 & 51.6 & 62.4 & 71.9 & 59.3 \\
PEVL$_{grounding}$ & 53.8 & 65.9 & 59.3 & 60.9 & 60.9 & 62.7 & 75.0 & 61.1 \\
PEVL$_{vrd}$ & 54.5 & 59.1 & 61.0 & 59.8 & 59.4 & 64.1 & 68.8 & 60.7 \\
X-VLM$_{4M}$ & 57.6 & 56.8 & 59.3 & 69.2 & 57.8 & 71.2 & 75.0 & 66.6 \\
X-VLM$_{16M}$ & 61.4 & 65.9 & 64.4 & 68.4 & 62.5 & 70.5 & 84.4 & 67.9 \\
BLIP-2 & 59.8 & 50.0 & 52.5 & 59.8 & 56.2 & 66.4 & 75.0 & 61.2 \\


## Dev / Test

In [9]:
def _read_jsonl(path):
    """Return every record of the JSON-lines file at ``path`` as a list."""
    with jsonlines.open(path) as reader:
        return list(reader)


def _correct_by_category(records, rel2cat, threshold=0.5):
    """Bucket per-example correctness flags by relation category.

    Args:
        records: iterable of dicts with 'label', 'score' and 'relation' keys.
        rel2cat: mapping from relation phrase to category name.
        threshold: decision boundary on 'score'. An example with a truthy
            label is correct when ``score >= threshold``; one with a falsy
            label is correct when ``score < threshold``.

    Returns:
        A ``defaultdict(list)`` mapping 'overall' and each category present in
        ``records`` to a list of booleans. Looking up an absent category yields
        an empty list.

    Raises:
        KeyError: if a record's relation is missing from ``rel2cat``.
    """
    outcomes = defaultdict(list)
    for record in records:
        if record['label']:
            correct = record['score'] >= threshold
        else:
            correct = record['score'] < threshold
        outcomes['overall'].append(correct)
        outcomes[rel2cat[record['relation']]].append(correct)
    return outcomes


def _pct(flags):
    """Format the mean of ``flags`` as a percentage with one decimal place."""
    return f'{np.mean(flags)*100:.1f}'


# Print one LaTeX table row per model with "dev/test" accuracy for each relation
# category, followed by the overall "dev/test" accuracy.
for model in models:
    dev_lines = _read_jsonl(f'{basedir}/{model}/{model}_dev.jsonl')
    test_lines = _read_jsonl(f'{basedir}/{model}/{model}_test.jsonl')
    dev_dict = _correct_by_category(dev_lines, relation2cat)
    test_dict = _correct_by_category(test_lines, relation2cat)

    # Category columns follow cat2relation order; 'overall' is the final column.
    cells = [
        f'{_pct(dev_dict[key])}/{_pct(test_dict[key])}'
        for key in [*cat2relation.keys(), 'overall']
    ]
    print(f'{models[model]} & ' + ' & '.join(cells) + ' \\\\')

ALBEF$_{4M}$ & 52.3/51.1 & 38.6/42.2 & 55.9/58.0 & 61.7/60.2 & 56.2/55.3 & 58.6/59.2 & 65.6/56.9 & 58.0/57.3 \\
ALBEF$_{14M}$ & 52.3/54.2 & 59.1/40.0 & 55.9/58.0 & 59.8/62.6 & 46.9/52.0 & 66.8/58.9 & 71.9/58.8 & 60.2/58.3 \\
BLIP$_{14M}$ & 56.8/49.3 & 56.8/50.0 & 57.6/47.3 & 42.5/49.3 & 51.6/48.0 & 45.1/51.8 & 50.0/41.2 & 47.4/49.7 \\
BLIP$_{129M}$ & 44.7/41.2 & 43.2/52.2 & 52.5/53.6 & 53.6/45.4 & 53.1/49.6 & 50.2/49.7 & 40.6/37.3 & 50.5/46.9 \\
BLIP$_{129M}$+CapFilt-L & 57.6/49.3 & 36.4/57.8 & 47.5/53.6 & 45.9/45.5 & 48.4/47.2 & 48.5/51.1 & 37.5/41.2 & 47.7/48.7 \\
BLIP-ViT/L$_{129M}$ & 56.1/51.8 & 29.5/58.9 & 49.2/52.7 & 46.9/48.5 & 53.1/43.9 & 49.8/51.8 & 46.9/47.1 & 48.7/50.3 \\
PEVL$_{pretrain}$ & 47.0/55.3 & 56.8/48.9 & 57.6/56.2 & 61.9/60.8 & 51.6/48.8 & 62.4/57.4 & 71.9/58.8 & 59.3/57.5 \\
PEVL$_{grounding}$ & 53.8/53.5 & 65.9/50.0 & 59.3/52.7 & 60.9/59.4 & 60.9/54.5 & 62.7/60.2 & 75.0/58.8 & 61.1/57.7 \\
PEVL$_{vrd}$ & 54.5/55.6 & 59.1/52.2 & 61.0/53.6 & 59.8/60.4 & 59.4/54.5 