In [1]:
import xml.etree.ElementTree as ET
import requests
import json
import numpy as np

def get_images_from_query(topic_dict, size=200):
    """
    Send a query to the local retrieval server and return its ranked results.

    The query is POSTed as JSON to the ``/images/`` endpoint; the function
    returns the "current" entry of every item in the response's "results"
    list, in the order the server sent them.

    Parameters
    ----------
    topic_dict : dict
        Must contain the keys "before", "beforewhen", "after", "afterwhen"
        and "current"; their values are forwarded unchanged.
    size : int, optional
        Value sent in the request's "size" field (presumably the number of
        results the server should return - confirm against the server).
        Defaults to 200, the value that used to be hard-coded.

    Returns
    -------
    list
        One element per entry of the response's "results" list.

    Raises
    ------
    KeyError
        If `topic_dict` misses a required key or the response has no
        "results" field.
    """
    URL = "http://localhost:8001/images/"
    PARAMS = {
        "query":
            {
                "before": topic_dict["before"],
                "beforewhen": topic_dict["beforewhen"],
                "after": topic_dict["after"],
                "afterwhen": topic_dict["afterwhen"],
                "current": topic_dict["current"],
                "isQuestion": False,
                "info": None
            },
        "gps_bounds": None,
        "starting_from": 0,
        "share_info": False,
        "size": size
    }
    response = requests.post(URL, data=json.dumps(PARAMS), headers={'Content-Type': 'application/json'})
    data = response.json()
    # Only the "current" part of each result is used by the notebook.
    images = [triplet["current"] for triplet in data["results"]]
    return images

In [2]:
def get_more():
    """
    Fetch a further batch of results from the retrieval server.

    POSTs an empty JSON object to the ``/more/`` endpoint and returns the
    "current" entry of every item in the response's "results" list.
    Presumably the server continues the most recent ``/images/`` query -
    confirm against the server implementation.
    """
    endpoint = "http://localhost:8001/more/"
    body = json.dumps({})
    reply = requests.post(endpoint, data=body, headers={'Content-Type': 'application/json'})
    return [item["current"] for item in reply.json()["results"]]

# LSC22

In [3]:
# Read LSC22 queries
# File layout expected by this parser: a line starting with "LSC" (the query
# id), followed by 6 hint lines, followed by target lines up to the next
# blank line. Parsing stops after the query whose id equals `end_id`.
end_id = "LSC-121"
queries = {}
with open("LSC22queries.txt") as f:
    all_lines = [line.strip() for line in f]
print(len(all_lines))

i = 0
while i < len(all_lines):
    line = all_lines[i]
    if line.startswith("LSC"):
        # `query_id` rather than `id`, so the builtin id() is not shadowed
        # for the rest of the notebook session.
        query_id = line
        hints = all_lines[i + 1 : i + 7]  # 6 hints
        targets = []
        i += 7
        # The bounds check keeps the last query from raising IndexError when
        # the file does not end with a blank line.
        while i < len(all_lines) and all_lines[i]:
            targets.append(all_lines[i])
            i += 1
        queries[query_id] = {"hints": hints,
                             "targets": targets}
        if query_id == end_id:
            break
    i += 1

293


In [31]:
# Turn each query's hints into increments: every hint keeps only the text
# that is left after removing the previous hint's text from it.
rewritten_queries = []
for entry in queries.values():
    incremental = []
    previous = ""
    for full_hint in entry["hints"]:
        added_text = full_hint.replace(previous, "")
        # Trailing dots/spaces are dropped so the next hint still contains
        # this text even when its punctuation differs.
        previous = full_hint.strip('. ')
        incremental.append({"current": added_text, "before": "", "beforewhen": None, "after": "", "afterwhen": None})
    rewritten_queries.append(incremental)
# json.dump(rewritten_queries, open("LSC22_split.json", "w"))
# rewritten_queries = json.load(open("LSC22_split.json"))

In [34]:
# Result cache used by the request loop (keys look like "<query>_hint<hint>").
# The commented-out alternative used "image_cached.json"; change CACHE_FILE
# to switch between the two cache files.
CACHE_FILE = "scene_cached.json"

try:
    cached
except NameError:
    # Fresh kernel: `cached` does not exist yet. Restore the cache from disk
    # when a file is present, otherwise start with an empty one, so that
    # Restart & Run All no longer fails here.
    try:
        with open(CACHE_FILE) as f:
            cached = json.load(f)
    except FileNotFoundError:
        cached = {}

# Persist the current cache; the context manager guarantees the file is
# flushed and closed.
with open(CACHE_FILE, "w") as f:
    json.dump(cached, f)

## Sending requests

In [32]:
from collections import defaultdict
from tqdm.auto import tqdm
from json.decoder import JSONDecodeError
# All metric tables are indexed as table[query_i][hint_j][k].
# stats     : 1 when at least one target image is among the top-k results.
# mod_stats : like stats, but a miss inherits the value of the previous hint.
stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
mod_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
# Per query: hints that missed at some k although the previous hint had hit.
printed = defaultdict(list)
ave_lengths = []  # len() of every returned result
ave_results = []  # number of results per request
recall = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
precision = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
mAP = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
# rewritten_queries = json.load(open("LSC22_split.json"))
targets = [query["targets"] for query in queries.values()]
for i, query in tqdm(enumerate(rewritten_queries), total=len(rewritten_queries)):
    last_hint = ""
    target_set = set(targets[i])
    for j, hint in enumerate(query):
        # Rebuild the cumulative hint on a copy. Writing it back into
        # rewritten_queries (as before) made this cell non-idempotent: a
        # second run concatenated already-cumulative hints.
        cumulative = (last_hint + hint["current"]).replace("..", ".")
        hint = dict(hint, current=cumulative)
        last_hint = cumulative

        # Request errors simply propagate; the former try/except only
        # re-raised and was followed by unreachable code.
        cache_key = f"{i}_hint{j}"
        if cache_key in cached:
            results = cached[cache_key]
        else:
            results = get_images_from_query(hint)
            # Keep paging until 100 results are collected or the server
            # returns nothing more.
            while len(results) < 100:
                new_results = get_more()
                if not new_results:
                    break
                results.extend(new_results)
                print(len(results), len(new_results))
            cached[cache_key] = results

        ave_results.append(len(results))
        ave_lengths.extend([len(scene) for scene in results])
        for k in [1, 3, 5, 10, 20, 21, 50]:
            all_images = []
            # Scene based
            for scene in results[:k]:
                ranked_images = sorted(scene, key=lambda x: x[2], reverse=True) # True -> False
                # Reduce each path to its bare file name without extension.
                ranked_images = [image[0].split(".")[0].split("/")[-1] for image in ranked_images]
                all_images.extend(ranked_images)
            # # Image based
            # for scene in results[:k]:
            #     all_images.append(scene[0].split(".")[0].split("/")[-1])
            matches = target_set.intersection(all_images)
            # Guard both ratios: an empty result list or a query without
            # targets used to raise ZeroDivisionError.
            recall[i][j][k] = len(matches) / len(targets[i]) if targets[i] else 0.0
            precision[i][j][k] = len(matches) / len(all_images) if all_images else 0.0
            # Mean of precision@t for t = 1..k over the flattened image list
            # (every rank contributes, not only the relevant ones).
            ap = []
            for t in range(1, k+1):
                small_matches = target_set.intersection(all_images[:t])
                ap.append(len(small_matches) / t)
            mAP[i][j][k] = np.mean(ap)
            if len(matches) == 0:
                if j > 0:
                    if stats[i][j-1][k] > 0 and j not in printed[i]:
                        printed[i].append(j)
                    mod_stats[i][j][k] = mod_stats[i][j-1][k]
            else:
                stats[i][j][k] = 1
                mod_stats[i][j][k] = 1
        

  0%|          | 0/14 [00:00<?, ?it/s]

{'current': 'I was praying to small golden Buddha', 'before': '', 'beforewhen': None, 'after': '', 'afterwhen': None}
{'current': 'I was praying to small golden Buddha', 'before': '', 'beforewhen': None, 'after': '', 'afterwhen': None}
{'current': 'I was praying to small golden Buddha', 'before': '', 'beforewhen': None, 'after': '', 'afterwhen': None}
{'current': 'I was praying to small golden Buddha', 'before': '', 'beforewhen': None, 'after': '', 'afterwhen': None}
{'current': 'I was praying to small golden Buddha', 'before': '', 'beforewhen': None, 'after': '', 'afterwhen': None}
{'current': 'I was praying to small golden Buddha', 'before': '', 'beforewhen': None, 'after': '', 'afterwhen': None}
{'current': 'I was praying to small golden Buddha', 'before': '', 'beforewhen': None, 'after': '', 'afterwhen': None}
{'current': 'I was praying to small golden Buddha in a tunnel.', 'before': '', 'beforewhen': None, 'after': '', 'afterwhen': None}
{'current': 'I was praying to small golden 

In [33]:
import numpy as np
# One LaTeX table row per hint: the hint number followed by the mean of
# stats over all queries for every cutoff k.
n_queries = len(queries)
for hint_j in range(6):
    row = [str(hint_j + 1)]
    for hit_k in (1, 3, 5, 10, 20, 50):
        hits = [stats[q][hint_j][hit_k] for q in range(n_queries)]
        row.append(f"{np.mean(hits):0.2f}")
    print(" & ".join(row), end=" & ")
    print("\\\\")
# Image based
# 1 & 0.29 & 0.43 & 0.50 & 0.64 & 0.71 & 0.86 & \\
# 2 & 0.57 & 0.79 & 0.79 & 0.79 & 0.86 & 0.86 & \\
# 3 & 0.71 & 0.86 & 0.86 & 0.86 & 0.86 & 0.86 & \\
# 4 & 0.64 & 0.71 & 0.79 & 0.79 & 0.79 & 0.79 & \\
# 5 & 0.57 & 0.64 & 0.64 & 0.71 & 0.71 & 0.71 & \\
# 6 & 0.64 & 0.64 & 0.64 & 0.71 & 0.71 & 0.71 & \\
# Scene based
# 1 & 0.50 & 0.57 & 0.57 & 0.71 & 0.71 & 0.86 & \\
# 2 & 0.64 & 0.79 & 0.79 & 0.79 & 0.86 & 0.93 & \\
# 3 & 0.79 & 0.86 & 0.86 & 0.86 & 0.86 & 0.86 & \\
# 4 & 0.79 & 0.79 & 0.79 & 0.79 & 0.79 & 0.79 & \\
# 5 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\
# 6 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\

1 & 0.50 & 0.57 & 0.57 & 0.71 & 0.71 & 0.86 & \\
2 & 0.64 & 0.79 & 0.79 & 0.79 & 0.79 & 0.86 & \\
3 & 0.79 & 0.86 & 0.86 & 0.86 & 0.86 & 0.86 & \\
4 & 0.79 & 0.79 & 0.79 & 0.79 & 0.79 & 0.79 & \\
5 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\
6 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\


In [38]:
# One LaTeX row per hint with the mean recall at k = 3 over all queries.
for hint_j in range(6):
    recall_at_3 = [recall[q][hint_j][3] for q in range(len(queries))]
    print(f"{hint_j + 1} & {np.mean(recall_at_3):0.2f} & \\\\")
# Image based (Recall, precision, hit rate, map)
# 1 & 0.39 & 0.06 & 0.71 & 0.13 & \\
# 2 & 0.55 & 0.13 & 0.86 & 0.24 & \\
# 3 & 0.57 & 0.15 & 0.86 & 0.29 & \\
# 4 & 0.49 & 0.14 & 0.79 & 0.27 & \\
# 5 & 0.42 & 0.15 & 0.71 & 0.23 & \\
# 6 & 0.43 & 0.14 & 0.71 & 0.23 & \\
# Scene based
# 1 & 0.44 & 0.09 & 0.57 & 0.36 & \\
# 2 & 0.58 & 0.13 & 0.79 & 0.48 & \\
# 3 & 0.58 & 0.15 & 0.86 & 0.60 & \\
# 4 & 0.53 & 0.15 & 0.79 & 0.56 & \\
# 5 & 0.44 & 0.11 & 0.64 & 0.48 & \\
# 6 & 0.45 & 0.12 & 0.64 & 0.48 & \\

1 & 0.44 & \\
2 & 0.58 & \\
3 & 0.58 & \\
4 & 0.53 & \\
5 & 0.44 & \\
6 & 0.45 & \\


In [None]:
# One LaTeX row per hint with the mean of mod_stats at k = 3 over all queries
# (no leading hint number in this table).
for hint_j in range(6):
    carried_hits = [mod_stats[q][hint_j][3] for q in range(len(queries))]
    print(f"{np.mean(carried_hits):0.2f}", end=" & ")
    print("\\\\")

# Visualisation

In [None]:
import pandas as pd
import numpy as np
def parse_results(latex_str, model="cluster", ks=(1, 3, 5, 10, 20, 50)):
    """
    Convert a LaTeX-style score table into a long-format DataFrame.

    Rows are separated by backslashes and cells by "&". The first cell of
    a row is skipped (it holds the row label); the following ``len(ks)``
    cells are parsed as floats.

    Parameters
    ----------
    latex_str : str
        The table text, e.g. ``"1 & 0.43 & ... & \\\\"`` per row.
    model : str, optional
        Value written to the "Model" column of every produced record.
    ks : sequence of int, optional
        Column labels, in table order, written to the "K" column.

    Returns
    -------
    pandas.DataFrame
        Columns "Model", "hint", "K", "score"; one record per table cell.
        "hint" is the 1-based position of the row among the non-empty rows.
    """
    records = []
    hint = 0
    for segment in latex_str.split("\\"):
        row = segment.strip()
        # Skip empty pieces (doubled backslashes, trailing newline): they
        # used to advance the hint counter or crash on the missing cells.
        if not row:
            continue
        hint += 1
        cells = row.split("&")
        for col, k in enumerate(ks, start=1):
            records.append([model, hint, k, float(cells[col])])
    return pd.DataFrame(records, columns=["Model", "hint", "K", "score"])
        
        
# Score tables (one row per hint, one column per K), one table per model.
cluster_table = """1 & 0.43 & 0.57 & 0.57 & 0.71 & 0.71 & 0.93 & \\
2 & 0.64 & 0.79 & 0.79 & 0.79 & 0.86 & 0.93 & \\
3 & 0.79 & 0.86 & 0.86 & 0.86 & 0.86 & 0.86 & \\
4 & 0.71 & 0.79 & 0.79 & 0.79 & 0.79 & 0.79 & \\
5 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\
6 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\"""

wtransf_table = """1 & 0.29 & 0.50 & 0.57 & 0.71 & 0.71 & 0.79 & \\
2 & 0.64 & 0.71 & 0.79 & 0.79 & 0.79 & 0.86 & \\
3 & 0.57 & 0.64 & 0.71 & 0.79 & 0.86 & 0.86 & \\
4 & 0.57 & 0.64 & 0.71 & 0.79 & 0.79 & 0.79 & \\
5 & 0.57 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\
6 & 0.64 & 0.64 & 0.64 & 0.64 & 0.71 & 0.71 & \\"""

transf_table = """1 & 0.21 & 0.21 & 0.21 & 0.43 & 0.50 & 0.57 & \\
2 & 0.43 & 0.50 & 0.57 & 0.57 & 0.64 & 0.64 & \\
3 & 0.43 & 0.64 & 0.64 & 0.64 & 0.71 & 0.86 & \\
4 & 0.43 & 0.64 & 0.71 & 0.71 & 0.71 & 0.71 & \\
5 & 0.50 & 0.64 & 0.64 & 0.64 & 0.71 & 0.71 & \\
6 & 0.57 & 0.57 & 0.64 & 0.64 & 0.71 & 0.71 & \\"""

mean_table = """1 & 0.50 & 0.57 & 0.57 & 0.71 & 0.71 & 0.86 & \\
2 & 0.64 & 0.79 & 0.79 & 0.79 & 0.86 & 0.93 & \\
3 & 0.79 & 0.86 & 0.86 & 0.86 & 0.86 & 0.86 & \\
4 & 0.79 & 0.79 & 0.79 & 0.79 & 0.79 & 0.79 & \\
5 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\
6 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & 0.64 & \\"""

# Parse every table and stack the frames in one concat (each frame keeps
# its own 0-based index, as with the pairwise concatenation).
df = pd.concat([
    parse_results(cluster_table, "Cluster"),
    parse_results(wtransf_table, "WTransf"),
    parse_results(transf_table, "Transf"),
    parse_results(mean_table, "Mean"),
])


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Theme and DPI must be configured BEFORE the FacetGrid is built: the grid
# creates its figure and axes in the constructor, so settings applied
# afterwards only affected figures created later (i.e. a second run).
sns.set_theme(style="white")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
# sns.set(font_scale=2)

# One colour per model, in hue order; the trailing two hex digits are alpha.
colors = """#a6cee3
#1f78b4BB
#66c2a5BB
#fc8d62DD"""

# Per-hue line properties; each list has one entry per model.
d = {'color': colors.split("\n"),
     "ls": ["-", "--", "-", '-.'],
     "linewidth": [2, 1, 1, 1],
     "marker": ["o", "h", ".", "P"]}

# One panel per cutoff K, one line per model.
g = sns.FacetGrid(df, col="K", hue="Model", col_wrap=3, height=2.5, aspect=0.8, hue_kws=d)
g.map(sns.lineplot, "hint", "score")
g.set_axis_labels("Hint", "Recall")
g.set(ylim=(0, 1))
g.set(xticks=[1,2,3,4,5,6])
g.set_xticklabels(([1,2,3,4,5,6]))
g.fig.tight_layout()
g.add_legend()

# LSC21

In [None]:
# Load the LSC21 queries and their target images. From here on `queries`
# and `targets` refer to LSC21, replacing the LSC22 variables above.
with open('LSC21queries.json') as f:  # context manager closes the file
    data = json.load(f)
queries = data["queries"]
targets = data["targets"]

In [None]:
# Sanity check: number of LSC21 queries that were loaded.
len(queries)

In [None]:
# Turn each LSC21 query's hints into increments: every hint keeps only the
# text left after removing the previous hint's text from it.
rewritten_queries = []
for query in queries.values():
    new_query = []
    last_hint = ""
    for hint in query:
        new_last_hint = hint
        hint = hint.replace(last_hint, "")
        # Trailing dots/spaces are dropped so the next hint still contains
        # this text even when its punctuation differs.
        last_hint = new_last_hint.strip('. ')
        new_query.append({"current": hint, "before": "", "beforewhen": None, "after": "", "afterwhen": None})
    rewritten_queries.append(new_query)
# Write through a context manager so the file is flushed and closed.
# NOTE(review): the evaluation cell further down loads
# "rewritten_queries.json", not this file - confirm which one is intended.
with open("rewritten_queries_clip.json", "w") as f:
    json.dump(rewritten_queries, f)

In [None]:
# Spot check for one hand-written query: for every cutoff k print 1 when a
# target of targets[1] is among the first k results, otherwise 0.
null = None
results = get_images_from_query(  {
            "current": "Going into \"Northside Shopping Centre\"",
            "before": "I am driving to the shopping centre from work",
            "beforewhen": null,
            "after": "I am there to get new keys",
            "afterwhen": null
        })
for k in [1, 3, 5, 10, 20, 50, 100]:
    top_images = {scene[0] for scene in results[:k]}
    found = top_images & set(targets[1])
    print(1 if found else 0, end='      ')

In [None]:
from collections import defaultdict
with open("rewritten_queries.json") as f:  # context manager closes the file
    rewritten_queries = json.load(f)
#stats[query_i][hint_j][hit_k]
# stats     : 1 when a target image is among the first k results, else 0.
# mod_stats : like stats, but a miss inherits the value of the previous hint.
stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
mod_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
printed = []
for i, query in enumerate(rewritten_queries):
    for j, hint in enumerate(query):
        try:
            results = get_images_from_query(hint)
        except Exception as exc:
            # Best effort: a failed request is scored as a miss, but it is
            # reported instead of being swallowed silently. `Exception`
            # (not a bare except) keeps Ctrl-C / kernel interrupt working.
            print(f"Request failed for query {i}, hint {j}: {exc!r}")
            results = []
        for k in [1, 3, 5, 10, 20, 50, 100]:
            all_images = [scene[0] for scene in results[:k]]
            matches = set(all_images) & set(targets[i])
            if len(matches) == 0:
                stats[i][j][k] = 0
                if j > 0:
                    # Print (once per hint) the hints that lose a hit the
                    # previous hint had, directly or carried over.
                    if mod_stats[i][j-1][k] > 0:
                        if (i, j) not in printed:
                            printed.append((i,j))
                            print(hint)
                    mod_stats[i][j][k] = mod_stats[i][j-1][k]
            else:
                stats[i][j][k] = 1
                mod_stats[i][j][k] = 1

In [None]:
import numpy as np
# One LaTeX table row per hint: the hint number followed by the mean of
# stats over all evaluated queries for every cutoff k.
for hint_j in range(6):
    row = [str(hint_j + 1)]
    for hit_k in (1, 3, 5, 10, 20, 50, 100):
        hits = [stats[q][hint_j][hit_k] for q in stats]
        row.append(f"{np.mean(hits):0.2f}")
    print(" & ".join(row), end=" & ")
    print("\\\\")

In [None]:
# One LaTeX table row per hint: the hint number followed by the mean of
# mod_stats over all evaluated queries for every cutoff k.
for hint_j in range(6):
    print(hint_j + 1, end=" & ")
    for hit_k in [1, 3, 5, 10, 20, 50, 100]:
        # Build a fresh list for every cutoff. The list used to be created
        # once per hint, so each column also averaged in the values of all
        # smaller cutoffs.
        mod_result_hit = [mod_stats[i][hint_j][hit_k] for i in stats]
#                 {np.mean(mod_result_hit):0.4f}
        print(f"{np.mean(mod_result_hit):0.2f}", end=" & ")
    print("\\\\")

In [None]:
import xml.etree.ElementTree as ET

ntcir13 = '/home/tlduyen/LQA/BDG/ntcir-13.xml'

tree = ET.parse(ntcir13)
root = tree.getroot()
# For every <description> element two levels below the root, keep the text
# after its first capital "I", trim dots/spaces/commas at both ends, and
# put "I " back in front.
descriptions = [
    "I " + node.text.split('I', 1)[1].strip('. ,')
    for topic in root
    for node in topic
    if node.tag == "description"
]

In [None]:
# Inspect the rewritten topic descriptions.
descriptions

In [None]:
from collections import defaultdict
# Ground truth: topic id (int) -> set of relevant image names.
ground_truth = defaultdict(set)
with open('/home/tlduyen/LQA/BDG/NTCIR13_Lifelog2_LSAT_gt.csv') as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines (e.g. at the end of the file)
        # Example row: "u1_2016-08-16_111256_2, 1"
        image, topic = line.split(', ')
        # Drop the first 3 and last 2 characters and remove the dashes:
        # "u1_2016-08-16_111256_2" -> "20160816_111256".
        ground_truth[int(topic)].add(image[3:-2].replace('-', ''))

In [None]:
# Query phrasing without the leading "I am". When the notebook is run top
# to bottom, the next cell replaces this list with the "I am ..." variant.
alternatives = [
    "eating lunch",
    "gardening in my home",
    "visiting a castle at night",
    "drinking coffee in a cafe",
    "outside at sunset",
    "visited a graveyard",
    "lecturing to a group of people in a classroom environment",
    "grocery shopping",
    "worked at home late at night",
    "working on the computer at my office desk",
]

In [None]:
# Present-tense phrasing ("I am ..."), one query per topic in topic order.
alternatives = [
    "I am eating lunch",
    "I am gardening in my home",
    "I am visiting a castle at night",
    "I am drinking coffee in a cafe",
    "I am outside at sunset",
    "I am visiting a graveyard",
    "I am lecturing to a group of people in a classroom environment",
    "I am grocery shopping",
    "I am working at home late at night",
    "I am working on the computer at my office desk",
]

In [None]:
import pandas as pd
k = 10  # number of results kept per query
final_results = []
for search_query in alternatives[:10]:
    try:
        results = get_images_from_query({"current": search_query, "before": "", "beforewhen": None, "after": "", "afterwhen": None})
    except Exception as exc:
        # Best effort: report the failure and score the query as empty.
        # `Exception` (not a bare except) keeps kernel interrupts working.
        print(f"Request failed for {search_query!r}: {exc!r}")
        results = []

    # Reduce each path to its bare file name without extension and "_000".
    all_images = [
        scene[0].split('.')[0].split('/')[-1].replace('_000', '')
        for scene in results[:k]
    ]
    if len(all_images) == 0:
        print(search_query)
    # Always append, even when empty: final_results is consumed by position
    # (query n <-> topic n), and skipping an entry shifted all later topics.
    final_results.append(all_images)

In [None]:
# Precision@n per topic, then the mean over the ten topics.
score = []
n = 10  # cutoff used for both the slice and the divisor
for topic, all_images in enumerate(final_results, start=1):
    # Look the ground truth up by topic id (query n <-> topic n, as in the
    # "outside at sunset" / ground_truth[5] check below) instead of relying
    # on the order in which topics first appear in the CSV.
    truth = ground_truth.get(topic, set())
    matches = set.intersection(truth, all_images[:n])
    p = len(matches) / n
    score.append(p)
#     for i in range(n):
#         if all_images[i] in truth:
#             matches = set.intersection(truth, all_images[:i + 1])
#             p += len(matches)/(i+1)
#             gtp += 1 
#     if gtp == 0:
#         score.append(1.0)
#     else:
#         score.append(p/min(len(truth), n))
print(sum(score[:10])/10)
score[:10]

no object: 0.63, [0.8, 0.4, 0.8, 0.8, 0.0, 0.8, 0.5, 0.9, 0.7, 0.6]

present tense: 0.57, [0.7, 0.3, 0.7, 0.7, 0.0, 0.6, 0.6, 0.9, 0.8, 0.4]

past tense: 0.51, [0.2, 0.5, 0.8, 0.9, 0.0, 0.5, 0.6, 0.8, 0.7, 0.1]

In [None]:
# Wrong only
# Manual relevance judgements: topic id -> positions of the results that
# were judged wrong. AP() and P_10() below compare these values against
# range(10), so positions are 0-based and every position not listed here
# counts as relevant.
relevance_judgements = {
    1: [7],
    2: [4,5,6,7,8,9],
    3: [7,9],
    4: [7],
    5: [9,7,6,2],
    6: [],
    7: [3,5,6],
    8: [],
    9: [2, 5],
    10:[]
}

In [None]:
def AP(wrong_sub):
    """
    Average-precision style score over ten ranked results.

    `wrong_sub` holds the 0-based positions judged wrong; every other
    position in 0..9 counts as relevant. The precision at each relevant
    position is summed and the total is divided by 10. When no position is
    relevant the function returns 1.

    NOTE(review): returning 1 when nothing is relevant, and dividing by 10
    rather than by the number of relevant results, both differ from the
    textbook definition of average precision - confirm this is intended.
    """
    relevant_seen = 0
    precision_total = 0
    for position in range(10):
        if position in wrong_sub:
            continue
        relevant_seen += 1
        precision_total += relevant_seen / (position + 1)
    if relevant_seen == 0:
        return 1
    return precision_total / 10

def P_10(wrong_sub):
    """Precision at 10: the share of positions 0..9 not listed in `wrong_sub`."""
    correct = sum(1 for position in range(10) if position not in wrong_sub)
    return correct / 10

# AP for every judged topic (in dictionary order), then the mean over ten.
score = [AP(wrong_positions) for wrong_positions in relevance_judgements.values()]

print(sum(score)/10)
score

In [None]:
import pandas as pd
k = 10  # number of results kept per query
final_results = []
for search_query in alternatives[:10]:
    try:
        results = get_images_from_query({"current": search_query, "before": "", "beforewhen": None, "after": "", "afterwhen": None})
    except Exception as exc:
        # Best effort: report the failure and treat the query as empty.
        # `Exception` (not a bare except) keeps kernel interrupts working.
        print(f"Request failed for {search_query!r}: {exc!r}")
        results = []

    # Keep the returned paths unchanged; the display cell below opens them.
    all_images = [scene[0] for scene in results[:k]]
    if len(all_images) == 0:
        print(search_query)
    # Always append, even when empty: final_results is indexed by query
    # position afterwards, and skipping an entry shifted all later queries.
    final_results.append(all_images)

In [None]:
# First ten results of the third query (index 2).
final_results[2][:10]

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import IPython.display

# Show the first ten results of the first query in a 2 x 5 grid.
plt.figure(figsize=(16, 5))
images = final_results[0][:10]
for i, filename in enumerate(images):
    try:
        # The context manager closes the underlying file once the pixels
        # have been copied by convert().
        with Image.open("/home/tlduyen/webp_images/" + filename.split('.')[0] + ".webp") as source:
            image = source.convert("RGB")

        ax = plt.subplot(2, 5, i + 1)
        ax.imshow(image)
        ax.axis('off')
    except Exception:
        # Best effort: leave the slot empty for images that cannot be shown.
        # `Exception` (not a bare except) keeps kernel interrupts working.
        print("Error in " + filename)

plt.tight_layout()

In [None]:
# Inspect one topic in detail: list the top-k results that are not in the
# ground truth and compute precision@k.
query = "outside at sunset"
truth = ground_truth[5]
try:
    results = get_images_from_query({"current": query, "before": "", "beforewhen": None, "after": "", "afterwhen": None})
except Exception as exc:
    # Best effort: report the failure and continue with no results.
    # `Exception` (not a bare except) keeps kernel interrupts working.
    print(f"Request failed for {query!r}: {exc!r}")
    results = []
k = 10
# Reduce each path to its bare file name without extension and "_000".
all_images = [
    scene[0].split('.')[0].split('/')[-1].replace('_000', '')
    for scene in results[:k]
]
print([(i, image) for i,image in enumerate(all_images) if image not in truth])
matches = set.intersection(truth, all_images[:k])
p = len(matches)/k
p