In [1]:
import sys
sys.path.append("../")
from pycocotools.coco import COCO
from sentence_transformers import SentenceTransformer
from services.search import ImageRepresentations, SearchService
from eval.metrics import Metrics
import json
import os
import random
from services.settings import settings

  from tqdm.autonotebook import tqdm, trange


In [2]:
# set parameters
k = 10
threshold = 0.3
last_index = 500

options_without_all = ["first", "concat","random"]

all_queries_path = os.path.join(settings.project_root_dir,"src/eval/extended_queries.json" )
all_queries = json.load(open(all_queries_path))
response_dict_path = os.path.join(settings.output_dir, "response_dict_" + str(last_index) +".json")

encoder_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
url_prefix="http://images.cocodataset.org/val2017/"



In [3]:
all_queries["categories"]["object"]

['birthday cake',
 'wooden chair',
 'antique pocket watch',
 'crystal wine glasses',
 'vintage sofa']

In [4]:
# COCO RELATED FUNCTIONS
def set_coco_object(data_dir):
    ann_file = os.path.join(data_dir, 'coco-images/annotations/captions_val2017.json')
    coco = COCO(ann_file)
    return coco

def get_coco_ids_from_filenames(coco, filename_list):
    # drop the .jpg and convert to int
    filename_list = [int(filename[:-4]) for filename in filename_list]
    return filename_list
    
def get_coco_captions_by_index(coco, image_id):
    annIds = coco.getAnnIds(imgIds=image_id)
    anns = coco.loadAnns(annIds)
    captions = [ann['caption'] for ann in anns]
    return captions

def aggregate_captions(captions,mode):
    if mode == "first":
        return captions[0]
    elif mode == "concat":
        return " ".join(captions)
    elif mode == "random":
        return random.choice(captions)
    else:
        raise ValueError("Invalid mode")

def get_aggregated_caption_list(coco, mode, filenames):
    ids = get_coco_ids_from_filenames(coco, filenames)
    aggregated_caption_list = []
    for i in ids:
        captions = get_coco_captions_by_index(coco, i)
        aggregated_caption_list.append(aggregate_captions(captions, mode))
    return aggregated_caption_list

# set coco object
coco_object = set_coco_object(settings.data_dir)

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!


In [5]:
# Metric related functions
def get_scores(retrieved_files, gt_retrieved_files, performance_dict):
    performance_dict["precision"].append(Metrics.precision(retrieved_files, gt_retrieved_files))
    performance_dict["recall"].append(Metrics.recall(retrieved_files, gt_retrieved_files))
    performance_dict["f1"].append(Metrics.f1_score(retrieved_files, gt_retrieved_files))
    performance_dict["nDCG"].append(Metrics.ndcg(retrieved_files, gt_retrieved_files, 5))
    performance_dict["mRR"].append(Metrics.mrr(retrieved_files, gt_retrieved_files))
    return performance_dict


In [6]:
# set git's search service
git_predicted_file = json.load(open(response_dict_path))
filenames = list(git_predicted_file.keys())
git_image_representions = ImageRepresentations(filenames=filenames,
                                               representations=list(git_predicted_file.values()), 
                                               url_prefix=url_prefix)

git_ss_k = SearchService(image_representations=git_image_representions, encoder_model=encoder_model, k=10)
git_ss_thresh = SearchService(image_representations=git_image_representions, encoder_model=encoder_model, threshold=0.3)

FileNotFoundError: [Errno 2] No such file or directory: '/usr/prakt/s0077/vlm-based-image-search/outputs/response_dict_500.json'

In [None]:
for option in options_without_all:
    truth_image_reps = ImageRepresentations(filenames=list(git_predicted_file.keys()),
                                                            representations=get_aggregated_caption_list(coco_object, option, filenames), 
                                                            url_prefix=url_prefix)
    truth_ss_k = SearchService(image_representations=truth_image_reps, encoder_model=encoder_model, k=10)
    truth_ss_thresh = SearchService(image_representations=truth_image_reps, encoder_model=encoder_model, threshold=0.3)
    print("Option", option)
    for category in all_queries["categories"].keys():
        query_group = all_queries["categories"][category]

        # try with k 
        performance_dict = {"precision": [], "recall": [], "f1": [], "nDCG": [], "mRR": []}
        print("Category: %s, Method: k=10" % (category))
        for query in query_group:
            retrieved_files = git_ss_k.search(query)
            true_retrieved_files = truth_ss_k.search(query)
            performance_dict = get_scores(retrieved_files, true_retrieved_files, performance_dict)
        avg_performance_dict = {k: sum(v) / len(v) for k, v in performance_dict.items()}
        # round all values to 3 decimal places
        avg_performance_dict = {k: round(v, 3) for k, v in avg_performance_dict.items()}
        print(avg_performance_dict)
        
        # try with threshold
        for query in query_group:
            retrieved_files = git_ss_thresh.search(query)
            true_retrieved_files = truth_ss_thresh.search(query)
            performance_dict = get_scores(retrieved_files, true_retrieved_files, performance_dict)
        avg_performance_dict = {k: sum(v) / len(v) for k, v in performance_dict.items()}
        # round all values to 3 decimal places
        avg_performance_dict = {k: round(v, 3) for k, v in avg_performance_dict.items()}
        print("Category: %s, Method: threshold=0.3" % (category))
        print(avg_performance_dict)


Index created with 500 sentences
Index created with 500 sentences
Option first
Category: object, Method: k=10
{'precision': 0.48, 'recall': 0.48, 'f1': 0.48, 'nDCG': 0.566, 'mRR': 1.0}
Category: object, Method: threshold=0.3
{'precision': 0.455, 'recall': 0.457, 'f1': 0.45, 'nDCG': 0.746, 'mRR': 0.961}
Category: action, Method: k=10
{'precision': 0.46, 'recall': 0.46, 'f1': 0.46, 'nDCG': 0.858, 'mRR': 1.0}
Category: action, Method: threshold=0.3
{'precision': 0.518, 'recall': 0.442, 'f1': 0.467, 'nDCG': 0.891, 'mRR': 0.983}
Category: objects_with_count, Method: k=10
{'precision': 0.34, 'recall': 0.34, 'f1': 0.34, 'nDCG': 0.713, 'mRR': 1.0}
Category: objects_with_count, Method: threshold=0.3
{'precision': 0.346, 'recall': 0.326, 'f1': 0.334, 'nDCG': 0.619, 'mRR': 0.879}
Category: reasoning, Method: k=10
{'precision': 0.34, 'recall': 0.34, 'f1': 0.34, 'nDCG': 0.664, 'mRR': 1.0}
Category: reasoning, Method: threshold=0.3
{'precision': 0.324, 'recall': 0.291, 'f1': 0.302, 'nDCG': 0.546, 'm

In [None]:
##### CONTENTS FROM EMRAH'S SCRIPT IN CLIP_SEARCH.PY #####
def prep_dataset_and_index(model_name, batch_size, option):
    print("loading coco...")
    coco_dataset = load_cocos_like_dataset_in_range(5, model_name,  last_index=last_index,  option=option)
    print("creating embedding service...")
    embedding_service = EmbeddingService(model_name, coco_dataset, "mscoco")
    print("start embedding %s images..." % len(coco_dataset))
    index, text_embeddings = embedding_service.get_embeddings(batch_size)
    # i just learned how to use functions of subsets of datasets so i kept it :D
    print("first image's filename:",coco_dataset.dataset.get_filename(0))
    print(text_embeddings[0].shape)
    
    return index, text_embeddings, coco_dataset

In [None]:
index, text_embeddings, coco_dataset = prep_dataset_and_index(model_name, batch_size, options[0])

In [None]:
def embed_query(query, encoder_model):
    # encode the query
    embedded_query = encoder_model.encode(query, convert_to_tensor=True)
    # normalize the query
    embedded_query = embedded_query / embedded_query.norm(dim=-1, keepdim=True)
    return embedded_query

In [None]:
query = "cake"

embedded_query = embed_query(query, encoder_model)
print(embedded_query.shape)

# search for the query
# D, I = index.search(embedded_query, k)
