In [1]:
import faiss
import os
import pandas as pd
import numpy 
import glob
import numpy as np
from skimage import measure, transform, feature
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import ml_metrics as metrics
%matplotlib inline

In [2]:
# DB details
NPZ_FEAT_SRC = "/local/cs572/rchan31/codespace/BaselinePrototype/npz_file_saves/COMPACT_NPZ_DELF_FEATS/"
FAISS_INDEX_FILE = "/local/cs572/rchan31/fINALwEEK/FAISSIndex/FAISS_delf_v1.index"
IMG_SRC_PATH = "/local/cs572/cziems/index/"

DB_LOOKUP_CSV = "/local/cs572/rchan31/fINALwEEK/csv_lookups/FAISS_delf_v1_CSV_lookup.csv"
N_INDEX = 952133

In [3]:
cpu_index = faiss.read_index(FAISS_INDEX_FILE)

cpu_index.ntotal

952133

In [16]:
# Query details
Q_NPZ_FEAT_SRC = "/local/cs572/rchan31/fINALwEEK/COMPACT_NPZ_DELF_test_FEATS"
Q_IMG_SRC_PATH = "/local/cs572/cziems/test/"

Q_LOOKUP_CSV = "/local/cs572/rchan31/fINALwEEK/csv_lookups/processed_retrieval_solutions.csv"
Q_N_INDEX = 667

INITIAL_RANKED_LIST_CSV = "/local/cs572/rchan31/fINALwEEK/csv_lookups/initial_retrievals_delf_v1.csv"

In [5]:
# query-collection
query_df = pd.read_csv(Q_LOOKUP_CSV)
query_image_IDs = query_df["id"]
len(query_image_IDs)

667

In [6]:
quer_vecs = []
quer_ids = {}
for i, fn in enumerate(glob.glob(os.path.join(Q_NPZ_FEAT_SRC, "*.npz"))):
    ID = os.path.basename(fn)[:-4]
    vec = np.load(fn)["descriptors"]
    quer_vecs.append(np.asarray(vec[0, :], dtype=np.float32))
    quer_ids[i] = ID
quer_vecs = np.array(quer_vecs)

quer_vecs.shape

(667, 40)

In [7]:
TOP_K = 100

cpu_index.nprobe = 100

distances, indices = cpu_index.search(quer_vecs, TOP_K)
print(distances.shape)
print(indices.shape)

indices

(667, 100)
(667, 100)


array([[736715, 363666, 948517, ..., 719896,  43536, 851043],
       [501075, 543586, 575723, ..., 623293, 119295, 444459],
       [229099, 775990, 410356, ..., 779317, 594871, 644500],
       ...,
       [255989, 528209, 108180, ..., 543274, 807051, 611768],
       [568700, 641612, 916228, ..., 115030, 130539, 948178],
       [406825, 673593, 472827, ..., 773443, 390467, 884963]])

In [8]:
db_df = pd.read_csv(DB_LOOKUP_CSV)
db_df.head()

Unnamed: 0.1,Unnamed: 0,filenames
0,0,00002469b818f290
1,1,0000298d976221f3
2,2,00002e4e382333bf
3,3,000036329c35b65c
4,4,0000394edaaa55b1


In [14]:
def match_initial_retrievals(q_idx, query_id): # per query
    
    db_ret_img_IDs = []
    for topk_idx, index_num in enumerate(indices[q_idx]):
        
        if index_num == -1:
            continue
        
        fileID = db_df.loc[index_num]["filenames"]
        db_ret_img_IDs.append(fileID)
    
    if len(db_ret_img_IDs) != TOP_K:
        db_ret_img_IDs = db_ret_img_IDs + ['None'] * (TOP_K-len(db_ret_img_IDs))

    return db_ret_img_IDs


ranked_q_db_pairs = []
for q_idx, query_row in query_df.iterrows():
    db_ret_img_IDs = match_initial_retrievals(q_idx, query_row["id"])
    ranked_q_db_pairs.append(db_ret_img_IDs)
    
    print("............ Finished retrieving for query : %s.jpg : %d / %d" % (query_row["id"], (q_idx + 1), query_df.shape[0]))
    
        
retrieval_df = pd.DataFrame(ranked_q_db_pairs)
retrieval_df.insert(0, "queryImageID", query_df["id"])

............ Finished retrieving for query : f60a52c609ab0208.jpg : 1 / 667
............ Finished retrieving for query : 4fa678d60ab4bc23.jpg : 2 / 667
............ Finished retrieving for query : 5be9d8002f8dc60e.jpg : 3 / 667
............ Finished retrieving for query : 0810084dfbce8ea8.jpg : 4 / 667
............ Finished retrieving for query : bb8ca55b60e8cff4.jpg : 5 / 667
............ Finished retrieving for query : c0c270330090bab6.jpg : 6 / 667
............ Finished retrieving for query : 6100f8cc6c68c67f.jpg : 7 / 667
............ Finished retrieving for query : 2e77b0bbdd4d2677.jpg : 8 / 667
............ Finished retrieving for query : 1934744456f5f29c.jpg : 9 / 667
............ Finished retrieving for query : 7929c900163556db.jpg : 10 / 667
............ Finished retrieving for query : 42b9fd64aa1b43bc.jpg : 11 / 667
............ Finished retrieving for query : b39ff185206cf76b.jpg : 12 / 667
............ Finished retrieving for query : e7a124993271a25a.jpg : 13 / 667
........

In [18]:
retrieval_df.to_csv(INITIAL_RANKED_LIST_CSV)
retrieval_df.head()

Unnamed: 0,queryImageID,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,f60a52c609ab0208,c62a9ca23e45420f,61f08305fbc9f806,ff039f52ca878504,f258c7eb40fdc8cf,7c9c6f52fd38e29b,5cbb626ced45ba96,9ae068672e7d8270,61e5ac7b27f26e15,c800b2f0e4d23943,...,8e0eaa513a8af4db,d27ece3ea4f076eb,ceac48e131c8b88d,2389c6f3ae0d098c,f82730e8dac3257e,dc7854a19ce80576,b9c367d64335ecd3,c1a50fc8a5713159,0bb3b80c15238c02,e4d4026892f0f21a
1,4fa678d60ab4bc23,86b37c8c6c559b21,921f02216a181a18,9adc544d34892d6a,720382171af17c2f,8b21751541e80559,e25485a00530097d,5f443f06cae15cd5,3f84e9a136feffa2,d2726ef324ceb7e8,...,caddb647d2cbaf39,47367775e52cd791,735ed8d385f4d91e,25ec89b5e07e16b8,9d8dfe0b2e7d772d,348c5d5e6ba34483,58462343843646b0,a7b375b6a66d338c,201e24b5cea6c46d,777ef9d9e67e75b8
2,5be9d8002f8dc60e,3dcfb68630dae905,d0abe024e8e92459,6e709c5b726a7527,4327234a4d27d181,eebf7fc00415e13e,fc596e3a813e548b,899fc319f90c23e3,190b40938e2bd088,a6ee7fbd054d8e16,...,01adea145c9c88c7,4d6014220144c17f,96d55734414382af,317fd548bf0ce7ea,fab2be691667f47c,9ef0c17f26803ead,6b05227e25fdb494,d18e8002a1b905aa,a012ef3d8a344176,ad60afea0399f43b
3,0810084dfbce8ea8,70e8730fe483d70c,5114c8e3265ada3d,aeba6ccde4c770ff,3330b87594b7dbfc,972862b79aa07753,0944c9f723b5b4ed,8307c5fff7779d70,5d48cd80a39de57c,fa94b601d2382721,...,e145410873532e33,b6c21875e44aafad,645c68dc24280ff7,a392d62ee37ed23f,1f901aa928f363f9,e763186d9699dfdf,24e70e3c2250dfdf,2caaf7b48d2d2075,ad12758119ab6331,25639c8e403a3827
4,bb8ca55b60e8cff4,e714bc41d5d7084c,d6433b3ad4f2258f,db518a09119c7b1f,0d37fbeeac8a6534,2214d68952b1c5d1,1e897b3ee5a22f43,738bd050be438d37,71b470bc516392da,4de111f0414cadd0,...,0cd02ad748bc87ca,3e6230a8ffd06118,7677994e9d7eb07f,dac8cd061216191d,c3cf76113dee4e73,06c0a98dd640b970,7666863f158d4434,75ab910f37a7ed3d,3fff27ee908d12d1,be06e9d1a41cd538


In [21]:
## Removing first-columns : imageID
# cropping out only the retrieved-db-image IDs
retrieved_solution = retrieval_df.iloc[:, 1:] 
retrieved_solution = retrieved_solution.values.tolist()
print(len(retrieved_solution[0]))
len(retrieved_solution)

100


667

In [23]:
## Removing first-columns : imageID
# cropping out only the q-image IDs
solution_df = pd.read_csv(Q_LOOKUP_CSV)
grountruth_solution = solution_df.iloc[:, 2:]
grountruth_solution = grountruth_solution.values.tolist()
print(len(grountruth_solution[0]))
len(grountruth_solution)

100


667

In [24]:
mAPk_cum = 0
TOP_K = 100
for q in range(len(retrieved_solution)):
    temp_mAP = metrics.mapk(grountruth_solution[q], retrieved_solution[q], TOP_K)
    print("mAP for query index :%s : %s" % (q, temp_mAP))
    mAPk_cum += temp_mAP

mAPk_avg = mAPk_cum/len(retrieved_solution)
print(mAPk_cum)
print("mAP across first all queries : ", mAPk_avg) # 0.1916305682880129

mAP for query index :0 : 0.27055380990537237
mAP for query index :1 : 0.13931790258352758
mAP for query index :2 : 0.22167500728438227
mAP for query index :3 : 0.07963972138972139
mAP for query index :4 : 0.0776217176920302
mAP for query index :5 : 0.21106008488039738
mAP for query index :6 : 0.21895474924381172
mAP for query index :7 : 0.2876876474220224
mAP for query index :8 : 0.2870248345404595
mAP for query index :9 : 0.2588920801420801
mAP for query index :10 : 0.2377321818806194
mAP for query index :11 : 0.13222198114385614
mAP for query index :12 : 0.18150006417193917
mAP for query index :13 : 0.08332967986874237
mAP for query index :14 : 0.1083557250041625
mAP for query index :15 : 0.2561120520104895
mAP for query index :16 : 0.2608258893883894
mAP for query index :17 : 0.1178507881007881
mAP for query index :18 : 0.09646769203019204
mAP for query index :19 : 0.2784129186785437
mAP for query index :20 : 0.07085672920829171
mAP for query index :21 : 0.2686199789793539
mAP for q