In [19]:
import os
import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (16.0, 16.0)
plt.rcParams['image.interpolation'] = 'nearest'


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
#import 
from cbir.model import get_model
from cbir.features import extract_raw_features
from cbir.utils import load_image, get_list, load_npy_files, covert_data_format
from cbir.utils import run_feature_processing_pipeline
from cbir.crow import apply_crow_aggregation
from cbir.query import compute_distances, compute_cosin_distance
from cbir.rmac import apply_rmac_aggregation
from cbir.utils import result_ap, result_precision, get_list_set
from cbir.query import simple_query_expansion, reranking

# Network used for feature extraction in the cells below.
model = get_model()

# Root folders. They can be redirected through environment variables; the
# defaults are the locations this notebook was originally run with, so the
# resulting paths are unchanged when the variables are not set.
dataset_root = os.environ.get('CBIR_DATASET_ROOT', '/home/jy/dataset/fabric_dataset')
results_root = os.environ.get('CBIR_RESULTS_ROOT', '/home/jy/results')

# Inputs
img_path = os.path.join(dataset_root, 'train')
rank_file = os.path.join(dataset_root, 'ground_truth.txt')
query_file_path = os.path.join(dataset_root, 'query.txt')

# Outputs (one folder of .npy files per feature type)
raw_out_path = os.path.join(results_root, 'raw', 'fabric')
crow_out_path = os.path.join(results_root, 'crow', 'fabric')
rmac_out_path = os.path.join(results_root, 'rmac', 'fabric')

# np.save does not create missing parent folders, so make sure they exist
# before any feature is written.
for out_dir in (raw_out_path, crow_out_path, rmac_out_path):
    os.makedirs(out_dir, exist_ok=True)
del out_dir

In [24]:
#extract features
# For every database image: load it at 512x512, run the network, convert the
# raw activations with covert_data_format, aggregate them with CroW and
# R-MAC, and store each descriptor as <image name>.npy in its output folder.
img_list = get_list(img_path)
n_images = len(img_list)
print('total number: %d ' % n_images)
t_start = time.time()
for i, file_path in enumerate(img_list):
    # Progress report every 100 images.
    if i % 100 == 0:
        print('process %.2f .... time elapse: %.2f s' % ((i / n_images) * 100,
                                                      (time.time() - t_start)))

    img = load_image(file_path, (512, 512))
    # layer block5 (per the original note) - TODO confirm against cbir.features
    raw_feature = extract_raw_features(model, img)
    cvt_raw_feature = covert_data_format(raw_feature)

    # splitext only strips the final extension, so names containing dots are
    # kept intact and match the keys built in the ground-truth cell (which
    # also uses splitext). The previous split('.')[0] truncated such names.
    stem = os.path.splitext(os.path.basename(file_path))[0]

    # crow feature
    # Saved as well: the loading cell reads crow_out_path, so skipping this
    # save makes a fresh run depend on files left over from an older session.
    crow_feature = apply_crow_aggregation(cvt_raw_feature)
    np.save(os.path.join(crow_out_path, stem), crow_feature)

    # rmac feature
    rmac_feature = apply_rmac_aggregation(cvt_raw_feature)
    np.save(os.path.join(rmac_out_path, stem), rmac_feature)

print('total time : %f s' % ((time.time()-t_start),))
del t_start
del n_images
del img_list

total number: 990 
process 0.00 .... time elapse: 0.00 s
process 10.10 .... time elapse: 5.07 s
process 20.20 .... time elapse: 10.03 s
process 30.30 .... time elapse: 15.04 s
process 40.40 .... time elapse: 20.03 s
process 50.51 .... time elapse: 24.99 s
process 60.61 .... time elapse: 30.03 s
process 70.71 .... time elapse: 35.07 s
process 80.81 .... time elapse: 40.15 s
process 90.91 .... time elapse: 45.24 s
total time : 49.823053 s


In [25]:
# Read the stored CroW and R-MAC descriptors back from disk.
# load_npy_files returns a pair per feature type; the second element is used
# further down as the list of names that accompanies the feature rows.
load_started = time.time()

crow_npy_paths = get_list(crow_out_path, 'npy')
rmac_npy_paths = get_list(rmac_out_path, 'npy')

crow_features, crow_files = load_npy_files(crow_npy_paths)
rmac_features, rmac_files = load_npy_files(rmac_npy_paths)

load_seconds = time.time() - load_started
print('load time %f s' % load_seconds)

# Drop the helpers so they do not linger in the kernel namespace.
del load_started, load_seconds
del crow_npy_paths, rmac_npy_paths

successful loading of npy file !!!
successful loading of npy file !!!
load time 0.706952 s


In [26]:
#ground truth
# Build final_dict: {image name without extension -> class label}.
# Images belong to the same class when the part of their name before the
# first '_' is identical. Labels are 1-based, in order of first appearance.
img_list = get_list(img_path)
rank_type = dict()   # prefix -> 0-based class index
final_dict = dict()
for path in img_list:
    name = os.path.splitext(os.path.basename(path))[0]
    prefix_file = name.split('_')[0]
    # Look the label up by prefix instead of using a running counter: the
    # counter only gave the right label while all images of a class were
    # contiguous in img_list. For contiguous input the labels are unchanged.
    class_idx = rank_type.setdefault(prefix_file, len(rank_type))
    final_dict[name] = class_idx + 1

# To regenerate the ground-truth file from this mapping:
#with open(rank_file, 'w') as f:
#    f.writelines([k+' '+str(v)+ '\n' for k,v in sorted(final_dict.items())])

# clear memory
del img_list
del rank_type

In [27]:
#load query list
# Read one image file name per line from query_file_path, compute the CroW
# and R-MAC descriptor of each query image and stack them into two arrays
# (one row per query). query_list holds the names without extension.
query_list = list()
query_crow_features = list()
query_rmac_features = list()
t_start = time.time()
with open(query_file_path, 'r') as f:
    for line in f:
        file_name = line.strip()
        if not file_name:
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            continue
        # splitext handles any extension/case; split('.jpg') left the
        # trailing newline in the name whenever '.jpg' was not present.
        query_list.append(os.path.splitext(file_name)[0])
        # NOTE(review): database images are loaded with an explicit (512,512)
        # size, queries with load_image's default - confirm both match.
        img = load_image(os.path.join(img_path, file_name))
        raw_feature = extract_raw_features(model, img)
        cvt_raw_feature = covert_data_format(raw_feature)
        query_crow_features.append(apply_crow_aggregation(cvt_raw_feature))
        query_rmac_features.append(apply_rmac_aggregation(cvt_raw_feature))
print('total query image %d' % (len(query_list),))

if not query_list:
    # np.vstack would fail on an empty list with a less helpful message.
    raise ValueError('no query images listed in %s' % query_file_path)

query_crow_features = np.vstack(query_crow_features)
query_rmac_features = np.vstack(query_rmac_features)
print(query_crow_features.shape)
print(query_rmac_features.shape)
# Features used by the evaluation cells; the PCA cell may replace them.
all_query_crow_features = query_crow_features
all_query_rmac_features = query_rmac_features

print('time elapse: %f s' % (time.time() - t_start,))
del t_start

total query image 165
(165, 512)
(165, 512)
time elapse: 4.002818 s


In [46]:
#do pca
# Optionally project database and query descriptors to pca_dims dimensions.
# The pipeline is fitted on the database features; the returned params are
# then passed back in to transform the query features.
do_pca=True
pca_dims = 256

# Start from the unprojected features on every run. Resetting the query
# features here as well keeps the cell idempotent: switching do_pca off after
# a PCA run no longer leaves projected queries paired with raw database
# features.
all_crow_features = crow_features
all_rmac_features = rmac_features
all_query_crow_features = query_crow_features
all_query_rmac_features = query_rmac_features


if do_pca:
    # Database side: fit and transform.
    crow_pca_features, crow_pca_params = run_feature_processing_pipeline(crow_features,
                                                                        d = pca_dims,
                                                                        copy = True)
    rmac_pca_features, rmac_pca_params = run_feature_processing_pipeline(rmac_features,
                                                                        d = pca_dims,
                                                                        copy = True)
    all_crow_features = crow_pca_features
    all_rmac_features = rmac_pca_features
    print(crow_pca_features.shape)
    print(rmac_pca_features.shape)
    print(crow_pca_params['pca'])
    print(rmac_pca_params['pca'])

    # Query side: reuse the parameters obtained from the database features.
    query_crow_pca_features, _ = run_feature_processing_pipeline(query_crow_features,
                                                                        d = pca_dims,
                                                                        copy = True,
                                                                        params=crow_pca_params)
    query_rmac_pca_features, _ = run_feature_processing_pipeline(query_rmac_features,
                                                                        d = pca_dims,
                                                                        copy = True,
                                                                        params=rmac_pca_params)
    all_query_crow_features = query_crow_pca_features
    all_query_rmac_features = query_rmac_pca_features
    print(query_crow_pca_features.shape)
    print(query_rmac_pca_features.shape)

(990, 256)
(990, 256)
PCA(copy=True, iterated_power='auto', n_components=256, random_state=None,
  svd_solver='auto', tol=0.0, whiten=True)
PCA(copy=True, iterated_power='auto', n_components=256, random_state=None,
  svd_solver='auto', tol=0.0, whiten=True)
(165, 256)
(165, 256)


In [47]:
#distance compute
# Evaluate retrieval with the CroW descriptors: rank the database for every
# query by cosine distance, optionally apply query expansion and/or
# re-ranking, and average the per-query AP of the top-K results.

do_QE = True    # query expansion using the top-K results of the first pass
do_RE = False   # re-rank the final list
topK = 10       # results used for expansion and for the AP computation
aps=list()

for i, f in enumerate(all_query_crow_features):
    # Q is the query actually searched with. It starts as the plain query so
    # that re-ranking still has a query when expansion is switched off
    # (previously Q was undefined in that case).
    # NOTE(review): reranking is assumed to accept [f] like the other helpers - confirm.
    Q = [f]
    idxs, rank_dists, rank_names = compute_cosin_distance(Q, all_crow_features, crow_files)
    pos,neg = get_list_set(query_list[i],final_dict)

    if do_QE:
        Q = simple_query_expansion([f], all_crow_features, idxs, top_k=topK)
        idxs, rank_dists, rank_names = compute_cosin_distance(Q, all_crow_features, crow_files)

    if do_RE:
        rank_names = reranking(Q, all_crow_features, idxs, rank_names)

    # Score whatever ranking the enabled stages produced. Computing AP once,
    # here, also covers the case where both switches are off; before, `ap`
    # was then undefined or silently reused from the previous query.
    ap = result_ap(pos,neg,rank_names[:topK])
    aps.append(ap)

if aps:
    print('mAP %f' % (np.array(aps).sum()/len(aps),))
else:
    print('mAP undefined: no queries were evaluated')

mAP 0.149433


In [48]:
#distance compute
# Evaluate retrieval with the R-MAC descriptors: rank the database for every
# query by cosine distance, optionally apply query expansion and/or
# re-ranking, and average the per-query AP of the top-K results.

do_QE = True    # query expansion using the top-K results of the first pass
do_RE = False   # re-rank the final list
topK = 10       # results used for expansion and for the AP computation
aps=list()

for i, f in enumerate(all_query_rmac_features):
    # Q is the query actually searched with. It starts as the plain query so
    # that re-ranking still has a query when expansion is switched off
    # (previously Q was undefined in that case).
    # NOTE(review): reranking is assumed to accept [f] like the other helpers - confirm.
    Q = [f]
    idxs, rank_dists, rank_names = compute_cosin_distance(Q, all_rmac_features, rmac_files)
    pos,neg = get_list_set(query_list[i],final_dict)

    if do_QE:
        Q = simple_query_expansion([f], all_rmac_features, idxs, top_k=topK)
        idxs, rank_dists, rank_names = compute_cosin_distance(Q, all_rmac_features, rmac_files)

    if do_RE:
        rank_names = reranking(Q, all_rmac_features, idxs, rank_names)

    # Score whatever ranking the enabled stages produced. Computing AP once,
    # here, also covers the case where both switches are off; before, `ap`
    # was then undefined or silently reused from the previous query.
    ap = result_ap(pos,neg,rank_names[:topK])
    aps.append(ap)

if aps:
    print('mAP %f' % (np.array(aps).sum()/len(aps),))
else:
    print('mAP undefined: no queries were evaluated')

mAP 0.119072
