In [55]:
from sklearn.preprocessing import Normalizer
from sklearn.cluster import KMeans
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input,VGG16
import os
import numpy as np
import pickle
from numpy import linalg as LA

In [79]:
# Use the CNN up to (but not including) the fully connected layers and apply
# global pooling at the end, so the model output is 2-D: (batch, channels).
class VGGNet:
    """Global image descriptor extractor built on the VGG16 convolutional base.

    The network is created with ``include_top=False`` and a global pooling
    layer, so each image is mapped to a single flat feature vector
    (512 values for the default configuration).

    Args:
        input_shape: Input tensor shape passed to ``VGG16``. The first two
            entries are also used as ``target_size`` when loading images.
            Keras enforces a minimum spatial size (the exact limit depends on
            the Keras version).
        weight: Weights specification forwarded to ``VGG16`` (``'imagenet'``).
        pooling: Global pooling mode forwarded to ``VGG16``: ``'max'`` or
            ``'avg'``.
    """

    def __init__(self, input_shape=(224, 224, 3), weight='imagenet', pooling='max'):
        self.input_shape = tuple(input_shape)
        self.weight = weight
        self.pooling = pooling
        self.model = VGG16(
            weights=self.weight,
            input_shape=self.input_shape,
            pooling=self.pooling,
            include_top=False,
        )
        # Run one dummy prediction at construction time. The batch is sized
        # from self.input_shape so it always matches the model that was built.
        # NOTE(review): presumably a warm-up so the first real call is not
        # slowed down by graph construction - confirm.
        self.model.predict(np.zeros((1,) + self.input_shape))

    def extract_feat(self, img_path):
        """Return the L2-normalised descriptor of the image at ``img_path``.

        Args:
            img_path: Path of the image file to load.

        Returns:
            1-D ``numpy.ndarray`` holding the pooled VGG16 features scaled to
            unit Euclidean length. If the network output is all zeros the
            zero vector is returned unchanged instead of dividing by zero.
        """
        img = image.load_img(img_path, target_size=self.input_shape[:2])
        img = image.img_to_array(img)
        img = np.expand_dims(img, axis=0)  # add the batch dimension
        img = preprocess_input(img)
        feat = self.model.predict(img)[0]
        norm = LA.norm(feat)
        if norm == 0:
            # Avoid producing a vector of NaNs for a degenerate descriptor.
            return feat
        return feat / norm

In [91]:
# Build the feature extractor once (this constructs the VGG16 model) and
# display the shape of the model output as the cell result.
vgg=VGGNet()
vgg.model.output_shape

(None, 512)

In [92]:
# Read the dataset images and collect one feature vector per image.
# Each feature is cached as <image stem>.pkl in the feature directory; a cached
# feature is reused instead of running the CNN again.
# NOTE: os.listdir returns entries in arbitrary order, so the position of an
# image in feature_pool / name is only stable for a given directory listing.
image_dir = "/Users/eosrui/图像检索项目指导书与数据/ukbench/full"
cur_dir = "/Users/eosrui/图像检索项目指导书与数据/ukbench/feature/"

feature_pool = []
name = []
for each in os.listdir(image_dir):
    # splitext handles extensions of any length (".jpg", ".jpeg", ...).
    folder_name = os.path.splitext(each)[0]
    dir_new = os.path.join(cur_dir, folder_name)
    file_name = dir_new + '.pkl'

    if os.path.exists(file_name):
        # SECURITY: pickle.load can execute arbitrary code; only load cache
        # files that were written by this notebook.
        with open(file_name, 'rb') as file:
            feature = pickle.load(file)
    else:
        feature = vgg.extract_feat(os.path.join(image_dir, each))
        with open(file_name, 'wb') as file:
            pickle.dump(feature, file)

    feature_pool.append(feature)
    name.append(each)

In [93]:
# feature_list: dict mapping image id (position in feature_pool) -> feature.
# An existing feature_list.pkl takes precedence over the in-memory pool.
if os.path.exists("feature_list.pkl"):
    with open('feature_list.pkl', 'rb') as file:
        feature_list = pickle.load(file)
else:
    feature_list = dict(enumerate(feature_pool))
    with open('feature_list.pkl', 'wb') as file:
        pickle.dump(feature_list, file)

In [94]:
# id2name: dict mapping image id (position in the name list) -> file name.
# An existing id2name.pkl takes precedence over the in-memory list.
if os.path.exists("id2name.pkl"):
    with open('id2name.pkl', 'rb') as file:
        fname = pickle.load(file)
else:
    fname = dict(enumerate(name))
    with open('id2name.pkl', 'wb') as file:
        pickle.dump(fname, file)

In [95]:
# name2id: dict mapping file name -> image id (position in the name list).
# An existing name2id.pkl takes precedence over the in-memory list.
if os.path.exists("name2id.pkl"):
    with open('name2id.pkl', 'rb') as file:
        nameid = pickle.load(file)
else:
    nameid = {file_name_key: position for position, file_name_key in enumerate(name)}
    with open('name2id.pkl', 'wb') as file:
        pickle.dump(nameid, file)

In [162]:
# Quick sanity checks on the structures built above.
sample_feature = feature_list[0]
print(sample_feature.shape)         # length of one descriptor
print(type(sample_feature))         # type stored in feature_list
print(type(feature))                # type of the last feature from the extraction loop
print(fname[1])                     # id -> file name
print(nameid['ukbench06964.jpg'])   # file name -> id
print(len(feature_pool))            # number of images processed

(512,)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
ukbench06964.jpg
1
10200


In [99]:
# Pack the list of per-image feature vectors into one array (one row per image).
feature_pool2 = np.asarray(feature_pool)
print(len(feature_pool))

10200


In [145]:
# Look up the ids of a few images by file name.
a, b, c, d, g, h = (
    nameid[image_name]
    for image_name in (
        'ukbench10188.jpg',
        'ukbench10189.jpg',
        'ukbench10198.jpg',
        'ukbench10196.jpg',
        'ukbench10010.jpg',
        'ukbench10011.jpg',
    )
)
print(a, b, c, d, g, h)

5123 5717 7642 3195 6746 6679


In [110]:
# Inner products between the features of two image pairs.
# Ids are resolved through nameid rather than hard-coded, because the numeric
# id of an image depends on the directory listing order.
A = feature_list[nameid['ukbench10188.jpg']]
B = feature_list[nameid['ukbench10189.jpg']]
C = feature_list[nameid['ukbench10198.jpg']]
D = feature_list[nameid['ukbench10196.jpg']]
# The features are 1-D vectors, so no transpose is needed for the dot product.
e = np.dot(B, A)
f = np.dot(D, C)
print(e, f)

0.75141 0.7823132


In [153]:
# Feature check: cosine similarity between selected image pairs.
from sklearn.metrics.pairwise import cosine_similarity


def _similarity_by_name(first_name, second_name):
    """Return the 1x1 cosine-similarity matrix for two images given by file name."""
    first = feature_list[nameid[first_name]]
    second = feature_list[nameid[second_name]]
    return cosine_similarity([first], [second])


# Images are addressed by file name; numeric ids depend on the listing order.
a = _similarity_by_name('ukbench10188.jpg', 'ukbench10189.jpg')
b = _similarity_by_name('ukbench10198.jpg', 'ukbench10196.jpg')
c = _similarity_by_name('ukbench10010.jpg', 'ukbench10011.jpg')
# Reference pair for comparison: the query image against a differently numbered image.
h = _similarity_by_name('ukbench10188.jpg', 'ukbench06964.jpg')
print(a, b, c, h)

[[0.75141007]] [[0.7823132]] [[0.8695486]] [[0.36387017]]


In [160]:
import heapq

def getListMaxNumIndex(num_list,topk=4):
    '''
    Return the positions of the ``topk`` largest values in ``num_list``.

    Positions are ordered from the largest value to the smallest. Tied values
    each report their own position, earliest position first. If ``num_list``
    has fewer than ``topk`` items, all positions are returned.

    Args:
        num_list: Indexable sequence of mutually comparable values.
        topk: Number of positions to return.

    Returns:
        List of integer positions into ``num_list``.
    '''
    # Rank the positions themselves, keyed by their value. Looking positions
    # up afterwards with list.index would return the first match for every
    # tied value and rescan the list once per result.
    return heapq.nlargest(topk, range(len(num_list)), key=lambda position: num_list[position])

# Rank every image in the collection against one query image.
# The query is addressed by file name and the collection size is taken from
# feature_list, so the cell does not depend on hard-coded ids or counts.
query_id = nameid['ukbench10188.jpg']
feature_matrix = np.array([feature_list[i] for i in range(len(feature_list))])

# One call scores the query against all images; row 0 holds the scores for
# the single query vector. b[i] is the similarity between the query and image i.
b = cosine_similarity([feature_list[query_id]], feature_matrix)[0].tolist()

print(getListMaxNumIndex(b))


[5123, 5098, 5717, 4506]


In [164]:
# Translate the best-matching ids back to file names.
# The ids come from the ranking itself (b holds the similarity scores computed
# in the ranking cell) instead of being copied by hand from printed output.
# b is left untouched so this cell can be re-run.
top_ids = getListMaxNumIndex(b)
print(*(fname[image_id] for image_id in top_ids))

ukbench10188.jpg ukbench10191.jpg ukbench10189.jpg ukbench10190.jpg
