# 构建应用深层特征的图像检索系统

In [1]:
import graphlab

## 导入数据集: CIFAR-10

In [2]:
image_train = graphlab.SFrame('image_train_data/')

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1500887808.log


This non-commercial license of GraphLab Create for academic use is assigned to xyntax@163.com and will expire on July 23, 2018.


In [3]:
image_train.head(5)

id,image,label,deep_features,image_array
24,Height: 32 Width: 32,bird,"[0.242871761322, 1.09545373917, 0.0, ...","[73.0, 77.0, 58.0, 71.0, 68.0, 50.0, 77.0, 69.0, ..."
33,Height: 32 Width: 32,cat,"[0.525087952614, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[7.0, 5.0, 8.0, 7.0, 5.0, 8.0, 5.0, 4.0, 6.0, 7.0, ..."
36,Height: 32 Width: 32,cat,"[0.566015958786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[169.0, 122.0, 65.0, 131.0, 108.0, 75.0, ..."
70,Height: 32 Width: 32,dog,"[1.12979578972, 0.0, 0.0, 0.778194487095, 0.0, ...","[154.0, 179.0, 152.0, 159.0, 183.0, 157.0, ..."
90,Height: 32 Width: 32,bird,"[1.71786928177, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[216.0, 195.0, 180.0, 201.0, 178.0, 160.0, ..."


In [30]:
# 导入以前训练过的模型 迁移学习
deep_learning_model = graphlab.load_model('http://s3.amazonaws.com/GraphLab-Datasets/deeplearning/imagenet_model_iter45')
# 抽取特征
# 时间很慢，在数据集已经测试好
deep_learning_model.extract_features(image_train)

[ERROR] graphlab.toolkits._main: Toolkit error: Cancelled by user.


ToolkitError: Cancelled by user.

In [6]:
image_train.show()

Canvas is accessible via web browser at the URL: http://localhost:59847/index.html
Opening Canvas in default web browser.


## 通过应用KNN构建分类模型

In [9]:
knn_model = graphlab.nearest_neighbors.create(image_train, features=['deep_features'], label='id')

## 通过模型找到相似图像

In [10]:
cat = image_train[18:19]

In [11]:
cat

id,image,label,deep_features,image_array
384,Height: 32 Width: 32,cat,"[1.04403531551, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[46.0, 45.0, 50.0, 47.0, 45.0, 51.0, 45.0, 44.0, ..."


In [12]:
# 展示图像
cat['image'].show()

Canvas is updated and available in a tab in the default browser.


In [13]:
# 检索相似图像
knn_model.query(cat)

query_label,reference_label,distance,rank
0,384,0.0,1
0,6910,36.9403137951,2
0,39777,38.4634888975,3
0,36870,39.7559623119,4
0,41734,39.7866014148,5


In [14]:
# 通过id过滤结果，取出图片
def get_images_from_ids(query_result):
    return image_train.filter_by(query_result['reference_label'], 'id')

In [15]:
cat_neighbors = get_images_from_ids(knn_model.query(cat))

In [16]:
cat_neighbors

id,image,label,deep_features,image_array
384,Height: 32 Width: 32,cat,"[1.04403531551, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[46.0, 45.0, 50.0, 47.0, 45.0, 51.0, 45.0, 44.0, ..."
6910,Height: 32 Width: 32,cat,"[1.55474901199, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[154.0, 133.0, 92.0, 134.0, 112.0, 75.0, ..."
36870,Height: 32 Width: 32,cat,"[0.240483224392, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[16.0, 20.0, 19.0, 14.0, 19.0, 17.0, 11.0, 15.0, ..."
39777,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.42072105408, 0.0, ...","[145.0, 166.0, 165.0, 164.0, 185.0, 184.0, ..."
41734,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 11.6715068817, 0.0, ...","[122.0, 27.0, 34.0, 120.0, 24.0, 31.0, 11 ..."


In [17]:
cat_neighbors['image'].show()

Canvas is updated and available in a tab in the default browser.


## 找到和轿车相似的图像

In [18]:
# 第取8行数据
car = image_train[8:9]

In [19]:
car

id,image,label,deep_features,image_array
136,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.5737862587, 0.0, ...","[35.0, 59.0, 53.0, 36.0, 56.0, 56.0, 42.0, 62.0, ..."


In [20]:
car['image'].show()

Canvas is updated and available in a tab in the default browser.


In [22]:
# 预测并画图
get_images_from_ids(knn_model.query(car))['image'].show()

Canvas is updated and available in a tab in the default browser.


## 构造一个lambda函数来寻找和显示最近的图像

In [26]:
show_neighbors = lambda i: get_images_from_ids(knn_model.query(image_train[i:i+1]))['image'].show()

In [29]:
show_neighbors(9)

Canvas is updated and available in a tab in the default browser.


In [31]:
%save code.py 1-29

The following commands were written to file `code.py`:
import graphlab
image_train = graphlab.SFrame('image_train_data/')
image_train.head(5)
# 导入以前训练过的模型
deep_learning_model = graphlab.load_model('http://a3.amazonaws.com/GraphLab-Datasets/deeplearning/imagenet_model_iter45')
# 导入以前训练过的模型
deep_learning_model = graphlab.load_model('http://s3.amazonaws.com/GraphLab-Datasets/deeplearning/imagenet_model_iter45')
image_train.show()
knn_model = graphlab.nearest_neighbors.create(image_train, features=[image_train['deep_features'], label='id')
knn_model = graphlab.nearest_neighbors.create(image_train, features=image_train['deep_features'], label='id')
knn_model = graphlab.nearest_neighbors.create(image_train, features=['deep_features'], label='id')
cat = image_train[18:19]
cat
cat['image'].show()
knn_model.query(cat)
# 通过id过滤结果，取出图片
def get_images_from_ids(query_result):
    return image_train.filter_by(query_result['reference_label'], 'id')
cat_neighbors = get_images_from_ids(knn_model.query(