# Image retrieval using deep features

In [55]:
import turicreate

In [56]:
# Configure where Turi Create renders visualizations; 'auto' presumably defers the choice to the library (TODO confirm).
turicreate.visualization.set_target('auto')

# Load some CIFAR-10 images

In [61]:
# Load the training images from the SFrame stored under ./image_train_data/.
image_data = turicreate.SFrame('./image_train_data/')

In [63]:
# Browse the 'image' column interactively.
# NOTE(review): the recorded run of this cell ended in
# "ValueError: Value must be a nonnegative integer or None". The cause is not
# visible in this notebook — possibly a turicreate/pandas version mismatch; verify.
image_data['image'].explore()

ValueError: Value must be a nonnegative integer or None

# Compute deep features from our images

In [None]:
# Feature extraction is left disabled: the table displayed by the next cell already
# has a 'deep_features' column even though these lines did not run, so the loaded
# data appears to ship with precomputed features.
#deep_learning_model = turicreate.load_model('imagenet_model_iter45')
#image_data['deep_features'] = deep_learning_model.extract_features(image_data)

In [64]:
# Display the training SFrame (columns: id, image, label, deep_features, image_array).
image_data

id,image,label,deep_features,image_array
24,Height: 32 Width: 32,bird,"[0.24287176132202148, 1.0954537391662598, 0.0, ...","[73.0, 77.0, 58.0, 71.0, 68.0, 50.0, 77.0, 69.0, ..."
33,Height: 32 Width: 32,cat,"[0.5250879526138306, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[7.0, 5.0, 8.0, 7.0, 5.0, 8.0, 5.0, 4.0, 6.0, 7.0, ..."
36,Height: 32 Width: 32,cat,"[0.5660159587860107, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[169.0, 122.0, 65.0, 131.0, 108.0, 75.0, ..."
70,Height: 32 Width: 32,dog,"[1.129795789718628, 0.0, 0.0, 0.7781944870948792, ...","[154.0, 179.0, 152.0, 159.0, 183.0, 157.0, ..."
90,Height: 32 Width: 32,bird,"[1.7178692817687988, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[216.0, 195.0, 180.0, 201.0, 178.0, 160.0, ..."
97,Height: 32 Width: 32,automobile,"[1.5781855583190918, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[33.0, 44.0, 27.0, 29.0, 44.0, 31.0, 32.0, 45.0, ..."
107,Height: 32 Width: 32,dog,"[0.0, 0.0, 0.22067785263061523, ...","[97.0, 51.0, 31.0, 104.0, 58.0, 38.0, 107.0, 61.0, ..."
121,Height: 32 Width: 32,bird,"[0.0, 0.23753464221954346, ...","[93.0, 96.0, 88.0, 102.0, 106.0, 97.0, 117.0, ..."
136,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.57378625869751, ...","[35.0, 59.0, 53.0, 36.0, 56.0, 56.0, 42.0, 62.0, ..."
138,Height: 32 Width: 32,bird,"[0.6589357256889343, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[205.0, 193.0, 195.0, 200.0, 187.0, 193.0, ..."


# Create a nearest neighbors model to retrieve images from deep features

In [65]:
# Build the retrieval model over all training images: neighbors are compared on
# the 'deep_features' column and reported by their 'id'.
knn_model = turicreate.nearest_neighbors.create(
    image_data,
    features=['deep_features'],
    label='id',
)

# Use image retrieval model with deep features to find similar images

In [66]:
# Keep row 18 as a one-row slice so it can be passed to knn_model.query() below.
cat = image_data[18:19]

In [None]:
# View the query image.
# NOTE(review): the recorded output of this cell is a ValueError raised by .explore().
cat['image'].explore()

ValueError: Value must be a nonnegative integer or None

In [67]:
# Retrieve the nearest training images to `cat` by deep-feature distance.
# In the recorded output the rank-1 hit has distance 0.0 — presumably the query
# image itself, since `cat` was sliced from the same data the model was built on.
knn_model.query(cat)

query_label,reference_label,distance,rank
0,384,0.0,1
0,6910,36.94031379506751,2
0,39777,38.46348889747021,3
0,36870,39.75596231189652,4
0,41734,39.786601414815024,5


In [68]:
def get_images_from_ids(query_result):
    """Return the rows of ``image_data`` matched by a nearest-neighbors query.

    Args:
        query_result: Query output containing a 'reference_label' column
            holding the ids of the retrieved images.

    Returns:
        The subset of the global ``image_data`` whose 'id' appears in
        ``query_result['reference_label']``.

    NOTE(review): row order comes from ``filter_by`` and may not follow the
    neighbor rank — confirm before relying on ordering.
    """
    neighbor_ids = query_result['reference_label']
    return image_data.filter_by(neighbor_ids, 'id')

In [69]:
# Fetch the full image rows for the nearest neighbors of `cat`.
cat_neighbors = get_images_from_ids(knn_model.query(cat))

In [None]:
# View the images retrieved for `cat`.
cat_neighbors['image'].explore()

# Find images similar to a car

In [70]:
# Row 8 of the training data as a one-row slice; used as the car query image.
car = image_data[8:9]

In [None]:
# View the car query image.
# NOTE(review): the recorded output of this cell is a ValueError raised by .explore().
car['image'].explore()

ValueError: Value must be a nonnegative integer or None

In [71]:
# Query with `car`, map the neighbor ids back to image rows, and view them in one step.
# NOTE(review): the recorded output of this cell is a ValueError raised by .explore().
get_images_from_ids(knn_model.query(car))['image'].explore()

ValueError: Value must be a nonnegative integer or None

# Create a lambda function to find and show nearest neighbors to an image

In [72]:
# show_neighbors(i): take row i of image_data as a one-row slice, query the
# retrieval model with it, and open the retrieved images for viewing.
show_neighbors = lambda i: (
    get_images_from_ids(
        knn_model.query(image_data[i:i + 1])
    )['image'].explore()
)

In [None]:
# Neighbors of training row 8 — the same row selected as `car` earlier.
show_neighbors(8)

ValueError: Value must be a nonnegative integer or None

In [None]:
# Neighbors of training row 26.
show_neighbors(26)

ValueError: Value must be a nonnegative integer or None

In [None]:
# Neighbors of training row 500.
show_neighbors(500)

ValueError: Value must be a nonnegative integer or None

# Programming assignment

## Compute summary statistics of the data

In [73]:
# Summarize the 'label' column; the recorded output reports 2005 rows, no
# missing values, and four labels (cat, dog, automobile, bird).
# sketch = turicreate.Sketch(image_data['label'])
sketch = image_data['label'].summary()
sketch


+------------------+-------+----------+
|       item       | value | is exact |
+------------------+-------+----------+
|      Length      |  2005 |   Yes    |
| # Missing Values |   0   |   Yes    |
| # unique values  |   4   |    No    |
+------------------+-------+----------+

Most frequent items:
+------------+-------+
|   value    | count |
+------------+-------+
|    cat     |  509  |
|    dog     |  509  |
| automobile |  509  |
|    bird    |  478  |
+------------+-------+


## Create category-specific image retrieval models

In [74]:
# One training subset per label, produced in the order cat, dog, automobile, bird.
cat_training_data, dog_training_data, automobile_training_data, bird_training_data = (
    image_data[image_data['label'] == category]
    for category in ('cat', 'dog', 'automobile', 'bird')
)

In [75]:
# One nearest-neighbors model per category, each built only from that category's
# training images and compared on 'deep_features', with results labelled by 'id'.
cat_model, dog_model, automobile_model, bird_model = (
    turicreate.nearest_neighbors.create(
        training_subset,
        features=['deep_features'],
        label='id',
    )
    for training_subset in (
        cat_training_data,
        dog_training_data,
        automobile_training_data,
        bird_training_data,
    )
)

In [76]:
# Load the held-out test images from the SFrame stored under ./image_test_data.
image_test = turicreate.SFrame('./image_test_data')

In [77]:
# First test row as a one-row slice; the name suggests it is a cat image (not verifiable from this notebook).
cat_test = image_test[0:1]

In [78]:
from PIL import Image
import io

In [79]:
# Nearest cat-category neighbors of the first test image.
cat_model_result = cat_model.query(cat_test)

# Raw pixel values of training image 16289.
# NOTE(review): the id is hard-coded, presumably read off cat_model_result — confirm.
image_array = cat_training_data[cat_training_data['id'] == 16289]['image_array'].to_numpy()

# show() is an instance method of turicreate.Image, so it is called on the Image
# stored in the 'image' column rather than on the numpy pixel array.
image = cat_training_data[cat_training_data['id'] == 16289]['image'][0]
image.show()

In [80]:
# Query the dog-only model with the same test image that was used for cat_model.
dog_model_result = dog_model.query(cat_test)
dog_model_result

query_label,reference_label,distance,rank
0,16976,37.464262878423774,1
0,13387,37.56668321685285,2
0,35867,37.60472670789396,3
0,44603,37.70655851529755,4
0,6094,38.51132549073972,5


## Try a simple example of nearest-neighbors classification

In [88]:
# Mean distance from cat_test to the neighbors returned by the cat model.
cat_model_result['distance'].mean()

36.15573070978294

In [89]:
# Mean distance from cat_test to the neighbors returned by the dog model.
dog_model_result['distance'].mean()

37.77071136184157

## Compute nearest neighbors accuracy

In [90]:
# Split the test set by label, one SFrame per category.
# Each mask is built from the column itself (image_test['label'] == <name>);
# comparing the bare string 'label' to a category name would only yield a Python bool.
cat_test_data = image_test[image_test['label'] == 'cat']
dog_test_data = image_test[image_test['label'] == 'dog']
automobile_test_data = image_test[image_test['label'] == 'automobile']
bird_test_data = image_test[image_test['label'] == 'bird']

In [91]:
# For every dog test image, find its single nearest neighbor (k=1) within each
# category model, in the order cat, dog, automobile, bird.
dog_cat_neighbours, dog_dog_neighbours, dog_automobile_neighbours, dog_bird_neighbours = (
    category_model.query(dog_test_data, k=1)
    for category_model in (cat_model, dog_model, automobile_model, bird_model)
)

In [92]:
# Number of dog test images whose nearest dog neighbor is closer than their nearest cat neighbor.
# The comparison yields a 0/1 SArray, so summing it counts the rows where it holds,
# independent of how value_counts() happens to order its rows.
(dog_dog_neighbours['distance'] < dog_cat_neighbours['distance']).sum()

722

In [93]:
# Number of dog test images whose nearest dog neighbor is closer than their nearest automobile neighbor.
# The comparison yields a 0/1 SArray, so summing it counts the rows where it holds,
# independent of how value_counts() happens to order its rows.
(dog_dog_neighbours['distance'] < dog_automobile_neighbours['distance']).sum()

980

In [94]:
# Number of dog test images whose nearest dog neighbor is closer than their nearest bird neighbor.
# The comparison yields a 0/1 SArray, so summing it counts the rows where it holds,
# independent of how value_counts() happens to order its rows.
(dog_dog_neighbours['distance'] < dog_bird_neighbours['distance']).sum()

886

In [95]:
# Start from an empty SFrame; the per-category distance columns are added in the next cell.
dog_distances = turicreate.SFrame()

In [96]:
# One column per category model: each dog test image's distance to its single
# nearest neighbor in that category (the queries were run with k=1).
dog_distances['dog-dog'] = dog_dog_neighbours['distance']
dog_distances['dog-cat'] = dog_cat_neighbours['distance']
dog_distances['dog-automobile'] = dog_automobile_neighbours['distance']
dog_distances['dog-bird'] = dog_bird_neighbours['distance']

In [97]:
def is_dog_correct(row):
    """Return 1 if the dog model gives the closest match for this row, else 0.

    Args:
        row: Mapping with the distances 'dog-dog', 'dog-cat',
            'dog-automobile' and 'dog-bird'.

    Returns:
        0 when any non-dog distance is strictly smaller than 'dog-dog';
        1 otherwise, so ties count as correct.
    """
    dog_distance = row['dog-dog']
    rival_columns = ('dog-cat', 'dog-automobile', 'dog-bird')
    beaten = any(row[column] < dog_distance for column in rival_columns)
    return 0 if beaten else 1

In [102]:
# Accuracy on dog test images: the fraction of rows for which is_dog_correct returned 1.
dog_distances.apply(is_dog_correct).sum() / len(dog_distances)

0.678