# Building an image retrieval system with deep features


# Fire up GraphLab Create

In [None]:
import graphlab

# Load the CIFAR-10 dataset

We will use a popular benchmark dataset in computer vision called CIFAR-10.  

(We've reduced the data to just 4 categories = {'cat','bird','automobile','dog'}.)

This dataset is already split into a training set and test set. In this simple retrieval example, there is no notion of "testing", so we will only use the training data.

In [None]:
# Load the training and test data as SFrames from their saved directories.
image_train = graphlab.SFrame('image_train_data/')
image_test = graphlab.SFrame('image_test_data/')

# Computing deep features for our images

The two lines below allow us to compute deep features.  This computation takes a little while, so we have already computed them and saved the results as a column in the data you loaded. 

(Note that if you would like to compute such deep features and have a GPU on your machine, you should use the GPU-enabled GraphLab Create, which will be significantly faster for this task.)

In [None]:
#deep_learning_model = graphlab.load_model('http://s3.amazonaws.com/GraphLab-Datasets/deeplearning/imagenet_model_iter45')
#image_train['deep_features'] = deep_learning_model.extract_features(image_train)

In [None]:
# Preview the first rows of the training data.
image_train.head()

# Train a nearest-neighbors model for retrieving images using deep features

We will now build a simple image retrieval system that finds the nearest neighbors for any image.

In [None]:
# Build a nearest-neighbors model over all training images, using the
# 'deep_features' column as the feature and 'id' as each row's label.
knn_model = graphlab.nearest_neighbors.create(image_train,
                                              features=['deep_features'],
                                              label='id')

# Use the image retrieval model with deep features to find similar images

Let's find similar images to this cat picture.

In [None]:
# Send canvas output to the notebook.
graphlab.canvas.set_target('ipynb')
# Slice (rather than index) so the query image stays a one-row SFrame.
cat = image_train[18:19]
cat['image'].show()

In [None]:
# Find the nearest neighbors of the cat image among the training images.
knn_model.query(cat)

We are going to create a simple function to view the nearest neighbors to save typing:

In [None]:
def get_images_from_ids(query_result):
    """Return the rows of image_train whose 'id' is among the query's reference labels.

    query_result: a nearest-neighbors query result exposing a
    'reference_label' column (e.g. the output of knn_model.query).
    """
    neighbor_ids = query_result['reference_label']
    return image_train.filter_by(neighbor_ids, 'id')

In [None]:
# Look up the training rows for the cat image's nearest neighbors.
cat_neighbors = get_images_from_ids(knn_model.query(cat))

In [None]:
# Display the neighbor images.
cat_neighbors['image'].show()

Very cool results showing similar cats.

## Finding similar images to a car

In [None]:
# Take row 8 of the training data as the car query image (one-row SFrame).
car = image_train[8:9]
car['image'].show()

In [None]:
# Query with the car image and display the images of its nearest neighbors.
get_images_from_ids(knn_model.query(car))['image'].show()

# Just for fun, let's create a lambda to find and show nearest neighbor images

In [None]:
# Query knn_model with row i of image_train (kept as a one-row slice) and
# display the images of the neighbors that come back.
show_neighbors = lambda i: (
    get_images_from_ids(knn_model.query(image_train[i:i + 1]))['image'].show()
)

In [None]:
# Row 8 is the same row that was used as `car` earlier.
show_neighbors(8)

In [None]:
# Neighbors of the training image at row 26.
show_neighbors(26)

In [None]:
# Summarize the 'label' column of the training data.
image_train['label'].sketch_summary()

In [None]:
# Split the training data into one SFrame per label.
image_train_automobile = image_train[image_train['label']=='automobile']
image_train_cat = image_train[image_train['label']=='cat']
image_train_dog = image_train[image_train['label']=='dog']
image_train_bird = image_train[image_train['label']=='bird']

In [None]:
# Nearest-neighbors model built only from the training automobile images.
automobile_model = graphlab.nearest_neighbors.create(image_train_automobile,
                                                     features=['deep_features'],
                                                     label='id')

In [None]:
# Nearest-neighbors model built only from the training cat images.
cat_model = graphlab.nearest_neighbors.create(image_train_cat,
                                              features=['deep_features'],
                                              label='id')

In [None]:
# Nearest-neighbors model built only from the training dog images.
dog_model = graphlab.nearest_neighbors.create(image_train_dog,
                                              features=['deep_features'],
                                              label='id')

In [None]:
# Nearest-neighbors model built only from the training bird images.
bird_model = graphlab.nearest_neighbors.create(image_train_bird,
                                               features=['deep_features'],
                                               label='id')

In [None]:
# Nearest training cat images to the first test image.
cat_model.query(image_test[0:1])

In [None]:
def get_images_from_cat(query_result):
    """Return the rows of image_train_cat whose 'id' is among the query's reference labels.

    query_result: a nearest-neighbors query result exposing a
    'reference_label' column (e.g. the output of cat_model.query).
    """
    neighbor_ids = query_result['reference_label']
    return image_train_cat.filter_by(neighbor_ids, 'id')

In [None]:
# Display the training cat images nearest to the first test image.
get_images_from_cat(cat_model.query(image_test[0:1]))['image'].show()

In [None]:
# Nearest training dog images to the first test image.
dog_model.query(image_test[0:1])

In [None]:
def get_images_from_dog(query_result):
    """Return the rows of image_train_dog whose 'id' is among the query's reference labels.

    dog_model is built from image_train_dog with label='id', so the
    'reference_label' values in its query results are ids of rows in
    image_train_dog. The lookup must therefore filter image_train_dog;
    filtering image_test would look those ids up in a different dataset.

    query_result: a nearest-neighbors query result exposing a
    'reference_label' column (e.g. the output of dog_model.query).
    """
    return image_train_dog.filter_by(query_result['reference_label'], 'id')

In [None]:
# NOTE(review): repeats the dog_model query from a few cells earlier.
dog_model.query(image_test[0:1])

In [None]:
# Look up and display the training image whose id is 16976.
# NOTE(review): this rebinds `cat`, which earlier held image_train[18:19];
# presumably 16976 was read off a cat_model query result - confirm.
cat = image_train[image_train['id']==16976]
cat['image'].show()

In [None]:
# Display the images get_images_from_dog returns for the first test image's
# dog_model neighbors.
get_images_from_dog(dog_model.query(image_test[0:1]))['image'].show()

In [None]:
# Mean distance from the first test image to the neighbors cat_model returns.
cat_model.query(image_test[0:1])['distance'].mean()

In [None]:
# Mean distance from the first test image to the neighbors dog_model returns.
dog_model.query(image_test[0:1])['distance'].mean()

In [None]:
# Same mean, but over result rows 1-4 only (the first returned row is skipped).
cat_model.query(image_test[0:1])['distance'][1:5].mean()

In [None]:
# Same mean, but over result rows 1-4 only (the first returned row is skipped).
dog_model.query(image_test[0:1])['distance'][1:5].mean()

In [None]:
# Split the test data into one SFrame per label.
image_test_automobile = image_test[image_test['label']=='automobile']
image_test_cat = image_test[image_test['label']=='cat']
image_test_dog = image_test[image_test['label']=='dog']
image_test_bird = image_test[image_test['label']=='bird']

In [None]:
# For every test dog image, find its single nearest neighbor (k=1) in each
# of the four per-label models.
dog_dog_neighbors = dog_model.query(image_test_dog, k=1)
dog_cat_neighbors = cat_model.query(image_test_dog, k=1)
dog_bird_neighbors = bird_model.query(image_test_dog, k=1)
dog_automobile_neighbors = automobile_model.query(image_test_dog, k=1)

In [None]:
# One column per model, holding the k=1 distances from the test dog queries.
# NOTE(review): assumes the four results list the query images in the same
# row order, so each row describes a single test dog - confirm.
dog_distances = graphlab.SFrame({'dog-automobile': dog_automobile_neighbors['distance'],
                              'dog-bird': dog_bird_neighbors['distance'],
                              'dog-cat': dog_cat_neighbors['distance'],
                              'dog-dog': dog_dog_neighbors['distance']})

In [None]:
def is_dog_correct(row):
    """Return 1 if 'dog-dog' is the key holding the smallest value in row, else 0.

    row: a dict mapping column names to distances.
    """
    closest = min(row, key=row.get)
    return 1 if closest == 'dog-dog' else 0

In [None]:
# Count the test dog images whose smallest distance is in the 'dog-dog' column.
dog_distances.apply(is_dog_correct).sum()

In [None]:
# Total number of rows in dog_distances, to compare against the is_dog_correct count.
dog_distances.num_rows()

# Sanity check (to see whether everything worked):

In [None]:
# For every test cat image, find its single nearest neighbor (k=1) in each
# of the four per-label models.
cat_dog_neighbors = dog_model.query(image_test_cat, k=1)
cat_cat_neighbors = cat_model.query(image_test_cat, k=1)
cat_bird_neighbors = bird_model.query(image_test_cat, k=1)
cat_automobile_neighbors = automobile_model.query(image_test_cat, k=1)

In [None]:
# One column per model, holding the k=1 distances from the test cat queries.
# NOTE(review): the column names keep the 'dog-*' labels even though the
# query images are cats; 'dog-dog' here holds the distance from each cat to
# its nearest training dog, and 'dog-cat' the distance to its nearest cat.
cat_distances = graphlab.SFrame({'dog-automobile': cat_automobile_neighbors['distance'],
                              'dog-bird': cat_bird_neighbors['distance'],
                              'dog-cat': cat_cat_neighbors['distance'],
                              'dog-dog': cat_dog_neighbors['distance']})

In [None]:
# Count the test cat images whose smallest distance is in the 'dog-dog'
# column, i.e. cats whose nearest neighbor overall comes from dog_model.
cat_distances.apply(is_dog_correct).sum()