In [1]:
import graphlab

# Load the CIFAR-10 dataset

In [3]:
# Load both splits up front so every later cell works on a fresh kernel.
train_data = graphlab.SFrame('image_train_data/')
# NOTE(review): later cells use `test_data`, but no cell in this notebook loads it.
# The path below is assumed to sit alongside image_train_data/ - confirm before running.
test_data = graphlab.SFrame('image_test_data/')

In [4]:
train_data.head()

id,image,label,deep_features,image_array
24,Height: 32 Width: 32,bird,"[0.242871761322, 1.09545373917, 0.0, ...","[73.0, 77.0, 58.0, 71.0, 68.0, 50.0, 77.0, 69.0, ..."
33,Height: 32 Width: 32,cat,"[0.525087952614, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[7.0, 5.0, 8.0, 7.0, 5.0, 8.0, 5.0, 4.0, 6.0, 7.0, ..."
36,Height: 32 Width: 32,cat,"[0.566015958786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[169.0, 122.0, 65.0, 131.0, 108.0, 75.0, ..."
70,Height: 32 Width: 32,dog,"[1.12979578972, 0.0, 0.0, 0.778194487095, 0.0, ...","[154.0, 179.0, 152.0, 159.0, 183.0, 157.0, ..."
90,Height: 32 Width: 32,bird,"[1.71786928177, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[216.0, 195.0, 180.0, 201.0, 178.0, 160.0, ..."
97,Height: 32 Width: 32,automobile,"[1.57818555832, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[33.0, 44.0, 27.0, 29.0, 44.0, 31.0, 32.0, 45.0, ..."
107,Height: 32 Width: 32,dog,"[0.0, 0.0, 0.220677852631, 0.0, ...","[97.0, 51.0, 31.0, 104.0, 58.0, 38.0, 107.0, 61.0, ..."
121,Height: 32 Width: 32,bird,"[0.0, 0.23753464222, 0.0, 0.0, 0.0, 0.0, ...","[93.0, 96.0, 88.0, 102.0, 106.0, 97.0, 117.0, ..."
136,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.5737862587, 0.0, ...","[35.0, 59.0, 53.0, 36.0, 56.0, 56.0, 42.0, 62.0, ..."
138,Height: 32 Width: 32,bird,"[0.658935725689, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[205.0, 193.0, 195.0, 200.0, 187.0, 193.0, ..."


# Train a nearest-neighbours model for retrieving images using deep features

In [5]:
# Nearest-neighbour model over the whole training set: images are compared by
# their 'deep_features' column and neighbours are reported by their 'id'.
knn_model = graphlab.nearest_neighbors.create(
    train_data,
    label='id',
    features=['deep_features'],
)

# Use image retrieval model with deep features to find similar images

In [8]:
# Send GraphLab Canvas output to the notebook ('ipynb' target) so .show() renders here.
graphlab.canvas.set_target('ipynb')
# Slice (rather than index) row 18 so the result can be passed straight to
# knn_model.query() below. Presumably this row is a cat image - verify with the output.
cat = train_data[18:19]
# Display the query image itself.
cat['image'].show()

In [9]:
knn_model.query(cat)

query_label,reference_label,distance,rank
0,384,0.0,1
0,6910,36.9403137951,2
0,39777,38.4634888975,3
0,36870,39.7559623119,4
0,41734,39.7866014148,5


In [12]:
def get_images_from_ids(query_result):
    """Look up the training rows matched by a nearest-neighbour query.

    Parameters
    ----------
    query_result : result of a model ``.query(...)`` call; only its
        'reference_label' column is read.

    Returns
    -------
    The rows of the notebook-level ``train_data`` whose 'id' value appears in
    ``query_result['reference_label']``.
    """
    neighbour_ids = query_result['reference_label']
    return train_data.filter_by(neighbour_ids, 'id')

In [15]:
cat_neighbours = get_images_from_ids(knn_model.query(cat))

In [16]:
cat_neighbours['image'].show()

In [20]:
car = train_data[8:9]
get_images_from_ids(knn_model.query(car))['image'].show()

# Let's create a lambda to find and show nearest neighbour images

In [24]:
# Given a row position i in train_data, query knn_model with that single row
# (sliced as i:i+1) and display the images of the neighbours it returns.
show_neighbours = lambda i: (
    get_images_from_ids(knn_model.query(train_data[i:i + 1]))['image'].show()
)

In [28]:
show_neighbours(150)

In [29]:
show_neighbours(1225)

In [30]:
show_neighbours(2000)

# Quiz questions

In [33]:
train_data['label'].sketch_summary()


+------------------+-------+----------+
|       item       | value | is exact |
+------------------+-------+----------+
|      Length      |  2005 |   Yes    |
| # Missing Values |   0   |   Yes    |
| # unique values  |   4   |    No    |
+------------------+-------+----------+

Most frequent items:
+-------+------------+-----+-----+------+
| value | automobile | cat | dog | bird |
+-------+------------+-----+-----+------+
| count |    509     | 509 | 509 | 478  |
+-------+------------+-----+-----+------+


#### Bird is the least frequent label (478 images, versus 509 for each of the other three labels)

In [42]:
dog_data = train_data[train_data['label']=="dog"]

In [43]:
cat_data = train_data[train_data['label']=="cat"]

In [44]:
car_data = train_data[train_data['label']=="automobile"]

In [45]:
bird_data = train_data[train_data['label']=="bird"]

In [46]:
# Nearest-neighbour model restricted to the training images labelled "dog".
dog_model = graphlab.nearest_neighbors.create(
    dog_data,
    label='id',
    features=['deep_features'],
)

In [47]:
# Nearest-neighbour model restricted to the training images labelled "cat".
cat_model = graphlab.nearest_neighbors.create(
    cat_data,
    label='id',
    features=['deep_features'],
)

In [48]:
# Nearest-neighbour model restricted to the training images labelled "automobile".
car_model = graphlab.nearest_neighbors.create(
    car_data,
    label='id',
    features=['deep_features'],
)

In [49]:
# Nearest-neighbour model restricted to the training images labelled "bird".
bird_model = graphlab.nearest_neighbors.create(
    bird_data,
    label='id',
    features=['deep_features'],
)

In [66]:
get_images_from_ids(cat_model.query(test_data[0:1])).show()

In [67]:
get_images_from_ids(dog_model.query(test_data[0:1])).show()

In [69]:
cat_model.query(test_data[0:1])['distance'].mean()

36.15573070978294

In [70]:
dog_model.query(test_data[0:1])['distance'].mean()

37.77071136184157

# Split the test data into categories this time

In [71]:
test_dog_data = test_data[test_data['label']=="dog"]

In [72]:
len(test_dog_data)

1000

In [73]:
test_cat_data = test_data[test_data['label']=="cat"]
test_car_data = test_data[test_data['label']=="automobile"]
test_bird_data = test_data[test_data['label']=="bird"]

In [90]:
# For every test image labelled "dog", ask each per-category model for its
# single closest training image (k=1). Variable names read <query>_<model>.
dog_cat_neighbours = cat_model.query(test_dog_data, k=1)
dog_dog_neighbours = dog_model.query(test_dog_data, k=1)
dog_car_neighbours = car_model.query(test_dog_data, k=1)
dog_bird_neighbours = bird_model.query(test_dog_data, k=1)

In [91]:
# Put the four nearest-neighbour distance columns side by side, one column per
# category model.
# NOTE(review): this assumes the four query results are row-aligned (one row
# per test dog image, in the same order) - confirm.
dog_distances = graphlab.SFrame({
    'dog-car': dog_car_neighbours['distance'],
    'dog-cat': dog_cat_neighbours['distance'],
    'dog-bird': dog_bird_neighbours['distance'],
    'dog-dog': dog_dog_neighbours['distance'],
})

In [92]:
dog_distances

dog-bird,dog-car,dog-cat,dog-dog
41.7538647304,41.9579761457,36.4196077068,33.4773590373
41.3382958925,46.0021331807,38.8353268874,32.8458495684
38.6157590853,42.9462290692,36.9763410854,35.0397073189
37.0892269954,41.6866060048,34.5750072914,33.9010327697
38.272288694,39.2269664935,34.778824791,37.4849250909
39.1462089236,40.5845117698,35.1171578292,34.945165344
40.523040106,45.1067352961,40.6095830913,39.0957278345
38.1947918393,41.3221140974,39.9036867306,37.7696131032
40.1567131661,41.8244654995,38.0674700168,35.1089144603
45.5597962603,45.4976929401,42.7258732951,43.2422832585


In [95]:
dog_distances[0]

{'dog-bird': 41.753864730351246,
 'dog-car': 41.957976145712024,
 'dog-cat': 36.41960770675437,
 'dog-dog': 33.47735903726335}

In [98]:
def is_dog_correct(row):
    """Return 1 if the 'dog-dog' distance is the strict minimum of the row, else 0.

    ``row`` is a mapping with the keys 'dog-dog', 'dog-cat', 'dog-bird' and
    'dog-car'. A tie with any other category counts as incorrect (0).
    """
    other_categories = ('dog-cat', 'dog-bird', 'dog-car')
    dog_is_closest = all(row['dog-dog'] < row[key] for key in other_categories)
    return 1 if dog_is_closest else 0

In [100]:
dog_distances.apply(is_dog_correct).sum()

678L