In [None]:
%load_ext autoreload
%autoreload 2
try:
    from tqdm import tqdm_notebook
except:
    def tqdm_notebook(iterable):
        return iterable

## Welcome!

Before we begin, make sure you have the following files downloaded and unzipped in this directory:

[MSCOCO data](https://drive.google.com/file/d/1hJrp-vn44zKPNknmMkvlPvRf2qrErzmB/view?usp=sharing)
[British Library data](https://drive.google.com/file/d/1bJ-l9HchOzLXIhCGecz0sS3arPhZoA1H/view?usp=sharing)

You can run the following cell to check that both datasets are in place.

In [None]:
import os

# Each dataset is detected by the presence of its first image; print a
# reminder for every dataset whose sentinel file is missing.
required_files = (
    ('coco_workshop/0.jpg', "Please download the COCO workshop data."),
    ('bl_workshop/0.jpg', "Please download the BL workshop data."),
)
for sentinel, reminder in required_files:
    if not os.path.exists(sentinel):
        print(reminder)

In [None]:
# Peek at the first ten entries of the COCO folder.
files = os.listdir('coco_workshop')
first_ten = files[:10]
print(first_ten)

## Part 1: Loading images

In this part of the workshop, we will show how to load an image into Python using the Python Imaging Library (PIL). We will also display the image.

In [None]:
# Open the sample photo with PIL and show it in the notebook.
from PIL import Image
file_path = 'coco_workshop/22.jpg'
my_image = Image.open(file_path)
display(my_image)

In [None]:
# Let's try a few fun filters out on the image!
from PIL import ImageFilter

# Apply each built-in filter to the original image in turn and show the result.
for kernel in (ImageFilter.BLUR, ImageFilter.EDGE_ENHANCE, ImageFilter.FIND_EDGES):
    modified_image = my_image.filter(kernel)
    display(modified_image)

In [None]:
# The size is reported as (width, height), in pixels.
my_image_size = my_image.size
print(my_image_size)

This means that this image is 640 pixels wide by 427 pixels high.

## Part 2: Images as Arrays

To a computer, an image is simply a big list of numbers. In particular, a useful representation of the image is the Red-Green-Blue (RGB) space representation. In the same way a printer can mix a few inks to produce most colors, a computer can mix red, green, and blue light. For each "pixel" in the input image, there will be three values that tell the computer what color it should be: a red value, a green value, and a blue value. So an image can be represented by a long list of numbers: for each x coordinate and each y coordinate there is an (R, G, B) triple. [Image source](https://web.stanford.edu/class/cs101/image-1-introduction.html)

![How a computer represents an image](images/pixels.png)

In [None]:
import numpy as np
# Convert the PIL image into a NumPy array of pixel values.
image_array = np.asarray(my_image)

In [None]:
# Show the dimensions of the pixel array.
image_array.shape

What do the dimensions of this array represent?

In [None]:
from util import isolate_channel
# Channel indices follow the R, G, B order of each pixel triple:
# 0 = red, 1 = green, 2 = blue (assumes my_image was loaded in RGB mode).
# The green and blue indices were previously swapped, so the maps shown as
# "green" and "blue" were each other's channel.
red_intensity_map = isolate_channel(image_array, 0)
green_intensity_map = isolate_channel(image_array, 1)
blue_intensity_map = isolate_channel(image_array, 2)

In [None]:
# Turn each single-channel array back into a PIL image.
red_map, green_map, blue_map = (
    Image.fromarray(channel_array)
    for channel_array in (red_intensity_map, green_intensity_map, blue_intensity_map)
)
# Show the original first, then the red, green and blue maps in that order.
for picture in (my_image, red_map, green_map, blue_map):
    display(picture)

In [None]:
# Do these images really combine to form the original?
# Add the three per-channel maps together and view the sum as an image.
combined_map = red_intensity_map + green_intensity_map + blue_intensity_map
combined_image = Image.fromarray(combined_map)
display(combined_image)

## Part 3: Image Representation 1: Average Color

One goal of computer vision is to come up with vector representations of images. Let's see if we can use the average color of an image as a representation.

In [None]:
# Three sample images used to compare average colors.
image_path1 = 'coco_workshop/22.jpg'
image_path2 = 'coco_workshop/23.jpg'
image_path3 = 'coco_workshop/24.jpg'
# Open them in order, one PIL image per path.
image1, image2, image3 = map(Image.open, (image_path1, image_path2, image_path3))

In [None]:
from util import compute_mean_color
# The helper's result is unpacked into one value per channel: red, green, blue.
mean_red, mean_green, mean_blue = compute_mean_color(image1)
print(mean_red, mean_green, mean_blue)

In [None]:
from util import constant_color_image
# Build a swatch from the three channel means, then show it.
mean_color_swatch = constant_color_image(mean_red, mean_green, mean_blue)
display(mean_color_swatch)

In [None]:
# Show each sample image followed by a swatch of its average color.
for path in (image_path1, image_path2, image_path3):
    print("Image: " + path)
    image = Image.open(path)
    display(image)
    mean_red, mean_green, mean_blue = compute_mean_color(image)
    swatch = constant_color_image(mean_red, mean_green, mean_blue)
    display(swatch)

In [None]:
# Let's get the mean color for all images!
base_path = 'coco_workshop/'
n_images = 5000
all_means = np.empty((n_images, 3))  # indexed by [image_idx, channel]
for idx in range(n_images):
    fname = base_path + str(idx) + ".jpg"
    print("processing " + fname)
    # Open the file for *this* index. The loop previously opened `path`, a
    # variable left over from an earlier cell, so every row described the
    # same image. The `with` block closes each file once its mean is computed.
    with Image.open(fname) as image:
        mean_red, mean_green, mean_blue = compute_mean_color(image)
    # Store the channel means in R, G, B column order.
    all_means[idx, :] = (mean_red, mean_green, mean_blue)

In [None]:
# Mean color (one value per channel) stored for image 0.
print(all_means[0,:])

In [None]:
# Dimensions of the table of means: (number of images, number of channels).
print(all_means.shape)

## Part 4: Nearest neighbor search

Now that we have a vector representation of each image (specifically: our vector is of length 3, representing the average color) we can search for nearest neighbors of each image according to this representation. In this space, two images are similar if they have similar average colors. Is this a good notion of similarity? What could this potentially be used for?

We will use the [Annoy library](https://github.com/spotify/annoy/) to perform nearest neighbor search.

In [None]:
# we need to build an object that will let us perform nearest-neighbor search!
from annoy import AnnoyIndex
searcher = AnnoyIndex(3) # we need to give it the dimension of the representation
# now, we need to insert each image...
for idx in range(n_images):
    searcher.add_item(idx, all_means[idx,:])
searcher.build(10) # don't worry about the 10 -- it's an internal parameter

Now, we can search for nearest neighbors for images of different indices.

In [None]:
# Ask the index for the 10 nearest neighbors of item 0.
neighbors_of_first_image = searcher.get_nns_by_item(0, 10)

In [None]:
# The result is a list of image indices.
print(neighbors_of_first_image)

In [None]:
# Pick a query image and look up its 10 nearest neighbors by average color.
image_idx = 22
neighbors_of_first_image = searcher.get_nns_by_item(image_idx, 10)
print("Start image:")
fname = base_path + str(image_idx) + ".jpg"
image = Image.open(fname)
display(image)
r, g, b = compute_mean_color(image)
display(constant_color_image(r,g,b))

# Show every neighbor followed by a swatch of its average color.
# (A stray `fname = ...` line that read a loop variable left over from an
# earlier cell used to sit here; it had no effect and has been removed.)
# NOTE(review): the first neighbor is presumably the query image itself.
for idx in neighbors_of_first_image:
    fname = base_path + str(idx) + ".jpg"
    image = Image.open(fname)
    display(image)
    r, g, b = compute_mean_color(image)
    display(constant_color_image(r,g,b))

## Part 5: Using a Deep Neural Network to Represent Images

In [None]:
# The neural network library needs an in-order list of all files:
# coco_workshop/0.jpg, coco_workshop/1.jpg, ... one entry per image index.
n_images = 5000
base = 'coco_workshop/'
all_files = [base + str(file_idx) + '.jpg' for file_idx in range(n_images)]

For the sake of efficiency, we will be running MobileNet, which is a roughly 13-layer neural network designed for mobile applications. In practice, neural networks can be up to 1000 layers deep!

In [None]:
from util import load_images_for_neural_network

from keras.applications.mobilenet import MobileNet

# MobileNet without its classification head (include_top=False); with
# pooling='avg' the network outputs one feature vector per input image.
neural_net = MobileNet(include_top=False,
                       input_shape=(224,224,3),
                       pooling='avg')
neural_net.summary()

batch_size = 64

# Use the batch_size variable so the loader and the step count always agree.
images_for_nn = load_images_for_neural_network(all_files,
                                               batch_size=batch_size)

# Number of batches needed to cover every image, as an integer (ceiling
# division). The previous expression `n_images/batch_size+1` is a float
# under Python 3.
n_steps = (n_images + batch_size - 1) // batch_size
representations = neural_net.predict_generator(images_for_nn,
                                               steps=n_steps,
                                               verbose=1)
# Whole batches may yield more rows than there are images; keep one per image.
representations = representations[:n_images,:]

In [None]:
# Dimensions of the feature table: (number of images, features per image).
print(representations.shape)

In [None]:
# Rebuild the search index, this time over the neural-network representations.
# The dimension is read from the array rather than hard-coded (it is 1024 for
# this network), so the index stays valid if the model changes. 'angular' is
# the metric Annoy used implicitly when none was given; naming it avoids the
# deprecation warning newer Annoy versions emit when it is omitted.
searcher = AnnoyIndex(representations.shape[1], 'angular')
# Now insert each image's vector under its image index...
for idx in range(n_images):
    searcher.add_item(idx, representations[idx,:])
searcher.build(10)  # 10 = number of trees; an internal accuracy/speed trade-off

In [None]:
# Pick a query image and fetch its 10 nearest neighbors with their distances.
image_idx = 119
neighbors_of_image, dists = searcher.get_nns_by_item(image_idx, 10,include_distances=True)
print("Start image:")
fname = base_path + str(image_idx) + ".jpg"
image = Image.open(fname)
display(image)

# Skip the first hit (presumably the query image itself) and pair each
# remaining neighbor with its distance.
# (A stray `fname = ...` line that read a loop variable left over from an
# earlier cell used to sit here; it had no effect and has been removed.)
for idx, dist in zip(neighbors_of_image[1:], dists[1:]):
    fname = base_path + str(idx) + ".jpg"
    image = Image.open(fname)
    display(image)
    print("Distance to original image = " + str(dist))