# 8 Deep Learning

8.1 Use a pre-trained ResNet50 and VGG16 on Keras to implement an inference of the test image in this directory!

* Example: <https://keras.io/applications/>


8.2 Use a pre-trained ResNet50 and VGG16 on PyTorch to implement an inference of the test image in this directory!

* Example: http://pytorch.org/docs/master/torchvision/models.html

8.3 Compare the inference times of both networks and frameworks!

8.4 Find two positive and negative sample images that are correctly/incorrectly classified!


In [19]:
import time
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions as decode_predictions_res

import numpy as np

model = ResNet50(weights='imagenet')

img_path = 'test_image.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

k_res_start = time.time()
preds = model.predict(x)
k_res_time = time.time()-k_res_start
preds
# decode the results into a list of tuples (class, description, probability)
# (one such list for each sample in the batch)
print('Predicted:', decode_predictions_res(preds, top=3)[0])
# Predicted: [(u'n02504013', u'Indian_elephant', 0.82658225), (u'n01871265', u'tusker', 0.1122357), (u'n02504458', u'African_elephant', 0.061040461)]

Predicted: [('n04604644', 'worm_fence', 0.07046531), ('n02793495', 'barn', 0.06862043), ('n03000134', 'chainlink_fence', 0.061941862)]


In [20]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input, decode_predictions as decode_predictions_vgg

model = VGG16(weights='imagenet', include_top=True)

img_path = 'test_image.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

k_vgg_start = time.time()
features = model.predict(x)
k_vgg_time = time.time()-k_vgg_start

print('Predicted:', decode_predictions_vgg(features)[0])

Predicted: [('n02793495', 'barn', 0.07425918), ('n04604644', 'worm_fence', 0.058995645), ('n04326547', 'stone_wall', 0.039503533), ('n03891251', 'park_bench', 0.027926859), ('n02965783', 'car_mirror', 0.025498722)]


In [21]:
import torchvision.models as models
import torchvision.utils
import torchvision.transforms as transforms
import torch.autograd.variable as Variable
from PIL import Image

resnet50 = models.resnet50(pretrained=True).eval()
#resnet50 = models.resnet50(pretrained=True)

path_img = 'test_image.jpg'
image = Image.open(path_img)
transformation = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

image_tensor = transformation(image).float()
image_tensor = image_tensor.unsqueeze_(0)
input = Variable(image_tensor)

t_res_start = time.time()
output = resnet50(input)
t_res_time = time.time()-t_res_start
decode_predictions_res(output.detach().numpy(), top=3)[0]



[('n04604644', 'worm_fence', 8.481195),
 ('n02793495', 'barn', 8.475863),
 ('n03891251', 'park_bench', 8.366762)]

In [22]:
import torchvision.models as models
import torchvision.utils
import torchvision.transforms as transforms
import torch.autograd.variable as Variable
from PIL import Image

vgg16 = models.vgg16(pretrained=True).eval()

t_vgg_start = time.time()
output = vgg16(input)
t_vgg_time = time.time()-t_vgg_start
decode_predictions_vgg(output.detach().numpy())[0]


[('n02793495', 'barn', 9.087323),
 ('n04604644', 'worm_fence', 8.780945),
 ('n03891251', 'park_bench', 8.334769),
 ('n04532670', 'viaduct', 7.6061187),
 ('n09332890', 'lakeside', 7.384339)]

In [26]:
[k_res_time,k_vgg_time,t_res_time,t_vgg_time]

[3.112948179244995, 1.7309412956237793, 0.6141815185546875, 1.809493064880371]

In [28]:
for i in ["img1","img2","img3","img4"]:
    path_img = i + ".jpg"
    image = Image.open(path_img)
    transformation = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])

    image_tensor = transformation(image).float()
    image_tensor = image_tensor.unsqueeze_(0)
    input = Variable(image_tensor)

    t_res_start = time.time()
    output = resnet50(input)
    t_res_time = time.time()-t_res_start
    print(decode_predictions_res(output.detach().numpy(), top=3)[0])

[('n02484975', 'guenon', 13.237154), ('n02488702', 'colobus', 12.235919), ('n01622779', 'great_grey_owl', 10.033047)]
[('n01968897', 'chambered_nautilus', 10.218805), ('n01910747', 'jellyfish', 9.710601), ('n03775546', 'mixing_bowl', 9.596883)]
[('n09472597', 'volcano', 12.595714), ('n03388043', 'fountain', 11.021697), ('n03729826', 'matchstick', 10.211475)]
[('n02948072', 'candle', 10.652655), ('n03729826', 'matchstick', 10.208814), ('n03590841', "jack-o'-lantern", 9.003959)]


In [None]:
img1: true
img2: false 
img3: false
img4: false