## Pretrained CNN: *transfer learning*

Nature article: [Dermatologist-level classification of skin cancer with deep neural networks](http://www.nature.com/nature/journal/v542/n7639/full/nature21056.html)

In [1]:
# Project-local helper module for the dogs-vs-cats dataset.
import dogs_vs_cats as dvc
# Collect the dataset's image files. Later cells index/slice this object and
# pass its entries to `image.load_img`, so it is presumably a list of file
# paths -- confirm against `dogs_vs_cats.image_files`.
all_files = dvc.image_files()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


data file train.zip has already been downloaded
Data has already been extracted


## Imagenet pretrained models
Documentation: https://keras.io/applications/. The `keras.applications` namespace provides models pretrained on the ImageNet 2012 classification challenge, including the most accurate recent architectures.

In [None]:
from keras.applications.nasnet import NASNetMobile
from keras.applications.nasnet import preprocess_input as nasnet_preprocess_input
from keras.preprocessing import image
# The VGG16 `preprocess_input` stays imported under its original name because
# the VGG16 feature-extraction cells further down rely on it.
from keras.applications.vgg16 import preprocess_input, decode_predictions
import numpy as np

# https://keras.io/applications/#nasnet
model = NASNetMobile(weights='imagenet')

# Channels-last (height, width, channels) input size.
input_image_shape = (224,224,3)
img_path = all_files[10]
# `load_img` expects target_size=(height, width): take the first two entries.
# Slicing with `[1:]` would give (224, 3) and resize the image to the wrong size.
img = image.load_img(img_path, target_size=input_image_shape[:2])
x = image.img_to_array(img)
# Add the leading batch axis: the model predicts on batches of images.
x = np.expand_dims(x, axis=0)
# Use the preprocessing that matches the NASNet weights, not VGG16's.
x = nasnet_preprocess_input(x)

preds = model.predict(x)
# decode the results into a list of tuples (class, description, probability)
print('Predicted:', decode_predictions(preds, top=3)[0])

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.8/NASNet-mobile.h5


In [None]:
# Print the layer-by-layer architecture of the currently loaded model.
model.summary()

Imagenet 1000 classes:
* http://image-net.org/explore

In [5]:
# decode_predictions returns one list of (class, description, probability)
# tuples for each sample in the batch; the whole batch result is printed here.

print('Predicted:', decode_predictions(preds, top=5))

Predicted: [[('n02124075', 'Egyptian_cat', 0.15671857), ('n15075141', 'toilet_tissue', 0.10072913), ('n04209239', 'shower_curtain', 0.085384056), ('n04493381', 'tub', 0.064990662), ('n02808440', 'bathtub', 0.063053794)]]


In [None]:
from IPython.display import Image
# Render the image at `img_path`; as the cell's last expression it is shown
# through the notebook's rich display.
Image(img_path)

In [None]:
# Shape of the array returned by the last `model.predict` call
# (presumably (n_samples, 1000) for the ImageNet classes -- confirm).
print(preds.shape)

In [8]:
from keras.applications.imagenet_utils import CLASS_INDEX
# Imagenet 1000 classes
# NOTE(review): CLASS_INDEX appears to be populated lazily by
# `decode_predictions`, so this cell presumably has to run after a
# `decode_predictions` call -- confirm for the installed Keras version.
# The bare expression displays the whole mapping from class index (as a
# string) to [WordNet id, label].
CLASS_INDEX

{'241': ['n02108000', 'EntleBucher'],
 '69': ['n01768244', 'trilobite'],
 '91': ['n01824575', 'coucal'],
 '138': ['n02018795', 'bustard'],
 '902': ['n04579432', 'whistle'],
 '414': ['n02769748', 'backpack'],
 '857': ['n04429376', 'throne'],
 '573': ['n03444034', 'go-kart'],
 '510': ['n03095699', 'container_ship'],
 '133': ['n02011460', 'bittern'],
 '583': ['n03467068', 'guillotine'],
 '161': ['n02088238', 'basset'],
 '17': ['n01580077', 'jay'],
 '613': ['n03602883', 'joystick'],
 '149': ['n02074367', 'dugong'],
 '971': ['n09229709', 'bubble'],
 '57': ['n01735189', 'garter_snake'],
 '887': ['n04532106', 'vestment'],
 '14': ['n01537544', 'indigo_bunting'],
 '481': ['n02978881', 'cassette'],
 '93': ['n01829413', 'hornbill'],
 '332': ['n02328150', 'Angora'],
 '878': ['n04505470', 'typewriter_keyboard'],
 '785': ['n04162706', 'seat_belt'],
 '772': ['n04127249', 'safety_pin'],
 '26': ['n01630670', 'common_newt'],
 '797': ['n04235860', 'sleeping_bag'],
 '333': ['n02342885', 'hamster'],
 '72':

In [None]:
# predict a set of images
from keras.applications.nasnet import preprocess_input as nasnet_preprocess_input

n_images = 10
# Collect one array per image and stack them afterwards: this works for either
# Keras image data format (a hard-coded (n, 3, 224, 224) buffer only matches
# channels-first arrays) and never leaves uninitialised rows in the batch.
image_arrays = []
for img_path in all_files[0:n_images]:
    img = image.load_img(img_path, target_size=(224, 224))
    image_arrays.append(image.img_to_array(img))
x = np.stack(image_arrays)

# preprocess and predict all together
# `model` is still the NASNetMobile network here, so use NASNet preprocessing
# rather than the VGG16 `preprocess_input`.
x_preprocessed = nasnet_preprocess_input(x)
preds = model.predict(x_preprocessed,verbose=1)
print("")
print(preds.shape)
# One list of (class, description, probability) tuples per image.
dec_preds = decode_predictions(preds,top=5)
dec_preds

In [None]:
from IPython.display import Image, display

# Show every image as a thumbnail, followed by one line listing its decoded
# predictions as "<label> (prob: <p>)".
for img_path, dec_pred in zip(all_files[0:n_images], dec_preds):
    thumbnail = Image(img_path, width="120px", height="120px")
    display(thumbnail)
    # Each entry is (class id, label, probability); only the last two are shown.
    caption = " ".join("%s (prob: %.3f)" % (entry[1], entry[2]) for entry in dec_pred)
    print(caption)

## Using pretrained CNN as *feature extractors*

In [11]:
# load model without top layer
n_images_train=500
n_images_test=500
input_image_shape = (3,224,224)
train_features, train_labels,train_files, \
test_features, test_labels, test_files = dvc.training_test_datasets(all_files,
                                                                    n_images_train,n_images_test,
                                                                    input_image_shape)

Loading train set
loading image (1/500)
loading image (101/500)
loading image (201/500)
loading image (301/500)
loading image (401/500)
Loading test set
loading image (1/500)
loading image (101/500)
loading image (201/500)
loading image (301/500)
loading image (401/500)


In [12]:
# load_img from keras.preprocessing loads the images in [0,255] scale
train_features = preprocess_input(train_features)
test_features = preprocess_input(test_features)

In [13]:
# VGG16 is imported here because no earlier cell brings it into scope.
from keras.applications.vgg16 import VGG16
from keras.models import Model

# Full ImageNet-pretrained VGG16, then a model truncated at the 'fc2' layer so
# that predict() returns that layer's activations as feature vectors.
base_model = VGG16(weights='imagenet')
# The Keras 2 functional API keywords are `inputs`/`outputs`; the singular
# `input`/`output` spellings are legacy Keras 1 names.
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc2').output)

print("Predicting train images")
train_features_cnn = model.predict(train_features,verbose=1)
print("Predicting test images")
test_features_cnn = model.predict(test_features,verbose=1)

Predicting train images
Predicting test images


In [14]:
train_features_cnn.shape

(500, 4096)

In [15]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid: linear kernel only, four candidate values of C.
tuned_parameters = dict(kernel=['linear'], C=[1, 10, 100, 1000])

# 5-fold cross-validated search over the grid, run on 7 parallel jobs.
clf = GridSearchCV(estimator=svm.SVC(C=1),
                   param_grid=tuned_parameters,
                   cv=5,
                   n_jobs=7)
clf.fit(train_features_cnn, train_labels)

# Display the estimator selected by the search.
clf.best_estimator_

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [16]:
# Score the selected classifier on the CNN features of both splits, then report.
train_score = clf.score(train_features_cnn, train_labels)
test_score = clf.score(test_features_cnn, test_labels)
print("Train score: {}".format(train_score))
print("Test score: {}".format(test_score))

Train score: 1.0
Test score: 0.978


References: 
 * https://keras.io/applications/
 * [PyImageSearch blog](https://www.pyimagesearch.com/)
 