# Streak Interpretations for Image Prediction (Transfer Learning)

(Adapted from LIME Keras tutorial
https://github.com/marcotcr/lime/blob/master/doc/notebooks/Tutorial%20-%20Image%20Classification%20Keras.ipynb)

In [None]:
import os
import matplotlib.pyplot as plt
import keras
from keras.applications import inception_v3 as inc_net
from keras.preprocessing import image
from keras.applications.imagenet_utils import decode_predictions
from skimage.io import imread
from lime.explanation import id_generator
import numpy as np
from time import time
from skimage import io
import load_networks
import lime
import lime_image_streak
from skimage.segmentation import mark_boundaries
from tf_predict import *
# from tensorflow.examples import label_image
# import label_image
print('using keras:', keras.__version__)

## Inception Transfer Learning

Retrain the last layer of the pretrained InceptionV3 model, then interpret its predictions on new, preprocessed images.

In [None]:
#keras preprocessing function
def transform_img_fn(path_list):
    """Load images from disk and preprocess them into one Keras input batch.

    Every path is loaded at 299x299, converted to an array, given a leading
    batch axis and passed through ``inc_net.preprocess_input``; the
    per-image results are then stacked along the first axis.

    Args:
        path_list: iterable of image file paths.

    Returns:
        The ``np.vstack`` of the preprocessed per-image arrays.
    """
    def _load_one(img_path):
        # One image -> preprocessed array with a leading axis of size 1.
        pil_img = image.load_img(img_path, target_size=(299, 299))
        batch_of_one = np.expand_dims(image.img_to_array(pil_img), axis=0)
        return inc_net.preprocess_input(batch_of_one)

    return np.vstack([_load_one(img_path) for img_path in path_list])

In [None]:
#tensorflow preprocessing functions
def import_tf_imgs(path_list):
    """Read every image in ``path_list`` with ``io.imread``.

    Args:
        path_list: iterable of image file paths.

    Returns:
        A plain Python list with one ``io.imread`` result per path, in
        input order. The results are returned unstacked (the ``np.vstack``
        variant is disabled).
    """
    # Alternative loader kept for reference (not used):
    #   label_image.read_tensor_from_image_file(img_path, input_height=299, input_width=299)
    return [io.imread(img_path) for img_path in path_list]

def import_tf_img(img_path):
    """Read a single image and prepend an axis of size 1.

    Args:
        img_path: path of the image file to read.

    Returns:
        The ``io.imread`` result with a new leading axis (``axis=0``).
    """
    # Alternative loader kept for reference (not used):
    #   label_image.read_tensor_from_image_file(img_path, input_height=299, input_width=299)
    pixels = io.imread(img_path)
    return np.expand_dims(pixels, axis=0)


In [None]:
#setup transfer learning code

from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalAveragePooling2D, Conv2D, MaxPooling2D
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras.utils.np_utils import to_categorical
# from tf.examples.image_retraining import retrain
import retrain
from shutil import copy2

# #preprocessing, separate full dataset into training, validation, and testing
# data_dir = '~/flower_photos'
# dest_dir = '~/flower_photos_retrain'
# image_lists = retrain.create_image_lists(data_dir,testing_percentage=10,validation_percentage=10)
# print sum([len(image_lists[label]['training']) for label in image_lists.keys()])
# print sum([len(image_lists[label]['validation']) for label in image_lists.keys()])
# print image_lists['tulips'].keys()
# for label in image_lists.keys():
#     for set_name in ['training','validation','testing']:
#         for file_name in image_lists[label][set_name]:
#             copy2(os.path.join(data_dir,label,file_name),os.path.join(dest_dir,set_name,label,file_name))

# Input resolution used for the InceptionV3 base model and the data generators.
img_width, img_height = 299, 299

# A literal "~" is not expanded by os.listdir or by Keras' directory
# iterators, so resolve the home directory explicitly.
train_data_dir = os.path.expanduser("~/flower_photos_retrain/training")
validation_data_dir = os.path.expanduser("~/flower_photos_retrain/validation")

# Image counts; used as the number of prediction steps (batch size 1) when
# the bottleneck features are extracted, so they must match the directories.
nb_train_samples = 3056
nb_validation_samples = 451

# Settings for training the new top layer.
batch_size = 16
epochs = 50
bottleneck_size = 2048
# InceptionV3 with ImageNet weights, built without its top classification
# layers (include_top=False) and without a pooling layer on the output.
model = inc_net.InceptionV3(
    weights="imagenet",
    include_top=False,
    input_shape=(img_width, img_height, 3),
    pooling=None,
)

In [None]:
#save 2nd to last layer of inceptionv3 as numpy features
#then train a fully connected layer and save
#then load this fc layer and append to inceptionV3

# Multiply pixel values by 1/255 before they reach the network.
# NOTE(review): transform_img_fn preprocesses prediction-time images with
# inc_net.preprocess_input instead of a 1/255 rescale -- confirm that the two
# pipelines are really meant to differ.
datagen = ImageDataGenerator(rescale=1. / 255)

generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=1,
        class_mode=None,  # yield image batches only, no labels
        shuffle=False)  # keep directory order so labels can be rebuilt from per-class file counts
bottleneck_features_train = model.predict_generator(generator, nb_train_samples, verbose=1)
# Save the output as a Numpy array. Passing the file name lets numpy open the
# file in binary mode and close it again (a text-mode handle breaks on
# Python 3 and can corrupt the data on Windows).
np.save('bottleneck_features_train.npy', bottleneck_features_train)

generator2 = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=1,
        class_mode=None,
        shuffle=False)
bottleneck_features_validation = model.predict_generator(generator2, nb_validation_samples, verbose=1)
# Same as above for the validation split.
np.save('bottleneck_features_validation.npy', bottleneck_features_validation)

In [None]:
#retrain
from keras.utils.np_utils import to_categorical

# Let numpy open the .npy files itself: binary mode, and the handle is closed
# once the array has been read.
train_data = np.load('bottleneck_features_train.npy')
validation_data = np.load('bottleneck_features_validation.npy')

classes = ['daisy','dandelion','roses','sunflowers','tulips']

# Per-class entry counts, in the order of `classes`.
# NOTE(review): os.listdir counts every directory entry; this presumably
# matches the files the generators read only if the class directories
# contain images and nothing else -- verify.
train_sizes = [len(os.listdir(os.path.join(train_data_dir, c))) for c in classes]
validation_sizes = [len(os.listdir(os.path.join(validation_data_dir, c))) for c in classes]

# The features were extracted without shuffling, so the labels are each class
# index repeated once per entry of that class.
class_ids = np.arange(len(classes))
# num_classes is passed explicitly so the one-hot width does not depend on
# which class indices happen to be present.
train_labels = to_categorical(np.repeat(class_ids, train_sizes),
                              num_classes=len(classes))
validation_labels = to_categorical(np.repeat(class_ids, validation_sizes),
                                   num_classes=len(classes))

#retrain the top layer
top_model = Sequential()
top_model.add(Flatten(input_shape=model.output_shape[1:]))
# NOTE(review): categorical_crossentropy is normally paired with a softmax
# output; sigmoid is kept because the code that later reloads these weights
# is not visible here -- confirm before changing.
top_model.add(Dense(len(classes), activation='sigmoid'))
top_model.compile(optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

top_model.fit(train_data, train_labels,
          epochs=epochs,  # use the configured value instead of a second hard-coded 50
          batch_size=batch_size,
          validation_data=(validation_data, validation_labels))

#save the weights of the top layer, 2.6MB
weights_path = 'retrained/bottleneck_fc_model.h5'
weights_dir = os.path.dirname(weights_path)
# Make sure the target directory exists before saving.
if not os.path.isdir(weights_dir):
    os.makedirs(weights_dir)
top_model.save_weights(weights_path)

## Look at predictions for a few images

In [None]:
# Example images to classify and explain.
images_list = [
    os.path.join('daisy', '705422469_ffa28c566d.jpg'),
    # contains both sunflowers and daisies, good for 2 top_labels
    'flowers_etsy.jpg',
    os.path.join('daisy', '176375506_201859bb92_m.jpg'),
    os.path.join('sunflowers', '2979297519_17a08b37f6_m.jpg'),
    os.path.join('daisy', '301964511_fab84ea1c1.jpg'),
    os.path.join('sunflowers', '19504937128_a4ae90fcbd_m.jpg'),
]

# Class names, indexed by the model's output position.
classes = [
    'daisy',
    'dandelion',
    'roses',
    'sunflowers',
    'tulips',
]

In [None]:
#import keras transfer learning model
inet_model = load_networks.load_keras_inception_transfer()
images = transform_img_fn(images_list)

#print predictions
preds = inet_model.predict(images)
# print(preds)
for img_path, scores in zip(images_list, preds):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(img_path)
    # Up to five class indices, highest score first
    # (the indices that lime uses in explanation).
    sortedClasses = np.argsort(scores)[-5:][::-1]
    for s in sortedClasses:
        print('(%s, %.5f)' % (classes[s], scores[s]))

# Classifier function handed to the explainers in the cells below.
predict = inet_model.predict

In [None]:
# #keras (original) inception model
# inet_model = load_networks.load_keras_inception_imagenet()
# images = transform_img_fn(images_list)

# preds = inet_model.predict(images)
# for ii,pr in enumerate(preds):
#     print images_list[ii]
#     for x in decode_predictions(preds)[ii]:
#         print '(%s, %.5f)' %  (x[1],x[2])
# predict = inet_model.predict

In [None]:
# #tensorflow transfer learning
# g = load_networks.load_tf_transfer()
# images = import_tf_imgs(images_list)

# preds = tf_predict(images,0,5,g)
# print preds
# for ii,pr in enumerate(preds):
#     print images_list[ii]
#     sortedClasses = np.argsort(preds[ii])[-5:][::-1] #the indices that lime uses in explanation
#     for s in sortedClasses:
#         # print (classes[s],preds[ii][s])
#         print '(%s, %.5f)' % (classes[s],preds[ii][s])

# predict = lambda x: tf_predict(x,0,5,g)

In [None]:
#explanation plotting function
def explanation_plotting(explanation, label_idx, fs_string, num_features=5):
    """Show three views of an image explanation for one of its top labels.

    1. Only the positively contributing segments (rest of the image hidden).
    2. Positive and negative segments on the full image.
    3. The original image next to the selected segments pasted onto a
       constant background of -1 (black after the ``/2 + 0.5`` shift).

    Args:
        explanation: explanation object returned by ``explain_instance``.
        label_idx: position in ``explanation.top_labels`` of the label to plot.
        fs_string: feature-selection name, printed next to the class name.
        num_features: number of segments to show in each view (default 5,
            the value that was previously hard-coded).
    """
    label = explanation.top_labels[label_idx]

    # View 1: positive segments only.
    # NOTE(review): "/ 2 + 0.5" presumably undoes a [-1, 1] input scaling so
    # the image can be displayed -- confirm against the preprocessing used.
    temp, mask = explanation.get_image_and_mask(
        label, positive_only=True, num_features=num_features, hide_rest=True)
    plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%s, %s" % (classes[label], fs_string))
    plt.show()

    # View 2: positive and negative segments, rest of the image kept visible.
    temp, mask = explanation.get_image_and_mask(
        label, positive_only=False, num_features=num_features, hide_rest=False)
    plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))
    plt.show()

    # View 3: original image (left) and the explanation against a black
    # background with all segment boundaries drawn (right).
    fig = plt.figure()
    ax1 = fig.add_subplot(121)
    ax1.imshow(explanation.image / 2 + 0.5)
    ax1.set_xticklabels("")
    ax1.set_yticklabels("")

    ax2 = fig.add_subplot(122)
    # Ids of the first `num_features` features listed for this label.
    selected_segments = [x[0] for x in explanation.local_exp[label]][:num_features]
    finalTemp = lime_image_streak.SegmentedImage.add_image_segments(
        -1 * np.ones_like(explanation.image),
        explanation.image,
        explanation.segments,
        selected_segments)
    ax2.imshow(mark_boundaries(finalTemp / 2 + 0.5, explanation.segments))
    ax2.set_xticklabels("")
    ax2.set_yticklabels("")
    plt.show()

**Get an explanation**

In [None]:
#new explainer class that can support streak feature_selection
query_image = images[0]
#Method described in Section 6.2 of the paper
explainer = lime_image_streak.LimeImageExplainer(feature_selection='greedy_likelihood')

In [None]:
%%time
# Explain the top predicted class only; for the two top classes use
# top_labels=2 instead.
explanation = explainer.explain_instance(
    query_image,
    classifier_fn=predict,
    top_labels=1,
    num_features=5,
    qs_kernel_size=6,
    hide_color=0,
    num_samples=1000,
)

**Explanations for the top class**

(for explanation of the 2nd top class, change top_labels to 2)

In [None]:
# Plot the explanation for the top label.
explanation_plotting(explanation, label_idx=0, fs_string='greedy_likelihood')
# Second label (needs top_labels=2 in explain_instance):
# explanation_plotting(explanation, label_idx=1, fs_string='greedy_likelihood')

In [None]:
# Now try another selection method: 'forward_selection',
# the LIME baseline method.
explainerFS = lime_image_streak.LimeImageExplainer(
    feature_selection='forward_selection',
)

In [None]:
%%time
# Explain the two top predicted classes with forward selection.
explanationFS = explainerFS.explain_instance(
    query_image,
    classifier_fn=predict,
    top_labels=2,
    num_features=5,
    qs_kernel_size=6,
    hide_color=0,
    num_samples=1000,
)

In [None]:
# Forward selection: plot the explanation for the top label.
explanation_plotting(explanationFS, label_idx=0, fs_string='forward_selection')

In [None]:
# Forward selection: plot the explanation for the second label.
explanation_plotting(explanationFS, label_idx=1, fs_string='forward_selection')

In [None]:
# Now try another selection method: 'streaming_greedy',
# the method described in Section A.8 of the paper.
explainerSG = lime_image_streak.LimeImageExplainer(
    feature_selection='streaming_greedy',
)

In [None]:
%%time
# Explain the two top predicted classes with streaming greedy selection.
explanationSG = explainerSG.explain_instance(
    query_image,
    classifier_fn=predict,
    top_labels=2,
    num_features=5,
    qs_kernel_size=6,
    hide_color=0,
    num_samples=1000,
)

In [None]:
# Streaming greedy: plot the explanation for the top label.
explanation_plotting(explanationSG, label_idx=0, fs_string='streaming_greedy')

In [None]:
# Streaming greedy: plot the explanation for the second label.
explanation_plotting(explanationSG, label_idx=1, fs_string='streaming_greedy')