In [1]:
import numpy as np
import tensorflow as tf
import tensornets as nets
import os
from PIL import Image
from tqdm import tqdm, tqdm_notebook
import pickle
import math
import skimage
import skimage.io
import skimage.transform
import matplotlib.pyplot as plt

### Implementation Guide:  
- [X] Read In Images  
- [ ] Create Labels
- [ ] Split data into training and validation sets
- [ ] Load in Model
- [ ] Change last layer of model
- [ ] Manipulate images to fit model
- [ ] Train Model with new data
- [ ] Validate Performance
- [ ] Save Model
- [ ] Write function to classify 1 image

### Reading Images From File

In [None]:
classes = ["Can", "Cookies", "Eggs", "Empty", "Fruit"]
img_dir = "./Classes/"

# dataDict maps each class name to the list of its images, each stored as an
# (height, width, 3) uint8 array.
dataDict = {classID: [] for classID in classes}
for classID in tqdm_notebook(classes):
    path = os.path.join(img_dir, classID)
    # os.listdir order is arbitrary; sort so the positional train/validation
    # split further down is reproducible between runs.
    for image in tqdm_notebook(sorted(os.listdir(path))):
        # Close the file promptly, and force 3 channels so grayscale / RGBA /
        # palette images do not break code that expects (H, W, 3).
        with Image.open(os.path.join(path, image)) as pic:
            pix = np.array(pic.convert("RGB"))
        # In-place append instead of rebuilding the whole list for every image.
        dataDict[classID].append(pix)

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

HBox(children=(IntProgress(value=0, max=166), HTML(value='')))

HBox(children=(IntProgress(value=0, max=166), HTML(value='')))

### Splitting into train and validate

In [None]:
def one_hot_encode(x, class_names=None):
    """One-hot encode a sequence of class-name labels.

    Args:
        x: sequence of labels; every label must appear in ``class_names``.
        class_names: ordered class names; a label's position in this sequence
            is its one-hot column. Defaults to the notebook-level ``classes``.

    Returns:
        Float array of shape ``(len(x), len(class_names))`` with a single 1
        per row.

    Raises:
        KeyError: if a label is not present in ``class_names``.
    """
    if class_names is None:
        class_names = classes

    classEncoder = {classID: i for (i, classID) in enumerate(class_names)}
    # Width follows the number of classes instead of a hard-coded 5.
    encoded = np.zeros((len(x), len(class_names)))

    for idx, val in enumerate(x):
        encoded[idx, classEncoder[val]] = 1

    return encoded


In [None]:
def train_validate_split(validfrac = .1):
    """Split every class in ``dataDict`` into validation and training parts.

    For each class, the first ``ceil(n * validfrac)`` images go to the
    validation set and the remainder to the training set. Labels are the
    class names, repeated once per image.

    Returns:
        Tuple ``(valid_features, valid_labels, train_features, train_labels)``
        of plain lists, grouped by class in the order of ``classes``.
    """
    valid_features, valid_labels = [], []
    train_features, train_labels = [], []

    for classID in tqdm(classes):
        samples = dataDict[classID]
        total = len(samples)
        cut = math.ceil(total * validfrac)

        # Head of the class goes to validation ...
        valid_features += samples[:cut]
        valid_labels += [classID] * cut
        # ... and the tail to training.
        train_features += samples[cut:]
        train_labels += [classID] * (total - cut)

    return (valid_features, valid_labels, train_features, train_labels)

# Perform the split once and keep one-hot versions of both label lists.
valid_features, valid_labels, train_features, train_labels = train_validate_split()
one_hot_valid = one_hot_encode(valid_labels)
one_hot_train = one_hot_encode(train_labels)

# Sanity check: feature and label counts must agree within each split.
for split_features, split_labels in ((valid_features, valid_labels),
                                     (train_features, train_labels)):
    print(len(split_features), len(split_labels))

        
        
        
    
    

In [None]:
def _preprocess_and_save(features, labels, filename):
    """One-hot encode ``labels`` and pickle ``(features, encoded_labels)``.

    Args:
        features: image data, stored unchanged.
        labels: class-name labels, passed through ``one_hot_encode``.
        filename: path of the pickle file to (over)write.
    """
    labels = one_hot_encode(labels)

    # Context manager guarantees the file is flushed and closed.
    with open(filename, 'wb') as out_file:
        pickle.dump((features, labels), out_file)


def preprocess_and_save_data(n_batches=23, batchPath="batchedImages/"):
    """Split the data set and pickle it as training batches plus validation/testing files.

    Writes ``preprocess_batch_<i>.p`` for ``i`` in ``1..n_batches`` and
    ``preprocess_validation.p`` / ``preprocess_testing.p`` under ``batchPath``.
    Labels are one-hot encoded by ``_preprocess_and_save``; features are
    stored as-is (no normalisation is applied here).

    Args:
        n_batches: number of training batch files to write (must be >= 1).
        batchPath: output directory; created if it does not exist.

    Raises:
        ValueError: if ``n_batches`` is smaller than 1.
    """
    if n_batches < 1:
        raise ValueError("n_batches must be >= 1")
    if batchPath:
        os.makedirs(batchPath, exist_ok=True)

    (valid_features, valid_labels, train_features, train_labels) = train_validate_split()

    # Size batches from the TRAINING set (the original used the validation set,
    # which silently dropped most of the training data). Rounding up makes the
    # batches together cover every training sample; trailing batches may be
    # shorter or empty.
    batchLen = math.ceil(len(train_features) / n_batches)
    # NOTE(review): train_features is grouped by class, so each batch file
    # holds images of only one or two classes — consider shuffling first.
    for batch_i in range(1, n_batches + 1):
        start = (batch_i - 1) * batchLen
        stop = batch_i * batchLen
        _preprocess_and_save(train_features[start:stop], train_labels[start:stop],
                             os.path.join(batchPath, 'preprocess_batch_' + str(batch_i) + '.p'))

    # The validation and testing files are written from the same held-out data.
    _preprocess_and_save(np.array(valid_features), np.array(valid_labels),
                         os.path.join(batchPath, 'preprocess_validation.p'))
    _preprocess_and_save(np.array(valid_features), np.array(valid_labels),
                         os.path.join(batchPath, 'preprocess_testing.p'))



In [None]:
# Write the pickled training batches and the validation/testing pickles to disk.
preprocess_and_save_data()

## INPUT and OUTPUT tensors

In [None]:
# preprocess_and_save_data() writes its pickles under "batchedImages/", so the
# validation file has to be read from there (not from the working directory).
with open('batchedImages/preprocess_validation.p', mode='rb') as valid_file:
    valid_features, valid_labels = pickle.load(valid_file)

In [None]:
# Graph inputs. The names are looked up again ('input_x:0', 'output_y:0')
# when the saved graph is restored, so they must stay as they are.
x = tf.placeholder(
    dtype=tf.float32,
    shape=(None, 224, 224, 3),   # batch of 224x224 3-channel images
    name='input_x',
)
y = tf.placeholder(
    dtype=tf.float32,
    shape=(None, 5),             # one-hot label per image, 5 classes
    name='output_y',
)



In [None]:
### HYPER-PARAMETERS
learning_rate = 1e-5   # step size handed to the Adam optimizer
epochs = 16            # number of passes of the outer training loop
batch_size = 16        # images per mini-batch

In [None]:
# Build a VGG19 network with a 5-way output on top of the input placeholder.
# NOTE(review): is_training=True is fixed at graph-build time, and this same
# graph is used for the accuracy evaluations — confirm whether training-only
# behaviour (e.g. dropout) should be disabled there.
logits = nets.VGG19(x, is_training=True, classes=5)
# Named alias so the output can be fetched as 'logits:0' after restoring the graph.
model = tf.identity(logits,name='logits')
# NOTE(review): tensornets models may already return softmax probabilities rather
# than raw logits; if so, this loss applies softmax twice — confirm against the
# tensornets documentation.
loss = tf.losses.softmax_cross_entropy(y,logits)
train = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss)

# Fraction of samples whose arg-max prediction equals the arg-max of the one-hot label.
correct_pred = tf.equal(tf.argmax(model,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

In [None]:
# Inspect the network; presumably lists the model's layer outputs — see tensornets docs.
logits.print_outputs()

In [None]:
# Inspect the network; presumably prints a size/parameter summary — see tensornets docs.
logits.print_summary()

In [None]:
def batch_features_labels(features, labels, batch_size):
    """
    Yield consecutive (features, labels) slices of at most <batch_size> items.

    The final pair is shorter when len(features) is not a multiple of
    <batch_size>; nothing is yielded for empty input.
    """
    n_items = len(features)
    for lo in range(0, n_items, batch_size):
        # Clamp the upper bound to the number of features.
        window = slice(lo, min(n_items, lo + batch_size))
        yield features[window], labels[window]

In [None]:
def load_preprocess_training_batch(batch_id, batch_size):
    """
    Load one pickled training batch file, resize its images to 224x224 and
    return a generator of (features, labels) mini-batches of <batch_size> or less.

    Args:
        batch_id: number of the ``preprocess_batch_<id>.p`` file under ``batchedImages/``.
        batch_size: maximum number of images per yielded mini-batch.
    """
    batchPath = "batchedImages/"
    filename = batchPath+'preprocess_batch_' + str(batch_id) + '.p'
    # NOTE: pickle.load can execute arbitrary code; only read files this notebook wrote.
    with open(filename, mode='rb') as batch_file:
        features, labels = pickle.load(batch_file)

    # Images are converted to float before resizing (presumably so that resize
    # keeps the original 0-255 value range — confirm against the skimage docs),
    # then truncated back to uint8.
    tmpFeatures = [
        skimage.transform.resize(np.asarray(feature, dtype='float'), (224, 224)).astype('uint8')
        for feature in features
    ]

    # Return the training data in batches of size <batch_size> or less
    return batch_features_labels(tmpFeatures, labels, batch_size)

In [None]:
# Resize every validation image to the 224x224 network input size: cast to
# float, resize, then truncate back to uint8, and stack into a single array.
tmpValidFeatures = np.array([
    np.copy(
        skimage.transform.resize(np.copy(raw_image).astype('float'), (224, 224))
    ).astype('uint8')
    for raw_image in valid_features
])

In [None]:
# Sanity check: shape of the stacked, resized validation images.
print(tmpValidFeatures.shape)

In [None]:
save_model_path = './image_classification'
# Number of preprocess_batch_<i>.p files to train on per epoch. Must match the
# number written by preprocess_and_save_data() (23); the previous value of 5
# left most of the saved batches unused.
n_batches = 23

print('Training...')
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())
    print('global_variables_initializer ... done ...')
    # Run the op returned by logits.pretrained() after the generic initialiser
    # (presumably loads the pre-trained VGG19 weights — see tensornets docs).
    sess.run(logits.pretrained())
    print('model.pretrained ... done ... ')

    # Training cycle
    print('starting training ... ')
    for epoch in range(epochs):
        # Loop over all batch files
        for batch_i in range(1, n_batches + 1):
            for batch_features, batch_labels in load_preprocess_training_batch(batch_i, batch_size):
                sess.run(train, {x: batch_features, y: batch_labels})

            print('Epoch {:>2}, Batch {}:  '.format(epoch + 1, batch_i), end='')

            # Mean accuracy over the whole validation set. Each mini-batch
            # accuracy is weighted by its size so a short final batch does
            # not skew the result.
            valid_correct = 0.0
            for batch_valid_features, batch_valid_labels in batch_features_labels(tmpValidFeatures, valid_labels, batch_size):
                batch_acc = sess.run(accuracy, {x: batch_valid_features, y: batch_valid_labels})
                valid_correct += batch_acc * len(batch_valid_features)

            valid_acc = valid_correct / max(len(tmpValidFeatures), 1)
            print('Validation Accuracy: {:.6f}'.format(valid_acc))

    # Save Model
    saver = tf.train.Saver()
    save_path = saver.save(sess, save_model_path)


In [None]:
def load_label_names():
    """Return the notebook-level ``classes`` list (the same object, not a copy)."""
    return classes


In [None]:
from sklearn.preprocessing import LabelBinarizer

def display_image_predictions(features, labels, predictions):
    """Print and plot the predictions for a handful of images.

    For every sample one subplot row is drawn: the image on the left (titled
    with the predicted class) and a horizontal bar chart of the per-class
    prediction values on the right. A text summary is printed per sample.

    Args:
        features: sequence of images accepted by ``imshow``.
        labels: sequence of one-hot label vectors, one per image.
        predictions: sequence of per-class prediction vectors, one per image.
    """
    label_names = load_label_names()
    n_classes = len(label_names)

    # zip() below stops at the shortest input, so size the figure accordingly
    # instead of hard-coding 10 rows.
    n_rows = min(len(features), len(labels), len(predictions))
    if n_rows == 0:
        return

    # One-hot -> class index (arg-max, same rule as used for the predictions).
    label_ids = np.argmax(np.asarray(labels), axis=1)

    # squeeze=False keeps axs two-dimensional even for a single row.
    fig, axs = plt.subplots(n_rows, 2, figsize=(12, 2.4 * n_rows), squeeze=False)

    margin = 0.05
    ind = np.arange(n_classes)
    width = (1. - 2. * margin) / n_classes

    for image_i, (feature, label_id, prediction) in enumerate(zip(features, label_ids, predictions)):
        pred_id = np.argmax(prediction)
        correct_name = label_names[label_id]
        pred_name = label_names[pred_id]
        is_match = 'True' if pred_id == label_id else 'False'

        pred_names = list(label_names[:len(prediction)])
        predictions_array = [{name: value} for name, value in zip(pred_names, prediction)]

        print('[{}] ground truth: {}, predicted result: {} | {}'.format(image_i, correct_name, pred_name, is_match))
        print('\t- {}\n'.format(predictions_array))

        axs[image_i][0].imshow(feature)
        axs[image_i][0].set_title(pred_name)
        axs[image_i][0].set_axis_off()

        axs[image_i][1].barh(ind + margin, prediction, width)
        axs[image_i][1].set_yticks(ind + margin)
        axs[image_i][1].set_yticklabels(pred_names)

    plt.tight_layout()

In [None]:
# preprocess_and_save_data() writes its pickles under "batchedImages/", so the
# testing file has to be read from there. It is written from the same
# held-out data as the validation file.
with open('batchedImages/preprocess_testing.p', mode='rb') as test_file:
    test_features, test_labels = pickle.load(test_file)

# Resize every test image to the 224x224 network input size (float -> resize
# -> uint8, as for the training and validation images).
tmpFeatures = np.asarray([
    skimage.transform.resize(np.asarray(feature, dtype='float'), (224, 224)).astype('uint8')
    for feature in test_features
])

In [None]:


%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import random

# Checkpoint prefix to restore from; same value the training cell saves to.
save_model_path = './image_classification'
# Mini-batch size for the test-accuracy pass (rebinds the global used during training).
batch_size = 64
# Number of test images drawn at random for the prediction display.
n_samples = 10
# NOTE(review): not referenced by any code visible in this notebook.
top_n_predictions = 5

def test_model(tmpFeatures):
    """Restore the saved model, report test accuracy and plot random predictions.

    Args:
        tmpFeatures: test images already resized to 224x224, aligned with the
            notebook-level ``test_labels``.

    Reads the notebook-level ``test_features`` / ``test_labels``,
    ``save_model_path``, ``batch_size`` and ``n_samples``.
    """
    loaded_graph = tf.Graph()

    with tf.Session(graph=loaded_graph) as sess:
        loader = tf.train.import_meta_graph(save_model_path + '.meta')
        loader.restore(sess, save_model_path)

        loaded_x = loaded_graph.get_tensor_by_name('input_x:0')
        loaded_y = loaded_graph.get_tensor_by_name('output_y:0')
        loaded_logits = loaded_graph.get_tensor_by_name('logits:0')
        loaded_acc = loaded_graph.get_tensor_by_name('accuracy:0')

        # Get accuracy in batches for memory limitations. Each batch accuracy
        # is weighted by its size so a short final batch does not skew the mean.
        test_correct = 0.0
        for feature_batch, label_batch in batch_features_labels(tmpFeatures, test_labels, batch_size):
            batch_acc = sess.run(
                loaded_acc,
                feed_dict={loaded_x: feature_batch, loaded_y: label_batch})
            test_correct += batch_acc * len(feature_batch)

        n_test = len(tmpFeatures)
        test_acc = test_correct / n_test if n_test else float('nan')
        print('Testing Accuracy: {}\n'.format(test_acc))

        # Print Random Samples (never ask for more samples than exist).
        candidates = list(zip(test_features, test_labels))
        sample_count = min(n_samples, len(candidates))
        if sample_count == 0:
            return
        random_test_features, random_test_labels = tuple(zip(*random.sample(candidates, sample_count)))

        # The sampled images are the un-resized originals; resize them for the network.
        tmpTestFeatures = [
            skimage.transform.resize(np.asarray(feature, dtype='float'), (224, 224)).astype('uint8')
            for feature in random_test_features
        ]

        random_test_predictions = sess.run(
            tf.nn.softmax(loaded_logits),
            feed_dict={loaded_x: tmpTestFeatures, loaded_y: random_test_labels})

        display_image_predictions(random_test_features, random_test_labels, random_test_predictions)

test_model(tmpFeatures)



References
https://github.com/deep-diver/CIFAR10-VGG19-Tensorflow/blob/master/CIFAR10-transfer-learning-tensornets.ipynb  
https://towardsdatascience.com/transfer-learning-in-tensorflow-9e4f7eae3bb4