In [1]:
import os
import random
import glob
import random
import scipy.ndimage
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from imutils import paths
from sklearn import linear_model
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from keras.applications.inception_v3 import InceptionV3
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img

Using TensorFlow backend.


In [2]:
# Restrict CUDA to physical GPU 1 for this process.
# NOTE(review): this cell runs after TensorFlow/Keras were imported; confirm that no
# GPU context has been created yet, otherwise the setting is ignored.
# NOTE(review): with a single visible device the framework presumably names it
# '/gpu:0', while model_create() below asks for '/gpu:1' -- confirm placement.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [3]:
# Directory configuration.
# '__file__' is passed as a string literal (not the variable), so realpath()
# resolves it against the current working directory and ROOT_DIR is that directory.
ROOT_DIR = os.path.normpath(os.path.join(os.path.dirname(os.path.realpath('__file__'))))
# Source-domain images; the parent folder name of each image is used as its label.
SOURCE_DIR = os.path.abspath('/data/aumkar/train')
# Target-domain images; labels are read the same way and used for evaluation.
TARGET_DIR = os.path.abspath('/data/aumkar/validation')
# Location of pre-computed artefacts such as 'labels.npy'.
DATA_LOAD_DIR = os.path.abspath('/data/aumkar/data_load')

In [4]:
# Materialise the image paths found under each directory as plain lists so they
# can be shuffled, sliced into batches and measured with len().
source_path = list(paths.list_images(SOURCE_DIR))
target_path = list(paths.list_images(TARGET_DIR))

In [5]:
# Sort first so the starting order does not depend on how the filesystem happens
# to enumerate files, then shuffle with a dedicated, fixed-seed generator so a
# fresh "Restart & Run All" reproduces exactly the same training order.
source_path.sort()
random.Random(42).shuffle(source_path)

In [6]:
# Same treatment as the source list: deterministic base order, then a seeded
# shuffle, so target batches (and therefore predictions) line up across runs.
target_path.sort()
random.Random(42).shuffle(target_path)

In [7]:
# Sanity check: number of source and target image paths that were found.
(len(source_path), len(target_path))

(152397, 55388)

In [8]:
# The class name of an image is the name of the directory that contains it
# (second-to-last path component). Computed after shuffling, so each label list
# is aligned index-by-index with its path list.
labels_source = [p.split(os.path.sep)[-2] for p in source_path]
labels_target = [p.split(os.path.sep)[-2] for p in target_path]

In [9]:
# Fit the encoder once, on the source labels, and reuse that same mapping for the
# target labels. Refitting on the target set would silently assign different
# integers to the same class name whenever the two domains do not contain exactly
# the same set of classes; transform() instead fails loudly on an unseen class.
le = LabelEncoder()
labels = le.fit_transform(labels_source)
labels_t = le.transform(labels_target)

In [10]:
# Sanity check: one encoded label per target image.
labels_t.shape

(55388,)

In [11]:
# Class list handed to partial_fit(classes=...) on the first training batch
# (see model_train).
# NOTE(review): presumably equal to np.unique(labels); confirm the file matches
# the encoder fitted above.
labels_unique = np.load(os.path.join(DATA_LOAD_DIR, 'labels.npy'))

In [12]:
# Per-class weights passed to SGDClassifier(class_weight=...) below.
# NOTE(review): keys 0-11 presumably are the LabelEncoder integers of the 12
# classes; how the values were derived is not shown in this notebook.
weights = {0 : 0.51, 1 : 1., 2 : 0.44, 3 : 0.57, 4 : 0.77, 5 : 0.51, 6 : 0.42, 7 : 0.6, 8 : 0.68, 9 : 0.63, 10 : 0.46, 11 : 0.76}

In [13]:
# Global linear classifier that model_train()/feature_extract() update
# incrementally through partial_fit, one image batch at a time.
# NOTE(review): it is only ever trained via partial_fit in this notebook, so
# max_iter / tol presumably have no effect -- confirm against the sklearn docs.
clf = linear_model.SGDClassifier(max_iter = 1000, tol = 1e-3, n_jobs = -1, class_weight = weights)

In [14]:
def model_create(transfer_model):
    """Build an ImageNet-pretrained convolutional base without its classifier head.

    Parameters
    ----------
    transfer_model : str
        One of 'InceptionV3', 'VGG16', 'VGG19', 'Xception' or 'DenseNet121'.

    Returns
    -------
    The Keras model built with ``weights='imagenet'`` and ``include_top=False``.

    Raises
    ------
    ValueError
        If ``transfer_model`` is not one of the supported names (previously the
        function fell through and returned None).
    """
    supported = ('InceptionV3', 'VGG16', 'VGG19', 'Xception', 'DenseNet121')
    if transfer_model not in supported:
        raise ValueError('Unsupported transfer_model %r; expected one of: %s'
                         % (transfer_model, ', '.join(supported)))

    # Only InceptionV3 is imported at the top of the notebook, so referring to the
    # other architectures by bare name raised NameError. Look the constructor up
    # on keras.applications instead.
    from keras import applications as keras_applications
    constructor = getattr(keras_applications, transfer_model)

    # NOTE(review): CUDA_VISIBLE_DEVICES is set to "1" earlier in the notebook, so
    # the process may only see one GPU; '/gpu:1' then presumably relies on soft
    # device placement -- confirm.
    with tf.device('/gpu:1'):
        return constructor(weights = 'imagenet', include_top = False)

In [15]:
def model_reshape(transfer_model, features):
    """Flatten every sample's feature volume into one row vector.

    The flattened width is derived from the actual shape of ``features`` rather
    than from a per-architecture constant, so the function works for any input
    resolution and any backbone. For the sizes that were previously hard-coded
    (e.g. 5 * 5 * 2048 for InceptionV3) the result is unchanged.

    Parameters
    ----------
    transfer_model : str
        Name of the backbone that produced ``features``. Kept for interface
        compatibility; it no longer influences the result.
    features : numpy.ndarray
        Array whose first axis indexes samples.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(features.shape[0], product of the remaining axes)``.
    """
    sample_count = features.shape[0]
    # Computed explicitly (instead of using -1) so an empty batch still reshapes.
    per_sample = int(np.prod(features.shape[1:]))
    return features.reshape((sample_count, per_sample))

In [16]:
def model_train(i, model, features, label):
    """Run one incremental training step on ``model`` and hand it back.

    ``i`` is the zero-based batch index. For the first batch (``i == 0``) the
    global ``labels_unique`` is supplied to ``partial_fit`` as ``classes``; for
    every other batch only the features and labels are passed.
    """
    print('Training batch: ', i + 1)

    # Extra keyword arguments are only needed on the very first batch.
    first_batch_kwargs = {'classes': labels_unique} if i == 0 else {}
    model.partial_fit(features, label, **first_batch_kwargs)

    return model

In [17]:
def feature_extract(tpath, spath, batch_size, pseudo_labels, source_labels, model_, source, trained_model, iteration, n_k):
    """Push images through a pretrained CNN in batches and train or predict on the features.

    What happens to the extracted features depends on ``source`` and ``iteration``:

    * ``source == 'source'`` and ``iteration == 0``: incrementally train the
      global ``clf`` on ``spath`` / ``source_labels`` and return it.
    * ``source == 'target'`` and ``iteration == 0``: predict the images in
      ``spath`` with ``trained_model`` and return all predictions as one array.
    * ``source == 'target'`` and ``iteration != 0``: draw ``int(n_k)`` random
      images from ``tpath`` together with their ``pseudo_labels``, add them to
      ``spath`` / ``source_labels`` and keep training the global ``clf``;
      return it.
    * any other combination: features are extracted but unused; returns None.

    Parameters
    ----------
    tpath : list of str or None
        Target image paths to sample pseudo-labelled examples from.
    spath : list of str
        Image paths to process. The list is not modified.
    batch_size : int
        Number of images per batch.
    pseudo_labels : array-like or None
        Predicted labels aligned with ``tpath``.
    source_labels : array-like
        Labels aligned with ``spath`` (only read when training).
    model_ : str
        Backbone name understood by ``model_create`` / ``model_reshape``.
    source : str
        'source' or 'target' (see above).
    trained_model : object or None
        Fitted classifier used in prediction mode.
    iteration : int
        0 for the initial pass, non-zero for pseudo-label passes.
    n_k : number or None
        How many target images to sample in a pseudo-label pass.
    """
    train_mode = (source == 'source' and iteration == 0) or (source == 'target' and iteration != 0)
    predict_mode = source == 'target' and iteration == 0

    pred = []
    model1 = model_create(model_)

    # Work on private copies: the caller's path list must not grow from call to
    # call, and paths and labels have to stay the same length.
    image_paths = list(spath)
    image_labels = np.asarray(source_labels) if train_mode else None

    if source == 'target' and iteration != 0:

        idx = np.arange(len(tpath))
        np.random.shuffle(idx)

        sampled_idxs = idx[:int(n_k)]

        target_sample = np.array(tpath)[sampled_idxs]
        labels_sample = np.asarray(pseudo_labels)[sampled_idxs]

        # The original extended a temporary ``tolist()`` copy of the labels, so the
        # pseudo-labels were dropped while the paths were appended.
        image_paths.extend(target_sample.tolist())
        image_labels = np.concatenate([image_labels, labels_sample])

    total_batches = int(np.ceil(len(image_paths) / float(batch_size)))

    # Training always continues on the global classifier; model_train() returns
    # the very object it was given.
    # NOTE(review): pseudo-label passes therefore warm-start from the already
    # fitted ``clf`` -- confirm that this is intended.
    model_partial = clf if train_mode else None

    for (b, i) in enumerate(range(0, len(image_paths), batch_size)):
        print("Processing batch {}/{}".format(b + 1, total_batches))
        batchPaths = image_paths[i: i + batch_size]
        batchImages = []

        for imagePath in batchPaths:
            # load the image resized to 224x224 and convert it to an array
            image = load_img(imagePath, target_size=(224, 224))
            image = img_to_array(image)

            # add a leading batch axis, then apply ImageNet preprocessing
            # NOTE(review): the default preprocess_input mode is used for every
            # backbone; InceptionV3/Xception presumably expect a different
            # scaling -- confirm against the Keras applications docs.
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)

            batchImages.append(image)

        # stack into one array, run the network, flatten each sample's features
        batchImages = np.vstack(batchImages)

        features = model1.predict(batchImages, batch_size = batch_size)
        features1 = model_reshape(model_, features)

        if train_mode:
            batchLabels = image_labels[i: i + batch_size]
            model_partial = model_train(b, model_partial, features1, batchLabels)
        elif predict_mode:
            pred.append(target_pred(trained_model, features1))

    if train_mode:
        # The original returned the undefined name ``fin_model`` for pseudo-label
        # passes, which raised NameError.
        return model_partial
    if predict_mode:
        # Batches can differ in length (the last one is usually shorter), so join
        # them directly instead of first building a ragged array.
        return np.concatenate(pred) if pred else np.array([])
    return None

In [18]:
def target_pred(model, target_features):
    """Announce the prediction step, then return ``model.predict(target_features)``."""
    print('Predicting batch features')
    batch_predictions = model.predict(target_features)
    return batch_predictions

In [19]:
def sslda_online(M, batch_size=256, transfer_model='InceptionV3'):
    """Run one self-training pass from the source domain to the target domain.

    1. Train the classifier on the labelled source images.
    2. Predict the target images to obtain initial pseudo-labels.
    3. For ``i = 1 .. M - 1``: retrain using ``(i / M) * len(target_path)``
       randomly sampled pseudo-labelled target images in addition to the source
       images, re-predict the target set and record the accuracy against
       ``labels_t``.

    Parameters
    ----------
    M : int
        Number of steps; ``M - 1`` pseudo-label iterations are executed.
    batch_size : int, optional
        Images per feature-extraction batch (default 256, the previous constant).
    transfer_model : str, optional
        Backbone name passed to ``feature_extract`` (default 'InceptionV3').

    Returns
    -------
    (fin_pred, score)
        The latest target predictions and the list of per-iteration accuracies.
        With ``M <= 1`` no iteration runs, so the initial predictions and an
        empty list are returned (previously this raised UnboundLocalError).
    """
    score = []

    svm_model = feature_extract(None, source_path, batch_size, None, labels, transfer_model, 'source', None, 0, None)

    # Seeding fin_pred with the initial predictions lets every iteration use the
    # same code path; the first iteration no longer needs a special case.
    fin_pred = feature_extract(None, target_path, batch_size, None, labels_t, transfer_model, 'target', svm_model, 0, None)

    for i in range(1, M):

        print('Iteration: ', i)

        n = (i / float(M)) * len(target_path)

        # Pass a copy of the source paths so that samples added during one
        # iteration can never accumulate in the global list.
        mod = feature_extract(target_path, list(source_path), batch_size, fin_pred, labels, transfer_model, 'target', None, i, n)
        fin_pred = feature_extract(None, target_path, batch_size, None, labels_t, transfer_model, 'target', mod, 0, None)

        score.append(accuracy_score(labels_t, fin_pred))

    return fin_pred, score

In [None]:
# Train three ensemble members and keep each one's final target predictions and
# per-iteration accuracy history.
pred = []
mean_acc = []

for i in range(3):

    print('ENSEMBLE ITERATION: ', i+1)

    # The training functions always update the global ``clf``. Without a reset,
    # members 2 and 3 would simply keep training member 1's model, so the
    # "ensemble" would not consist of independent models. Rebind ``clf`` to a
    # fresh, unfitted estimator with identical hyper-parameters for every member.
    clf = linear_model.SGDClassifier(**clf.get_params())

    sslda_pred, acc = sslda_online(10)

    pred.append(sslda_pred)
    mean_acc.append(acc)

('ENSEMBLE ITERATION: ', 1)
Processing batch 1/596
('Training batch: ', 1)
Processing batch 2/596
('Training batch: ', 2)
Processing batch 3/596
('Training batch: ', 3)
Processing batch 4/596
('Training batch: ', 4)
Processing batch 5/596
('Training batch: ', 5)
Processing batch 6/596
('Training batch: ', 6)
Processing batch 7/596
('Training batch: ', 7)
Processing batch 8/596
('Training batch: ', 8)
Processing batch 9/596
('Training batch: ', 9)
Processing batch 10/596
('Training batch: ', 10)
Processing batch 11/596
('Training batch: ', 11)
Processing batch 12/596
('Training batch: ', 12)
Processing batch 13/596
('Training batch: ', 13)
Processing batch 14/596
('Training batch: ', 14)
Processing batch 15/596
('Training batch: ', 15)
Processing batch 16/596
('Training batch: ', 16)
Processing batch 17/596
('Training batch: ', 17)
Processing batch 18/596
('Training batch: ', 18)
Processing batch 19/596
('Training batch: ', 19)
Processing batch 20/596
('Training batch: ', 20)
Processing

('Training batch: ', 165)
Processing batch 166/596
('Training batch: ', 166)
Processing batch 167/596
('Training batch: ', 167)
Processing batch 168/596
('Training batch: ', 168)
Processing batch 169/596
('Training batch: ', 169)
Processing batch 170/596
('Training batch: ', 170)
Processing batch 171/596
('Training batch: ', 171)
Processing batch 172/596
('Training batch: ', 172)
Processing batch 173/596
('Training batch: ', 173)
Processing batch 174/596
('Training batch: ', 174)
Processing batch 175/596
('Training batch: ', 175)
Processing batch 176/596
('Training batch: ', 176)
Processing batch 177/596
('Training batch: ', 177)
Processing batch 178/596
('Training batch: ', 178)
Processing batch 179/596
('Training batch: ', 179)
Processing batch 180/596
('Training batch: ', 180)
Processing batch 181/596
('Training batch: ', 181)
Processing batch 182/596
('Training batch: ', 182)
Processing batch 183/596
('Training batch: ', 183)
Processing batch 184/596
('Training batch: ', 184)
Proce

In [None]:
# One figure per ensemble member showing its accuracy after each pseudo-label
# iteration. The title carries the 1-based member number; the original
# interpolated the whole accuracy list into the title instead.
for model_idx, acc_history in enumerate(mean_acc):
    plt.figure()
    plt.plot(acc_history)
    plt.title('Accuracy over iterations (for model %s)' % (model_idx + 1))
    plt.xlabel('# Iterations')
    plt.ylabel('SVM mean accuracy')
    plt.show()

In [None]:
# Human-readable class names used as target_names in the classification reports.
# NOTE(review): listed alphabetically, which presumably matches the integer order
# produced by LabelEncoder -- confirm against le.classes_.
classes = ['aeroplane', 'bicycle', 'bus', 'car', 'horse', 'knife', 'motorcycle', 'person', 'plant', 'skateboard', 
           'train', 'truck']

In [None]:
# Per-class precision / recall / F1 on the target set, one report per ensemble
# member's final predictions.
for i in pred:
    print(classification_report(labels_t, i, target_names = classes))

In [None]:
# Per-sample majority vote over the ensemble members. The original
# np.maximum.reduce picked the largest class index among the members, which is
# not a vote.
votes = np.vstack(pred).astype(int)                      # (n_models, n_samples)
class_ids = np.arange(votes.max() + 1)
# vote_counts[c, s] = number of members that predicted class c for sample s
vote_counts = (votes[np.newaxis, :, :] == class_ids[:, np.newaxis, np.newaxis]).sum(axis=1)
# argmax returns the first maximum, so ties resolve to the lowest class index.
majority_vote = vote_counts.argmax(axis=0)

In [None]:
# Per-class report for the combined (ensemble) predictions on the target set.
print(classification_report(labels_t, majority_vote, target_names = classes))