In [9]:
import pandas as pd
import json

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.manifold import TSNE

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from functools import partial
import functools
import numpy as np
import argparse
import random
import pickle
import cv2
import os
import seaborn as sns
from skimage.io import imread
from skimage.transform import resize
from PIL import ImageFile
from tqdm import tqdm
import h5py

%pylab inline
%matplotlib inline

import tensorflow as tf
import keras
import keras.backend as K
from keras.models import Model
from keras import optimizers
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.layers import *
from keras.backend.tensorflow_backend import set_session
from keras.utils.np_utils import to_categorical
from keras.utils.training_utils import multi_gpu_model
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator, img_to_array

from collections import defaultdict
import time
from PIL import Image
import cv2 as cv
import urllib

from skimage.io import imread
from skimage.transform import resize

# Limit this process to a 0.7 fraction of GPU memory and register the resulting
# TensorFlow session as the one the Keras backend uses.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.7
set_session(tf.Session(config=config))

In [2]:
# Hyper-parameters for the run.
# NOTE(review): in the visible notebook only `batch_size` is read again (by the
# TensorBoard callback). The model input size (150), the generator batch size
# (128) and the number of fit epochs (10) are hard-coded in their own cells —
# confirm which values are actually intended.
target_size = 256
epochs = 1 # Raise to 30 to reach the 0.9967 accuracy quoted by the author
batch_size = 100
steps_per_epoch = 100
INIT_LR = 0.01

In [None]:
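# Placeholder: insert your own preprocessing here. Later cells expect it to define
# `imgs` (the inputs handed to `imread`, e.g. image file paths) and
# `labels` (one list of tags per image).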

In [8]:
# `labels` holds one list of tags per image, e.g.
#   [['women', 'shoes', 'black'], ['men', 'shoes', 'green'], ...]
# dtype=object is requested explicitly because the tag lists may differ in
# length: recent NumPy releases refuse to build an array from such ragged input
# unless the object dtype is given (older releases only emitted a warning).
labels = np.array(labels, dtype=object)

In [None]:
# Multi-hot encode the tag lists with MultiLabelBinarizer.fit_transform:
# `mlb` keeps the fitted encoder (its classes_ are used later to size the output
# layer and to name predictions), `lbl` is the encoded label matrix.
mlb = MultiLabelBinarizer()
lbl = mlb.fit_transform(labels)
# Echo the encoded matrix as the cell output.
lbl

In [None]:
# Show the tag vocabulary learned by the binarizer, then its size,
# each on its own line.
print(mlb.classes_, len(mlb.classes_), sep='\n')

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    imgs, lbl, test_size=0.2, random_state=42
)

In [265]:
# ImageNet-pretrained InceptionV3 without its classifier, fed 150x150 RGB images.
# NOTE(review): the image readers in this notebook do not call any
# `preprocess_input`; confirm the raw pixel range is what the backbone should see.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=[150, 150, 3])

# Classification head: global pooling followed by two Dense+Dropout stages.
x = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 512):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.3)(x)

# Multi-label output: one independent sigmoid per tag (not softmax), see
# https://stackoverflow.com/questions/44164749/how-does-keras-handle-multilabel-classification
predictions = Dense(len(mlb.classes_), activation='sigmoid')(x)

In [None]:
# Instantiate the template model under a CPU device scope, then wrap it in a
# 2-GPU data-parallel model.
with tf.device("/cpu:0"):
    model = Model(inputs=base_model.input, outputs=predictions)

print("Number of layers : " + str(len(model.layers)))

multi_model = multi_gpu_model(model, gpus = 2)

# Fine-tune only the last 50 layers: mark everything trainable first, then
# freeze every layer except the last 50. This happens before compile() so the
# flags are taken into account by both compiled models.
for layer in model.layers:
    layer.trainable = True

for layer in model.layers[:-50]:
    layer.trainable = False

# Multi-label setup: binary cross-entropy scores each tag independently, and
# binary_accuracy thresholds each sigmoid output on its own. categorical_accuracy
# would only compare the arg-max tag, which is misleading when several tags can
# be active at once.
model.compile(optimizer = 'Adam',
              loss = 'binary_crossentropy',
              metrics = ['binary_accuracy'])

# If you only want to train on a single GPU, comment out the multi_model lines.
multi_model.compile(optimizer = 'Adam',
              loss = 'binary_crossentropy',
              metrics = ['binary_accuracy'])

# Optional TensorBoard logging; comment this cell out if you do not use it.
# Every run gets its own directory: the first logs/<index> that does not exist yet.
# NOTE(review): the fit cell in this notebook does not pass `tensorboard` through
# `callbacks=`, so as written nothing is logged — confirm and wire it in if wanted.
index = 0
while os.path.exists('logs/' + str(index)):
    index += 1
os.makedirs('logs/' + str(index))

tensorboard = keras.callbacks.TensorBoard(
    log_dir='./logs/' + str(index),
    histogram_freq=0,
    batch_size=batch_size,
    write_graph=True,
    write_grads=True,
    write_images=True,
    embeddings_freq=0,
    embeddings_layer_names=None,
    embeddings_metadata=None,
)

In [268]:
# a multi generator for multi-label classification
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that reads and resizes images one batch at a time.

    Parameters
    ----------
    list_IDs : sequence
        One entry per sample, each passed to ``imread`` (e.g. a file path).
    labels : array-like
        Label rows aligned with ``list_IDs``. It is indexed with an integer
        index array, so it must support NumPy-style fancy indexing.
    batch_size : int
        Number of samples per batch.
    dim : tuple
        Its last two entries give the (height, width) every image is resized to.
    n_channels, n_classes : int
        Stored on the instance; not used when building batches.
    shuffle : bool
        Whether the sample order is reshuffled at the start of every epoch.
    """

    def __init__(self, list_IDs, labels, batch_size=32, dim=(1,150,150), n_channels=1,
                 n_classes=10, shuffle=True):
        """Store the configuration and build the sample order for the first epoch."""
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a partial last batch is dropped)."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` pair."""
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(batch_indexes)

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def read_and_resize(self, filepath):
        """Load one image and return it resized, with a leading batch axis of size 1.

        The target size comes from the last two entries of ``self.dim`` so the
        generator follows its configuration instead of a hard-coded 150x150.
        Pixel values keep their original range (``preserve_range=True``).
        """
        img = imread(filepath)
        height_width = tuple(self.dim[-2:])
        res = resize(img, height_width, preserve_range=True, mode='reflect')
        return np.expand_dims(res, 0)

    def __data_generation(self, batch_indexes):
        """Build one batch from positions into ``list_IDs`` / ``labels``.

        Each loaded image has shape (1, H, W, ...); stacking them along axis 0
        yields the batch directly, so no buffer is pre-allocated.
        """
        X = np.vstack([self.read_and_resize(self.list_IDs[i])
                       for i in batch_indexes])
        y = self.labels[batch_indexes]
        return X, y

In [None]:
# Settings shared by the training and validation generators.
params = dict(
    dim=(1, 150, 150),
    batch_size=128,
    n_classes=len(mlb.classes_),
    n_channels=3,
    shuffle=True,
)

# Both generators read their images lazily; they differ only in the split they serve.
training_generator = DataGenerator(X_train, y_train, **params)
validation_generator = DataGenerator(X_test, y_test, **params)

In [1]:
# Train the multi-GPU model for 10 epochs, loading batches with 6 worker processes.
# On a single GPU, call model.fit_generator with the same arguments instead.
multi_model.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    epochs=10,
    workers=6,
    use_multiprocessing=True,
)

In [None]:
def read_and_resize(filepath):
    """Load an image, resize it to 150x150 and prepend a batch axis of size 1.

    Pixel values keep their original range (``preserve_range=True``); borders
    are handled with reflect padding during resizing.
    """
    resized = resize(imread(filepath), (150, 150),
                     preserve_range=True, mode='reflect')
    return resized[np.newaxis, ...]

In [None]:
# Path of the image to classify — set this before running the cell.
path_to_img = ''
# Convert the source image (e.g. a PNG) to an RGB JPEG named 'image.jpg';
# comment this out if you do not need the conversion.
# The context manager closes the source file handle as soon as the RGB copy exists.
with Image.open(path_to_img) as im:
    rgb_im = im.convert('RGB')
rgb_im.save('image.jpg')

In [None]:
# Load the converted JPEG as a single-sample batch (leading axis of size 1).
# NOTE(review): this rebinds `image`, shadowing the `keras.preprocessing.image`
# module imported in the first cell.
image = read_and_resize('image.jpg')

In [None]:
# Score the image with the multi-GPU model; the output layer is a sigmoid, so
# the result holds one score per tag.
proba = multi_model.predict(image)

In [None]:
# Show the raw scores returned by the multi-GPU model.
print(proba)

In [None]:
# Score the same image with the template model and keep the score vector of
# the first (and only) sample.
proba = model.predict(image)[0]
# Indices of the two highest scores, best first.
idxs = np.argsort(proba)[:-3:-1]
print(proba, idxs, sep='\n')

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

In [None]:
# Display the image that was just classified.
img = mpimg.imread('image.jpg')
imgplot = plt.imshow(img)

In [None]:
# Print the top-scoring tags selected in `idxs`, formatted as "tag: score%".
for j in idxs:
    print("{}: {:.2f}%".format(mlb.classes_[j], proba[j] * 100))

# List every tag whose score exceeds the 0.05 threshold, as a percentage.
for (label, p) in zip(mlb.classes_, proba):
    if (p > 0.05):
        print (label, p * 100)

# Full tag vocabulary, for reference.
print (mlb.classes_)

In [None]:
# To persist a model trained through `multi_model`, save the template `model`
# (the one that was passed to multi_gpu_model) rather than the multi-GPU wrapper.
model.save('multi_label_model.h5')