In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm_notebook as tqdm
from keras.utils.data_utils import Sequence
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras.models import Sequential, Model
from keras.layers import Activation, Dense, Dropout, Flatten, Convolution2D, GlobalAveragePooling2D, GlobalMaxPooling2D, MaxPooling2D
from keras import applications
from keras import optimizers

# Directory (relative to the notebook) that the test manifest and the model weights are read from.
DATA_DIR = "../data/"
# Number of output classes; used as the width of the model's final softmax layer.
NUM_CLASSES = 228

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the test-set manifest; each entry under "images" carries an "imageId".
with open(DATA_DIR + "test.json") as manifest_file:
    test_json = json.load(manifest_file)

# Build the image paths from DATA_DIR (instead of repeating the literal
# "../data/") so that relocating the data directory only requires one change.
test_paths = [DATA_DIR + "test/{}.jpg".format(obj['imageId']) for obj in test_json['images']]
print(test_paths[:3])

['../data/test/1.jpg', '../data/test/2.jpg', '../data/test/3.jpg']


In [3]:
# IMAGE_SIZE = 100
# conv_base = applications.Xception(weights = "imagenet", include_top=False, input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3))
# for layer in conv_base.layers[:3]:
#     layer.trainable = False
# model = Sequential()
# model.add(conv_base)
# model.add(Flatten())
# model.add(Dense(1024, activation='relu'))
# model.add(Dropout(0.4))
# model.add(Dense(1024, activation='relu'))
# model.add(Dense(NUM_CLASSES, activation='softmax'))
# model.load_weights(DATA_DIR + "model.best.100.hdf5")

# model.compile(
#     loss = "categorical_crossentropy", 
#     optimizer = optimizers.SGD(lr=0.0, momentum=0.9, decay=0.0, nesterov=False),
#     metrics=["accuracy"]
# )

# Side length (pixels) of the square network input; TestBatchSequence also reads
# this constant when allocating its batch arrays.
IMAGE_SIZE = 75
# Xception convolutional base without its classification head (include_top=False).
conv_base = applications.Xception(weights = "imagenet", include_top=False, input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3))

# Mark the first three layers of the base as non-trainable.
# NOTE(review): this runs before load_weights below; presumably it mirrors the
# training-time setup so the saved weights line up with the model — confirm
# before reordering or removing it.
for layer in conv_base.layers[:3]:
    layer.trainable = False

# Classifier head stacked on the base:
# flatten -> Dense(1024, relu) -> Dropout(0.4) -> Dense(1024, relu) -> Dense(NUM_CLASSES, softmax).
model = Sequential()
model.add(conv_base)
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(1024, activation='relu'))
model.add(Dense(NUM_CLASSES, activation='softmax'))
# Restore the weights stored in the 75px checkpoint.
# NOTE(review): this presumably replaces the ImageNet weights requested above,
# in which case weights=None would avoid an unnecessary download — confirm.
model.load_weights(DATA_DIR + "model.75.hdf5")

# NOTE(review): the visible cells only run prediction on this model, so the
# loss/optimizer settings presumably do not influence the results — confirm.
model.compile(
    loss = "categorical_crossentropy", 
    optimizer = optimizers.SGD(lr=0.0001, momentum=0.9), 
    metrics=["accuracy"]
)

In [4]:
class TestBatchSequence(Sequence):
    """Keras ``Sequence`` that serves test images in fixed-size batches.

    Each batch is an array of shape ``(n, IMAGE_SIZE, IMAGE_SIZE, 3)`` where
    ``n`` is ``batch_size`` (smaller for the final batch). There are no labels:
    ``__getitem__`` returns only the image array.

    NOTE(review): pixel values are passed through unscaled (no division or
    mean subtraction is applied here); confirm this matches the preprocessing
    used when the model was trained.

    Args:
        x_set: sequence of image file paths.
        batch_size: number of images per batch.
        resize: when True, every image is brought to exactly
            ``IMAGE_SIZE x IMAGE_SIZE``; when False, images are expected to
            already have that size on disk.
    """

    def __init__(self, x_set, batch_size, resize = False):
        self.x = x_set
        self.batch_size = batch_size
        self.resize = resize

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def _load_image(self, path):
        """Read one image and return it as an ``(H, W, 3)`` array.

        With ``resize`` enabled the result is always
        ``(IMAGE_SIZE, IMAGE_SIZE, 3)``.
        """
        target = (IMAGE_SIZE, IMAGE_SIZE)
        # The context manager closes the underlying file handle; the original
        # code left one open handle per image.
        with Image.open(path) as source:
            img = source
            if self.resize:
                # thumbnail() keeps the aspect ratio and never upscales, so on
                # its own it only yields the target size for square images that
                # are at least IMAGE_SIZE wide. It is kept as the first step so
                # such images are processed exactly as before.
                img.thumbnail(target)
            if img.mode != 'RGB':
                # Grayscale / palette / RGBA / CMYK inputs would otherwise give
                # the wrong number of channels.
                img = img.convert('RGB')
            if self.resize and img.size != target:
                # Non-square or too-small image: force the exact target shape
                # so it fits the batch array.
                img = img.resize(target)
            return np.array(img)

    def __getitem__(self, idx):
        """Return batch ``idx`` as an array of shape ``(n, IMAGE_SIZE, IMAGE_SIZE, 3)``.

        An image that cannot be read is reported via ``print`` and replaced by
        a placeholder whose pixels are all 1, so one bad file does not abort
        the whole prediction run.
        """
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        images = np.empty([len(batch_x), IMAGE_SIZE, IMAGE_SIZE, 3])
        for i, path in enumerate(batch_x):
            try:
                image = self._load_image(path)
            except Exception as e:
                # Deliberate best-effort: keep going with a placeholder image.
                print(e)
                image = np.ones((IMAGE_SIZE, IMAGE_SIZE, 3), dtype='uint8')
            # With resize=False a wrongly sized image still raises here, as before.
            images[i, ...] = image
        return images

In [5]:
%%time
BATCH = 64

test_seq = TestBatchSequence(test_paths, BATCH, resize = True)
# test_seq = TestBatchSequence(test_paths, BATCH, resize = False)

# Run exactly one step per batch. The previous `len(test_paths) // BATCH + 1`
# is one step too many whenever the number of test images is an exact multiple
# of BATCH; the extra step would be served by wrapping around to the start of
# the sequence, producing more prediction rows than there are images.
STEPS = len(test_seq)

probs = model.predict_generator(
    test_seq,
    steps = STEPS,
    workers = 5,
    verbose = 1
)

# Sanity check: one row of class scores per test image.
assert len(probs) == len(test_paths), "prediction count does not match number of test images"

Wall time: 36.8 s


In [6]:
# Inspect the predicted class-score vector for the first test image.
probs[0]

array([1.5855099e-05, 4.1505727e-03, 3.8648266e-04, 1.0924552e-03,
       7.1759155e-04, 7.1294216e-04, 4.2944127e-03, 2.7698051e-04,
       2.0048737e-03, 3.5813719e-04, 1.2991928e-03, 2.2910607e-04,
       1.5680384e-03, 2.1333904e-03, 1.7441035e-03, 5.5035025e-06,
       4.2830363e-02, 5.3071203e-03, 2.3405977e-02, 1.9155590e-02,
       6.6758005e-04, 1.6949662e-04, 5.8537571e-05, 6.1446386e-05,
       1.6862374e-03, 1.6095447e-03, 1.3143205e-04, 2.8503847e-03,
       1.6214438e-04, 1.0390450e-03, 5.0106813e-04, 2.8356204e-03,
       6.7449303e-04, 6.3253025e-04, 7.6066202e-04, 8.1022028e-03,
       2.4137851e-03, 1.3389267e-03, 6.6413888e-04, 1.6092868e-03,
       1.1181124e-05, 9.4155950e-04, 6.5829279e-04, 1.1095888e-02,
       5.0145725e-04, 1.3181295e-05, 1.9138595e-03, 1.4898842e-03,
       1.1476087e-02, 1.5895652e-04, 1.7743589e-03, 1.6969508e-03,
       1.6870156e-02, 5.3524366e-04, 1.4709062e-03, 9.1324199e-04,
       5.2559882e-04, 6.3228840e-04, 6.9976416e-03, 5.9387094e

In [15]:
def generate_prob_labels(probas, threshold=0.03):
    """Turn per-class scores into lists of 1-based label ids, one list per sample.

    Args:
        probas: sequence of rows (for example a 2-D array); each row holds one
            score per class.
        threshold: a class is selected when its score is strictly greater than
            this value. Defaults to 0.03, the value that used to be hard-coded.

    Returns:
        A list with one entry per row of ``probas``. Each entry is the list of
        selected class positions in ascending order, counted from 1 (position
        0 in the row becomes label 1). A row with no score above the threshold
        yields an empty list.
    """
    # Distinct names for the row and the class index: the original reused `i`
    # for both loops, shadowing the outer counter.
    return [
        [class_idx + 1 for class_idx, score in enumerate(row) if score > threshold]
        for row in probas
    ]

# NOTE: despite its name, `probas` holds the lists of 1-based label ids returned
# by generate_prob_labels, not probabilities.
probas = generate_prob_labels(probs)

In [16]:
# Preview the predicted label lists for the first five test images.
# NOTE(review): the recorded output shows the same label set for all five
# images; worth checking whether the inputs are preprocessed the way the model
# expects — confirm.
print(probas[:5])

[[17, 66, 105, 106, 153, 171, 214], [17, 66, 105, 106, 153, 171, 214], [17, 66, 105, 106, 153, 171, 214], [17, 66, 105, 106, 153, 171, 214], [17, 66, 105, 106, 153, 171, 214]]


In [17]:
# Write the submission file: one row per test image, with the predicted label
# ids separated by spaces.
test_images = test_json['images']
if len(probas) != len(test_images):
    # A mismatch means predictions and images are out of step; writing the file
    # anyway would attach labels to the wrong ids.
    raise ValueError(
        "got {} predictions for {} test images".format(len(probas), len(test_images))
    )

with open("submission-new.csv","w") as f:
    f.write("image_id,label_id\n")
    # Use the imageId stored in the manifest rather than the running index + 1,
    # which is only correct when the ids are exactly 1..N in manifest order.
    for obj, labels in tqdm(zip(test_images, probas), total = len(probas)):
        output_labels = " ".join(str(x) for x in labels)
        f.write("{},{}\n".format(obj['imageId'], output_labels))


