In [None]:
import io
import os

import bson
import numpy as np
import pandas as pd
from keras import backend as K
from keras.applications.resnet50 import ResNet50 as CNN
from keras.layers import Dense
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from tqdm import *

from utilities import utils

# Load the autoreload extension and reload imported modules before each cell
# runs, so edits to local code such as `utilities.utils` are picked up live.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Directory holding the competition inputs; every data path is derived from it.
data_dir = "inputs/"

test_bson_path = os.path.join(data_dir, "test.bson")
# Expected number of products in test.bson; used as the progress-bar total.
num_test_products = 1768172

# Resolve the categories file through data_dir instead of a second hard-coded
# "inputs/" prefix, so changing data_dir relocates every input at once.
categories_df = pd.read_csv(os.path.join(data_dir, "categories.csv"), index_col=0)
# Only the second table is kept: it is indexed with the argmax of the network
# output to obtain the category id written to the submission.
_, idx2cat = utils.make_category_tables(categories_df)

# Side length, in pixels, of the square RGB images fed to the network.
input_size = 197

In [None]:
# Load the sample submission as the frame the predictions are written into.
# os.path.join keeps this working whether or not data_dir ends with a separator,
# matching how test_bson_path is built.
submission_df = pd.read_csv(os.path.join(data_dir, "sample_submission.csv"))
submission_df.head()

In [None]:
# Image generator built with library defaults; no preprocessing function is
# attached (ImageDataGenerator(preprocessing_function=preprocess_input) would be
# the alternative).
test_datagen = ImageDataGenerator()

# Iterator over the product documents stored in test.bson. The file handle is
# opened here and read from as the prediction loop pulls documents.
data = bson.decode_file_iter(
    open(test_bson_path, mode="rb")
)

In [None]:
# One output unit per entry of the index -> category table. This must be
# defined before the Dense layer is built; nothing earlier in the notebook
# provides it.
num_classes = len(idx2cat)

# ResNet50 backbone without its ImageNet head and with no pretrained weights.
# pooling="avg" reduces the final feature map to a single vector per image, so
# the Dense layer emits exactly one probability vector of length num_classes
# per image regardless of the spatial size the backbone produces.
base_model = CNN(include_top=False,
                 input_shape=(input_size, input_size, 3),
                 weights=None,
                 pooling="avg")
classifier = Dense(num_classes, activation='softmax')(base_model.output)

model = Model(inputs=base_model.input, outputs=classifier)

model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# NOTE(review): the backbone is created with weights=None and the line below is
# disabled, so the model predicts with untrained weights until a trained
# checkpoint is loaded here.
#model.load_weights("weights/")

In [None]:
# Position of the target column, looked up once so every prediction can be
# written with a single positional scalar assignment. The chained form
# `submission_df.iloc[c]["category_id"] = ...` assigns into a temporary row
# copy and leaves submission_df unchanged.
category_col = submission_df.columns.get_loc("category_id")

with tqdm(total=num_test_products) as pbar:
    # NOTE(review): rows are matched by position, which assumes test.bson lists
    # products in the same order as sample_submission.csv - confirm.
    for c, d in enumerate(data):
        num_imgs = len(d["imgs"])

        if num_imgs == 0:
            # Nothing to predict from; keep the placeholder category already in
            # the submission row instead of sending an empty batch to the model.
            pbar.update()
            continue

        batch_x = np.zeros((num_imgs, input_size, input_size, 3), dtype=K.floatx())

        for i, img_record in enumerate(d["imgs"]):
            # Decode the embedded image bytes and resize to the network input.
            img = load_img(io.BytesIO(img_record["picture"]),
                           target_size=(input_size, input_size))
            x = img_to_array(img)
            x = test_datagen.random_transform(x)
            x = test_datagen.standardize(x)

            # Add the image to the batch.
            batch_x[i] = x

        # Predict all images of the product at once, then average the
        # per-image class probabilities into one product-level prediction.
        prediction = model.predict(batch_x, batch_size=num_imgs)
        avg_pred = prediction.mean(axis=0)
        cat_idx = int(np.argmax(avg_pred))

        submission_df.iat[c, category_col] = idx2cat[cat_idx]
        pbar.update()

In [None]:
# Persist the submission as a gzip-compressed CSV, leaving out the row index.
submission_df.to_csv(
    path_or_buf="my_submission.csv.gz",
    index=False,
    compression="gzip",
)