In [None]:
!pip install kaggle
!mkdir .kaggle
!touch .kaggle/kaggle.json 
!chmod 600 .kaggle/kaggle.json
# add kaggle_creds to kaggle.json
!kaggle competitions download -c imaterialist-challenge-fashion-2018

In [None]:
!unzip /content/.kaggle/competitions/imaterialist-challenge-fashion-2018/test.json.zip -d data/
!unzip /content/.kaggle/competitions/imaterialist-challenge-fashion-2018/train.json.zip -d data/
!unzip /content/.kaggle/competitions/imaterialist-challenge-fashion-2018/validation.json.zip -d data/

In [1]:
import json
import threading
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

from urllib.request import urlopen
from sklearn.preprocessing import MultiLabelBinarizer

from keras.callbacks import ModelCheckpoint   
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, GlobalAveragePooling2D, GlobalMaxPooling2D, MaxPooling2D

# Directory the dataset JSON files are read from (the unzip step extracts into data/).
DATA_DIR = "data/"
# Width of every multi-hot label row; a label id maps to column (id - 1).
NUM_CLASSES = 228

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Parse the three dataset descriptors. All files are opened before any of them
# is parsed, so a missing split fails immediately.
with open(DATA_DIR + "train.json") as train, \
        open(DATA_DIR + "test.json") as test, \
        open(DATA_DIR + "validation.json") as validation:
    train_json = json.load(train)
    test_json = json.load(test)
    validation_json = json.load(validation)

# Every entry under 'images' carries the URL the image is downloaded from.
train_urls = [image['url'] for image in train_json['images']]
test_urls = [image['url'] for image in test_json['images']]
validation_urls = [image['url'] for image in validation_json['images']]

# Show a handful of URLs as a sanity check.
train_urls[:5]

FileNotFoundError: [Errno 2] No such file or directory: 'data/train.json'

In [None]:
def generate_label_array(json_obj, num_classes=None):
    """Build a multi-hot label matrix from a dataset's annotations.

    Parameters
    ----------
    json_obj : dict
        Parsed dataset JSON. Only ``json_obj['annotations']`` is read; each
        annotation must provide a ``'labelId'`` iterable of 1-based label ids
        (any value ``int()`` accepts, e.g. ``"17"``).
    num_classes : int, optional
        Number of columns in the result. Defaults to the module-level
        ``NUM_CLASSES`` when omitted.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(annotations), num_classes)`` where
        ``result[i, k - 1] == 1`` iff annotation ``i`` lists label id ``k``.
        An empty annotation list yields shape ``(0, num_classes)``.

    Raises
    ------
    IndexError
        If a label id falls outside ``1..num_classes``. This includes id 0,
        which would otherwise silently wrap around to the last column.
    """
    if num_classes is None:
        num_classes = NUM_CLASSES

    annotations = json_obj['annotations']
    # Allocate the matrix once instead of growing a list of Python lists.
    labels = np.zeros((len(annotations), num_classes), dtype=int)
    for row, annotation in enumerate(annotations):
        for label_id in annotation['labelId']:
            column = int(label_id) - 1  # ids are 1-based, columns 0-based
            if not 0 <= column < num_classes:
                raise IndexError(
                    "label id {!r} is outside the valid range 1..{}".format(label_id, num_classes)
                )
            labels[row, column] = 1
    return labels

# Encode the annotations of the training and validation splits as multi-hot matrices.
train_labels = generate_label_array(json_obj=train_json)
validation_labels = generate_label_array(json_obj=validation_json)

In [None]:
# (width, height) the preview image is resized to.
TARGET_SIZE=(256,256)

# Pick one training example at random.
rand_img = np.random.randint(0, len(train_urls))
# Its label row as a (1, n_labels) array; -1 follows the label matrix width
# instead of repeating the class count as a literal.
img_label = np.array(train_labels[rand_img]).reshape(1, -1)
img_path = train_urls[rand_img]
# Close the HTTP response once done; convert() forces the pixel data to be read
# while the connection is still open and normalises the image to 3 channels.
with urlopen(img_path) as img_file:
    image = Image.open(img_file).convert('RGB')
# Image.LANCZOS is the filter Image.ANTIALIAS was an alias for; ANTIALIAS no
# longer exists in Pillow 10+. The former thumbnail() call is dropped: it never
# enlarges and the image already has exactly TARGET_SIZE, so it was a no-op.
image_resized = image.resize(TARGET_SIZE, Image.LANCZOS)
plt.imshow(np.asarray(image_resized))
plt.show()

In [None]:
class BatchGenerator:
    """Endless iterator of randomly sampled ``(images, labels)`` batches.

    Every ``next()`` call draws ``batch_size`` random indices (with
    replacement), downloads the matching images, resizes them to
    ``target_size`` and scales the pixel values to ``[0, 1]``. An image that
    cannot be downloaded or decoded is replaced by a constant placeholder, so
    a single bad URL does not abort the batch.

    Parameters
    ----------
    img_array : sequence of str
        Image URLs.
    label_array : sequence
        Label rows aligned with ``img_array`` (row ``i`` belongs to URL ``i``).
        All rows must have the same length.
    batch_size : int
        Number of samples per batch.
    target_size : tuple
        ``(width, height)`` passed to PIL's ``resize``.
    timeout : float or None
        Socket timeout in seconds for each download; ``None`` disables it.

    Each batch is a tuple ``(X, y)`` with ``X`` of shape
    ``(batch_size, height, width, 3)`` (float, values in ``[0, 1]``) and ``y``
    of shape ``(batch_size, n_labels)``.

    Raises
    ------
    ValueError
        On construction, if ``img_array`` is empty or its length differs from
        that of ``label_array``.
    """

    def __init__(self, img_array, label_array, batch_size=32, target_size=(256,256), timeout=30):
        if len(img_array) == 0:
            raise ValueError("img_array must contain at least one image URL")
        if len(img_array) != len(label_array):
            raise ValueError(
                "img_array and label_array differ in length: {} != {}".format(
                    len(img_array), len(label_array)
                )
            )
        self.img_array = img_array
        self.label_array = label_array
        self.batch_size = batch_size
        self.lock = threading.Lock()
        self.TARGET_SIZE = tuple(target_size)
        self.timeout = timeout

    def __iter__(self):
        return self

    def next(self):
        """Python 2 style alias for ``__next__``."""
        return self.__next__()

    def __next__(self):
        """Assemble and return one ``(X, y)`` batch."""
        width, height = self.TARGET_SIZE
        # The lock only covers index sampling. Downloading and decoding touch no
        # shared state, so they run outside it and worker threads can overlap
        # their network I/O instead of queueing behind each other.
        with self.lock:
            indices = [
                np.random.randint(0, len(self.img_array))
                for _ in range(self.batch_size)
            ]

        images = [self._load_image(self.img_array[i]) for i in indices]
        labels = [np.asarray(self.label_array[i]).ravel() for i in indices]

        # Label width comes from the data rather than a hard-coded class count.
        num_labels = np.asarray(self.label_array[0]).size
        X = np.array(images).reshape(self.batch_size, height, width, 3)
        y = np.array(labels).reshape(self.batch_size, num_labels)
        return X, y

    def _load_image(self, img_path):
        """Fetch one image and return it as a ``(height, width, 3)`` float array in ``[0, 1]``."""
        width, height = self.TARGET_SIZE
        try:
            # The context manager closes the HTTP response. convert('RGB') reads
            # the pixel data while the connection is open and guarantees three
            # channels (grayscale / RGBA / palette images would otherwise not
            # fit the (height, width, 3) batch layout).
            with urlopen(img_path, timeout=self.timeout) as response:
                image = Image.open(response).convert('RGB')
            # LANCZOS is the filter the removed Image.ANTIALIAS name referred to.
            image_resized = image.resize(self.TARGET_SIZE, Image.LANCZOS)
        except Exception:
            # Best effort by design: dead links and corrupt files become a
            # constant placeholder (every channel = 1) instead of stopping
            # training. KeyboardInterrupt / SystemExit still propagate.
            return np.ones((height, width, 3), dtype='uint8') / 255.0
        return np.asarray(image_resized) / 255.0

# One batch source per labelled split, both with the default batch and image size.
train_gen = BatchGenerator(img_array=train_urls, label_array=train_labels)
val_gen = BatchGenerator(img_array=validation_urls, label_array=validation_labels)

In [None]:
# Small baseline CNN: one convolution, global max pooling, then a dense head.
model = Sequential()
model.add(Conv2D(filters=30, kernel_size=3, input_shape=(256, 256, 3)))
model.add(Dropout(0.4))
model.add(GlobalMaxPooling2D())
model.add(Dropout(0.3))
model.add(Dense(30, activation = 'relu'))
model.add(Dropout(0.1))
# One independent sigmoid per class: the label rows are multi-hot (an image can
# carry several labels at once), whereas softmax forces the class scores to
# compete and sum to 1. NUM_CLASSES replaces the repeated literal 228.
model.add(Dense(NUM_CLASSES, activation='sigmoid'))
model.summary()

In [None]:
EPOCHS = 10
STEPS = 250

# binary_crossentropy scores every label column as its own yes/no target, which
# matches multi-hot label rows; categorical_crossentropy assumes exactly one
# true class per sample.
# NOTE(review): with this loss Keras reports 'accuracy' per label entry, and
# most entries are 0, so the figure will look high -- confirm which metric you
# actually want to track.
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Keep only the weights of the best epoch seen so far.
checkpointer = ModelCheckpoint(
    filepath='model.best.hdf5', 
    verbose=1,
    save_best_only=True
)

# NOTE(review): validation_steps = 1 evaluates a single random batch per epoch,
# so the validation score that decides "best" is noisy -- consider raising it.
model.fit_generator(
    train_gen, 
    epochs = EPOCHS,
    steps_per_epoch = STEPS,
    callbacks = [checkpointer],
    validation_data = val_gen,
    validation_steps = 1
)