### Import packages

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import shutil
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import random
import cv2
import h5py
import imutils
import glob
import os
import urllib.request
from tqdm import tqdm
import pickle

from keras.models import Sequential, Model
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Activation
from keras.layers import UpSampling2D, BatchNormalization, Flatten, Dense, GlobalMaxPooling2D
from keras.layers.core import Dropout, Reshape
from keras.layers.merge import Concatenate
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras.optimizers import RMSprop, Adam

# Pin the random generators up front so any stochastic step repeats across kernel restarts.
seed = 0
np.random.seed(seed)
# `random` is imported by this notebook as well; seed it so both generators are fixed.
random.seed(seed)

### Download and read data

In [5]:
# Training metadata; its 'Sub_category' and 'Link_to_the_image' columns are used to build `labels` below.
train = pd.read_csv('./Data/Share/myntra_train_dataset.csv')

In [6]:
# Offline submission sheet; it is filled with a 'Sub_category' column and written back out at the end of the notebook.
test = pd.read_csv('./Data/Share/Submission_offline3d61f7e.csv')

In [6]:
# Download every distinct test image once. The cell is safe to re-run:
# files that are already on disk are skipped, failed URLs are collected.
offline_test = pd.read_csv('./Data/Share/Submission_offline3d61f7e.csv')
faulty_test_offline = []  # URLs that could not be fetched
test_offline_dir = './Data/Test_offline/'
os.makedirs(test_offline_dir, exist_ok=True)  # urlretrieve cannot write into a missing folder
for url in tqdm(offline_test['Link_to_the_image'].dropna().drop_duplicates()):
    # Local file name = everything after the last '/' of the URL
    name = url[url.rfind('/')+1:]
    target = test_offline_dir + name
    if os.path.exists(target):
        continue
    try:
        urllib.request.urlretrieve(url, target)
    except Exception:
        # Exception (not a bare except) so that an interrupt still stops the loop.
        print(url)
        faulty_test_offline.append(url)
        # A transfer that died half-way may leave a truncated file behind; remove it,
        # otherwise the exists() check above would skip this URL on every re-run.
        if os.path.isfile(target):
            os.remove(target)

  0%|          | 62/14752 [02:11<111:54:07, 27.42s/it]

http://repo.karvyecom.com/image-repository/Classic/PERLE-CTS/PERLE-CTS_1.jpg


  1%|▏         | 186/14752 [02:11<54:22:31, 13.44s/it]

http://cdn.myeshopbox.com/flipkart_images/duke-75-styles-18lot/SDVP10Q_Ambrosia/COMBO_1.jpg


  4%|▍         | 562/14752 [02:11<18:10:22,  4.61s/it]

http://images.myeshopbox.com/Duke_18_Styles/Duke_18_Styles/BBAPLDK31571/MYNTRA_2.jpg


  4%|▍         | 618/14752 [04:22<11:46:45,  3.00s/it]

http://repo.karvyecom.com/image-repository/Classic/PERLE-STB/PERLE-STB_1.jpg


  6%|▌         | 830/14752 [04:22<3:59:07,  1.03s/it] 

http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2499_Sky/FLIPKART_2.jpg


  7%|▋         | 1027/14752 [06:33<2:47:51,  1.36it/s]

http://repo.karvyecom.com/image-repository/Classic/4-SEASONVA-106/4-SEASONVA-106_1.jpg


 10%|▉         | 1420/14752 [06:33<1:20:06,  2.77it/s]

http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2499_Lemon/FLIPKART_2.jpg
http://cdn.myeshopbox.com/flipkart_images/duke-101-style/SD23_Cranberry/MYNTRA_2.jpg


 11%|█         | 1550/14752 [06:33<55:36,  3.96it/s]  

http://myntra.myntassets.com/assets/images/1897348/2017/5/10/11494413157851-Duke-Men-Tshirts-5921494413157651-1.jpg


 14%|█▎        | 2017/14752 [08:44<55:19,  3.84it/s]

http://repo.karvyecom.com/image-repository/Classic/4-SEASONVA-116/4-SEASONVA-116_1.jpg


 14%|█▍        | 2041/14752 [08:44<39:23,  5.38it/s]

http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2673_Coral/FLIPKART_2.jpg


 16%|█▌        | 2341/14752 [10:55<53:58,  3.83it/s]

http://repo.karvyecom.com/image-repository/Classic/PERLE-RBR/PERLE-RBR_1.jpg


 20%|█▉        | 2950/14752 [10:55<35:58,  5.47it/s]

http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK34349/FLIPKART_2.jpg


 25%|██▍       | 3631/14752 [10:56<16:41, 11.11it/s]

http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK34336/FLIPKART_2.jpg
http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK34374/FLIPKART_2.jpg
http://images.myeshopbox.com/Duke_18_Styles/Duke_18_Styles/BBAPLDK31612/MYNTRA_0.jpg


 31%|███       | 4513/14752 [10:56<10:46, 15.84it/s]

http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK33598/FLIPKART_2.jpg


 34%|███▍      | 4982/14752 [13:06<24:42,  6.59it/s]

http://repo.karvyecom.com/image-repository/Classic/C-4-SEASONVA-110/C-4-SEASONVA-110_1.jpg
http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK34348/FLIPKART_2.jpg


 35%|███▍      | 5147/14752 [13:06<12:05, 13.24it/s]

http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK31546/FLIPKART_2.jpg
http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK34344/FLIPKART_2.jpg


 37%|███▋      | 5470/14752 [15:17<5:32:07,  2.15s/it]

http://repo.karvyecom.com/image-repository/Classic/PERLE-BLK/PERLE-BLK_1.jpg
http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2663_D.Navy/FLIPKART_2.jpg


 41%|████      | 6009/14752 [17:28<2:50:04,  1.17s/it]

http://repo.karvyecom.com/image-repository/Classic/PERLE-NVY/PERLE-NVY_1.jpg
http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2780_L.Navy/FLIPKART_2.jpg


 45%|████▍     | 6599/14752 [17:28<1:51:01,  1.22it/s]

http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/SD21_Anthra/FLIPKART_2.jpg


 50%|████▉     | 7312/14752 [19:39<1:17:44,  1.59it/s]

http://repo.karvyecom.com/image-repository/Classic/PERLE-SLT/PERLE-SLT_1.jpg


 54%|█████▎    | 7904/14752 [19:39<50:06,  2.28it/s]  

http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2775_L.Navy/FLIPKART_2.jpg


 56%|█████▌    | 8216/14752 [19:40<23:30,  4.63it/s]

http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2661_Cherry%20Tomato/FLIPKART_2.jpg
http://cdn.myeshopbox.com/flipkart_images/duke-101-style/SDVP10Q_Sirocco/COmbo%20_1.jpg


 58%|█████▊    | 8552/14752 [19:40<15:40,  6.59it/s]

https://dl.dropboxusercontent.com/sh/58h8ygy8hl75jad/AABz2a3hZsn5bRIdLlBOXS4ia/CATWTSAILJ6B1_1.jpg?dl=1


 64%|██████▎   | 9403/14752 [21:50<10:03,  8.86it/s]

http://repo.karvyecom.com/image-repository/Classic/PERLE-DSL/PERLE-DSL_1.jpg
http://images.myeshopbox.com/Duke_18_Styles/Duke_18_Styles/BBAPLDK31621/MYNTRA_0.jpg


 70%|███████   | 10333/14752 [21:51<04:05, 18.01it/s]

http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2775_Turquoise/FLIPKART_2.jpg
http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK31561/FLIPKART_2.jpg


 76%|███████▌  | 11214/14752 [24:01<04:54, 12.00it/s]

http://repo.karvyecom.com/image-repository/Classic/C-4-SEASONVA-105/C-4-SEASONVA-105_1.jpg


 83%|████████▎ | 12173/14752 [24:02<01:49, 23.59it/s]

http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK34347/FLIPKART_2.jpg
http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2633_Riviera/FLIPKART_2.jpg


 85%|████████▍ | 12511/14752 [26:12<05:25,  6.88it/s]

http://repo.karvyecom.com/image-repository/Classic/C-4-SEASONVA-103/C-4-SEASONVA-103_1.jpg


 90%|█████████ | 13333/14752 [26:13<01:41, 13.91it/s]

http://cdn.myeshopbox.com/flipkart_images/Duke-36-mix-lot-images/BBAPLDK34350/FLIPKART_2.jpg
http://cdn.myeshopbox.com/flipkart_images/duke-45-styles-lot24/LF2778_Cranberry/FLIPKART_2.jpg


 98%|█████████▊| 14434/14752 [36:41<3:29:10, 39.47s/it]

http://repo.karvyecom.com/image-repository/Classic/C-4-SEASONVA-113/C-4-SEASONVA-113_1.jpg


100%|██████████| 14752/14752 [42:06<00:00,  1.14it/s]  


### Extract T-shirt

In [4]:
def boundingBox(edges, im):
    """Crop `im` to the tight box that encloses every non-zero pixel of `edges`.

    The box is computed directly from the pixel coordinates. It is the same
    region the union of all external-contour bounding rectangles describes,
    but it does not depend on the return signature of ``cv2.findContours``,
    which differs between OpenCV major versions.

    Parameters
    ----------
    edges : 2-D array
        Edge / mask image. It is cast to uint8 first; any non-zero value
        counts as foreground.
    im : array indexed as [row, column, channel]
        Image to crop. Assumed to have the same height and width as `edges`
        (not checked here).

    Returns
    -------
    array
        ``im[y_l:y_r, x_l:x_r, :]``. When `edges` has no foreground pixel at
        all, `im` is returned uncropped instead of raising.
    """
    gray = edges.astype('uint8')
    rows, cols = np.nonzero(gray)

    # Nothing detected: fall back to the whole image (min()/max() of nothing would raise).
    if rows.size == 0:
        return im

    # Upper bounds are exclusive, hence the +1 on the last foreground row/column.
    y_l, y_r = rows.min(), rows.max() + 1
    x_l, x_r = cols.min(), cols.max() + 1

    return im[y_l:y_r, x_l:x_r, :]

In [5]:
def extractTshirt(im):
    """Crop `im` to its edge content and, for tall crops, keep only a band of rows.

    Steps:
      1. Canny edges on a 3x3-blurred grayscale copy, with thresholds placed
         33% below / above the median gray level of the unblurred image.
      2. Dilate the edge map with a 3x3 kernel and let `boundingBox` crop `im`.
      3. If the crop is at least 1250 rows tall, keep only rows
         ``int(1.3*h//4)`` up to ``int(3.5*(h//4))`` (h = crop height).

    Returns the cropped image array.
    """
    spread = 0.33

    grayscale = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)
    median_level = np.median(grayscale)

    # Median-centred Canny thresholds, clamped to the 8-bit range
    low = int(max(0, (1.0 - spread) * median_level))
    high = int(min(255, (1.0 + spread) * median_level))
    edge_map = cv2.Canny(smoothed, low, high)

    # Thicken the edges before looking for the enclosing box
    thickened = cv2.morphologyEx(edge_map, cv2.MORPH_DILATE, np.ones((3, 3), np.uint8))
    crop = boundingBox(thickened, im)

    n_rows = crop.shape[0]
    if n_rows >= 1250:
        top = int(1.3*n_rows//4)
        bottom = int(3.5*(n_rows//4))
        crop = crop[top:bottom, :, :]
    return crop

In [None]:
# Crop every downloaded test image with extractTshirt and cache the result as a .jpg.
# Re-running the cell only processes images that have no cleaned file yet.
dims_test = []  # shapes of the crops written during this run
cleaned_dir = './Data/Test_offline_cleaned/'
os.makedirs(cleaned_dir, exist_ok=True)  # cv2.imwrite does not create missing folders
for im_path in tqdm(glob.glob('./Data/Test_offline/*')):
    im = cv2.imread(im_path)
    if im is None:
        # Not a readable image - nothing to crop
        continue
    # Output name: text between the last '/' and the last '.', plus '.jpg'.
    # Kept exactly as is because the same rule builds `Key` and `labels.index`.
    out_file = cleaned_dir+im_path[im_path.rfind('/')+1:im_path.rfind('.')]+'.jpg'
    if os.path.exists(out_file):
        continue
    # NOTE(review): extractTshirt converts with COLOR_BGR2GRAY but is handed RGB here;
    # left untouched because changing it would change the crops - confirm intent.
    im = extractTshirt(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    if cv2.imwrite(out_file, cv2.cvtColor(im, cv2.COLOR_RGB2BGR)):
        dims_test.append(im.shape)
    else:
        # Surface failed writes instead of silently ending up with fewer cleaned files
        print(im_path)

### Keras Model

In [7]:
# One-hot encode the training sub-categories; label_list further down is taken from these columns.
labels = pd.get_dummies(train['Sub_category'])
# Re-key the rows by image file name: text between the last '/' and the last '.' of the URL, plus '.jpg'.
labels.index = train.Link_to_the_image.astype(str).map(lambda x : x[x.rfind('/')+1:x.rfind('.')]+'.jpg')

In [8]:
# glob returns paths in arbitrary, filesystem-dependent order; sort them so the row
# order of the image tensor and of link_test is the same on every run.
test_files = sorted(glob.glob('./Data/Test_offline_cleaned/*'))

In [9]:
# Network input size: every cleaned image is resized to (height, width) with `channels` colour planes.
width = 75
height = 75
channels = 3

#### Test

In [10]:
# Load every cleaned test image into one (N, height, width, channels) array.
# uint8 holds the raw 8-bit pixels losslessly and is 8x smaller than np.empty's
# float64 default; the division by 255 happens when the array is read back from disk.
x_im_test = np.empty((len(test_files), height, width, channels), dtype=np.uint8)
link_test = []  # file name of each row of x_im_test, in row order
for idx, im_path in enumerate(tqdm(test_files)):
    im = cv2.imread(im_path)
    if im is None:
        # Name the offending file instead of failing inside cvtColor on None
        raise IOError('Could not read image: ' + im_path)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (width, height), interpolation=cv2.INTER_AREA).reshape((height, width, channels))
    x_im_test[idx] = im
    # basename() gives the bare file name regardless of the path separator glob produced
    link_test.append(os.path.basename(im_path))

100%|██████████| 14701/14701 [03:39<00:00, 66.92it/s]


In [12]:
# Persist the image tensor and the matching file names so the image loading can be skipped later.
with h5py.File('./Data/test_data_offline.h5', 'w') as hf:
    hf.create_dataset("x_im",  data=x_im_test, maxshape=(None, height, width, channels))
# Context manager so the pickle file is flushed and closed right away
with open('./Data/test_data_offline_links.pickle', 'wb') as links_file:
    pickle.dump(link_test, links_file)

In [14]:
# Read the cached image tensor back and divide the pixel values by 255.
with h5py.File('./Data/test_data_offline.h5', 'r') as hf:
    x_im_test = hf['x_im'][:]/255.0
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote itself.
with open('./Data/test_data_offline_links.pickle', 'rb') as links_file:
    link_test = pickle.load(links_file)

In [15]:
# Model input: the image tensor loaded above (already divided by 255); this is an alias, not a copy.
x_test = x_im_test

In [16]:
def get_model():
    """Build and compile the image classifier.

    Architecture (all convolutions use 'valid' padding and are followed by
    batch normalisation and ReLU):
      * conv 9 (3x3), conv 18 (3x3), 2x2 max-pool, dropout 0.1
      * conv 36 (3x3), conv 72 (3x3), 2x2 max-pool
      * conv 144 (2x2), 2x2 max-pool
      * conv 256 (2x2), 2x2 max-pool
      * flatten, dense 1024 + dropout 0.5, dense 196 + dropout 0.1,
        dense 24 with softmax

    The input shape comes from the notebook-level `height`, `width` and
    `channels`. Returns the model compiled with a default Adam optimizer,
    categorical cross-entropy loss and the accuracy metric.
    """
    def conv_bn_relu(tensor, filters, kernel):
        # Convolution -> batch norm -> ReLU, the unit every conv stage is made of
        tensor = Conv2D(filters, kernel, padding='valid')(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    def downsample(tensor):
        # Halve the spatial resolution
        return MaxPooling2D((2, 2), strides=(2, 2))(tensor)

    input_ = Input(shape=(height, width, channels), name="X_1")

    # Stage 1: the only convolutional stage followed by dropout
    features = conv_bn_relu(input_, 9, (3, 3))
    features = conv_bn_relu(features, 18, (3, 3))
    features = downsample(features)
    features = Dropout(0.1)(features)

    # Stage 2
    features = conv_bn_relu(features, 36, (3, 3))
    features = conv_bn_relu(features, 72, (3, 3))
    features = downsample(features)

    # Stages 3 and 4: a single 2x2 convolution each
    features = downsample(conv_bn_relu(features, 144, (2, 2)))
    features = downsample(conv_bn_relu(features, 256, (2, 2)))

    # Classification head
    head = Flatten()(features)
    head = Dense(1024, activation='relu')(head)
    head = Dropout(0.5)(head)
    head = Dense(196, activation='relu')(head)
    head = Dropout(0.1)(head)
    head = Dense(24, activation="softmax")(head)

    model = Model(input_, head)
    model.compile(loss="categorical_crossentropy", optimizer=Adam(), metrics=["accuracy"])

    return model

model = get_model()

### KFold

In [18]:
# Ensemble inference: load the best weights of each fold in turn and predict the test set.
kfold = 5
kfold_scores = []

results = []  # one prediction array (rows = test images, columns = classes) per fold
# No random_state here: it only has a meaning together with shuffle=True, and recent
# scikit-learn versions raise a ValueError when it is passed without shuffling.
sss = KFold(n_splits=kfold)
for i in range(kfold):

    # Weight files are numbered from 1
    model.load_weights('keras_models/kfold_w_arms_col/best_weights_'+str(i+1)+'.hdf5')

    # Predict
    preds_test = model.predict(x_test)
    results.append(preds_test)

label_list = list(labels.columns)

In [19]:
# Class names in one-hot column order, plus a lowercase -> original spelling lookup.
label_list = [column for column in labels.columns]
label_map_dict = dict((name.lower(), name) for name in label_list)
# Average the per-fold probabilities, then pick the most probable class of every row ...
combined_results = np.mean(np.array(results), axis=0).argmax(axis=1)
# ... and translate the class indices into class names.
combined_results = [label_list[class_idx] for class_idx in combined_results]

In [20]:
# Some image file names carry the category as one of their '-'-separated tokens.
# Extract that label per row; rows without such a token get NaN.
b = set([i.lower() for i in labels.columns])

def _leaked_label(url):
    """Return the label found as a '-'-separated token of the URL's file name, else NaN.

    The file name is the text between the last '/' and the last '.' of `url`,
    compared case-insensitively against the known labels.
    """
    stem = url[url.rfind('/')+1:url.rfind('.')].lower()
    for token in stem.split('-'):
        if token in b:
            # First match in file-name order: deterministic, unlike taking element 0
            # of a set intersection, whose order is not defined.
            return label_map_dict[token]
    return np.nan  # the np.NaN alias was removed in NumPy 2.0

test['leakage_labs'] = test.Link_to_the_image.astype(str).map(_leaked_label)

In [21]:
# Key = image file name: text between the last '/' and the last '.' of the URL, plus '.jpg'.
test['Key'] = test.Link_to_the_image.astype(str).map(lambda x : x[x.rfind('/')+1:x.rfind('.')]+'.jpg')
# Build the file-name -> prediction lookup once; calling link_test.index() inside map
# rescanned the whole list for every row.
prediction_by_file = {}
for file_name, predicted in zip(link_test, combined_results):
    # setdefault keeps the first occurrence, which is what list.index() returned
    prediction_by_file.setdefault(file_name, predicted)
# Rows whose image is not among the test files get an empty prediction.
test['prediction'] = test['Key'].map(lambda x : prediction_by_file.get(x, ''))
# A label found in the file name takes precedence over the model's prediction.
test['Sub_category'] = test['leakage_labs'].fillna(test['prediction'])

In [22]:
# Remove the intermediate helper columns before export.
# Not re-runnable on its own: once the columns are gone, drop() raises for the missing labels.
test = test.drop(['Key', 'leakage_labs', 'prediction'], axis=1)

In [23]:
# Write empty strings instead of NaN, then export the submission file.
test = test.fillna('')
os.makedirs('./Submissions', exist_ok=True)  # to_csv does not create missing folders
test.to_csv('./Submissions/Sub_offline.csv', index=False)

In [24]:
# Number of test images per predicted class.
# Counter tallies in a single pass; calling list.count() once per element rescanned the list every time.
from collections import Counter
dict(Counter(combined_results))

{'Abstract': 251,
 'Biker': 4,
 'Camouflage': 27,
 'Checked': 21,
 'Colourblocked': 462,
 'Conversational': 151,
 'Floral': 289,
 'Geometric': 278,
 'Graphic': 1171,
 'Humour and Comic': 48,
 'People and Places': 125,
 'Self Design': 4,
 'Solid': 5814,
 'Sports': 1,
 'Sports and Team Jersey': 3,
 'Striped': 2337,
 'Superhero': 79,
 'Tribal': 1,
 'Typography': 3616,
 'Varsity': 19}