## Environment Setup (Kaggle paths)

In [None]:
import warnings 
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices raised by the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
# importing libraries
import os
import gc
import sys
import math
import json
import glob
import random
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import skimage.io
from IPython.display import clear_output

import itertools
from tqdm import tqdm

from imgaug import augmenters as iaa
from sklearn.model_selection import StratifiedKFold, KFold

import tensorflow as tf

In [None]:
# Dataset root, the training-image folder inside it, and the directory used for outputs.
DATA_DIR = Path('/kaggle/input/imaterialist-fashion-2019-FGVC6')
TRAIN_IMAGE_DIR = DATA_DIR / 'train'
ROOT_DIR = Path('/kaggle/working')

## Data Import

In [None]:
# Load the training annotations table (pandas is already imported in the import cell).
train = pd.read_csv(DATA_DIR / 'train.csv')
train.head()


In [None]:
# Number of annotation rows in the training table.
print(train.shape[0])

In [None]:
# Extract the category names from the dataset's JSON metadata file.
label_json_path = DATA_DIR / 'label_descriptions.json'
with open(str(label_json_path)) as fh:
    label_descriptions = json.load(fh)

# Only the first 11 categories are kept for this notebook.
label_names = [category['name'] for category in label_descriptions['categories']][:11]
label_names

In [None]:
# Lookup table: position of each kept category (Id) and its name (Labels).
label_df = pd.DataFrame(
    {'Id': range(len(label_names)), 'Labels': label_names},
    columns=['Id', 'Labels'],
)
label_df.head()

In [None]:
# Number of categories kept.
print(label_df.shape[0])

In [None]:
# Keep only the segments whose category is 0, 1 or 10.
#
# CategoryId is the part of ClassId before the first '_' and is still a *string*
# here, so `<= "10"` is a lexicographic comparison: of the numeric category
# strings only "0", "1" and "10" satisfy it ("2".."9" sort after "10").
# NOTE(review): if the intent is "the first 11 categories", compare as integers instead.
segment_df = train.assign(CategoryId=train['ClassId'].str.split('_').str[0])

# .copy() gives an independent frame, so later column assignments do not act on
# a filtered view of another DataFrame.
segment_df = segment_df[segment_df['CategoryId'] <= "10"].copy()

print("Total segments: ", len(segment_df))
segment_df.head()

In [None]:
# segment_df = train
# segment_df['CategoryId'] = segment_df['ClassId'].str.split('_').str[0]
# # segment_df = segment_df[segment_df['ClassId'] <= "10"]
# # train.head()
# print("Total segments: ", len(segment_df))
# segment_df

In [None]:
# Rows with the same image are grouped together because the subsequent operations perform at an image level.
# Columns are selected with a list ([[...]]): tuple-style selection on a GroupBy
# is not accepted by current pandas releases.
image_df = segment_df.groupby('ImageId')[['EncodedPixels', 'CategoryId']].agg(lambda x: list(x))

# Height/Width are averaged per image; the mean may come back as float, so it is
# cast back to integers because the values are later used as array dimensions.
size_df = segment_df.groupby('ImageId')[['Height', 'Width']].mean().round().astype('int64')
image_df = image_df.join(size_df, on='ImageId')

print("Total images: ", len(image_df))
image_df.head()

## EDA

In [None]:
# Joint distribution of image width and height.
# jointplot builds its own figure, so a preceding plt.figure() would only leave
# an empty extra figure behind.
sns.jointplot(x=image_df['Width'], y=image_df['Height']);

In [None]:
# Histogram of image heights.
fig, ax = plt.subplots(figsize=(7, 5))
sns.distplot(image_df['Height'], kde=False, ax=ax)
ax.set_title("Height Distribution", fontsize=10)
plt.show()

In [None]:
# Histogram of image widths.
fig, ax = plt.subplots(figsize=(7, 5))
sns.distplot(image_df['Width'], kde=False, ax=ax)
ax.set_title("Width Distribution", fontsize=10)
plt.show()

In [None]:
# Histogram of image areas, expressed in units of 10,000 pixels.
area_10k = (image_df['Height'] * image_df['Width'])/10000
fig, ax = plt.subplots(figsize=(10, 5))
sns.distplot(area_10k, kde=False, ax=ax)
ax.set_title("Area Distribution /(10000)", fontsize=10)
ax.set_xlabel(" Area (in 10k)", fontsize=10)
plt.show()

In [None]:
# number of labels per image
# The columns are named explicitly instead of relying on the 'index' name that
# value_counts().reset_index() produces, which is not the same across pandas versions.
labels_per_image = (
    image_df['CategoryId']
    .map(len)
    .value_counts()
    .sort_index()
    .rename_axis('#labels')
    .reset_index(name='#images')
)

plt.figure(figsize=(15, 7))
# x/y are passed by keyword; positional data arguments are not accepted by newer seaborn.
sns.barplot(x=labels_per_image['#labels'], y=labels_per_image['#images'])
plt.title("Number of Labels per Image", fontsize=20)
plt.xlabel("# of labels", fontsize=20)
plt.ylabel("# of images", fontsize=20)
plt.show()

In [None]:
# Attach the label name to every segment and count distinct images per label.
# The integer CategoryId is built with assign() on a new frame instead of being
# written back into segment_df, so re-running earlier cells sees unchanged data.
labels_per_image2 = (
    segment_df
    .assign(CategoryId=segment_df['CategoryId'].astype('int64'))
    .merge(label_df, how='left', left_on='CategoryId', right_on='Id')
)
labels_per_image3 = labels_per_image2.groupby('Labels')['ImageId'].nunique().to_frame().reset_index()
labels_per_image3.head()

In [None]:
# Number of segment rows per label (not de-duplicated by image).
labels_per_image4 = (
    labels_per_image2
    .groupby('Labels')['ImageId']
    .count()
    .reset_index()
)
labels_per_image4.head()

In [None]:
# Export the per-label counts to CSV (relative path, so it lands in the current working directory).
labels_per_image4.to_csv('word_cloud_data.csv')

In [None]:
# Map each label (first column) to its count (second column) for the word cloud.
d = dict(zip(labels_per_image4.iloc[:, 0], labels_per_image4.iloc[:, 1]))

In [None]:
from wordcloud import WordCloud

# Word cloud sized by the label frequencies collected in `d`.
wordcloud = WordCloud(background_color='Ghostwhite')
wordcloud.generate_from_frequencies(frequencies=d)

fig, ax = plt.subplots(figsize=(25, 7))
ax.imshow(wordcloud, interpolation="bilinear")
ax.axis("off")
plt.show()

In [None]:
# Number of distinct images containing each label.
plt.figure(figsize=(20, 7))
# x/y are passed by keyword; positional data arguments are not accepted by newer seaborn.
sns.barplot(x=labels_per_image3['Labels'], y=labels_per_image3['ImageId'])
plt.xticks(rotation=90)
plt.title("Labels Distribution in Images", fontsize=20)
plt.xlabel("labels", fontsize=10)
plt.ylabel("# of images", fontsize=10)
plt.show()

## Data Setup

In [None]:
# Keep only the annotated images whose files are actually present in TRAIN_IMAGE_DIR
# (the original note says this is roughly 5k images).

images = os.listdir(TRAIN_IMAGE_DIR)
uploaded_images = pd.DataFrame({'image_name': images})
is_uploaded = image_df.index.isin(uploaded_images['image_name'])
image_df = image_df[is_uploaded]

In [None]:
# (rows, columns) of image_df after restricting it to files present in TRAIN_IMAGE_DIR.
image_df.shape

In [None]:
# Partition data in train and test
FOLD = 0
N_FOLDS = 10

# ideally, this should be multilabel stratification
kf = KFold(n_splits=N_FOLDS, random_state=42, shuffle=True)

def get_fold(fold=None):
    """Return the (train, validation) frames of one K-fold split of image_df.

    Args:
        fold: zero-based split index; defaults to the module-level FOLD.

    Returns:
        Tuple (train_df, valid_df) of row subsets of image_df.

    Raises:
        ValueError: if fold is outside [0, N_FOLDS).
    """
    if fold is None:
        fold = FOLD
    if not 0 <= fold < N_FOLDS:
        raise ValueError(f"fold must be in [0, {N_FOLDS}), got {fold}")
    # A fresh split iterator is created on every call: a shared generator would be
    # partly consumed by the first call, so later calls would return another split.
    train_index, valid_index = next(itertools.islice(kf.split(image_df), fold, None))
    return image_df.iloc[train_index], image_df.iloc[valid_index]

train_df, valid_df = get_fold()

## Setting up Mask RCNN

In [None]:
# Remove any Mask_RCNN directory in the current working directory before cloning.
!rm -rf Mask_RCNN 

In [None]:
# import matterport Mask-RCNN implementation
!git clone https://github.com/Kedar-V/Mask_RCNN.git
os.chdir('Mask_RCNN')

!rm -rf .git # to prevent an error when the kernel is committed
!rm -rf images assets # to prevent displaying images at the bottom of a kernel

sys.path.append(ROOT_DIR/'Mask_RCNN')
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

In [None]:
# Download the COCO weights published with the matterport Mask_RCNN v2.0 release
# into the current working directory and list the file.
!wget --quiet https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5
!ls -lh mask_rcnn_coco.h5

# Relative path: it resolves against the current working directory.
COCO_WEIGHTS_PATH = "mask_rcnn_coco.h5"

In [None]:
# # Already have trained weights, we will continue on those weights
# pre_trained_weight = '/content/drive/My Drive/Projects/iMaterialist/trained weights/weights_0.08133.h5'

In [None]:
# Set configuration

# NOTE(review): label_names is sliced to the same count (11) in the data-import
# section; the two values need to stay in sync.
NUM_CATS = 11  # classification ignoring attributes (only categories)
IMAGE_SIZE = 512 # the image size is set to 512, which is the same as the size of submission masks

class FashionConfig(Config):
    """Mask R-CNN configuration used for training on the fashion categories."""
    NAME = "fashion"
    NUM_CLASSES = NUM_CATS + 1 # +1 for the background class
    
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4 # Batch size - memory error occurs when IMAGES_PER_GPU is too high
    #https://datascience.stackexchange.com/questions/29719/how-to-set-batch-size-steps-per-epoch-and-validation-steps
    
    BACKBONE = 'resnet50' #resnet50 will be lighter than resnet101 for training
    
    # Both dimensions are pinned to IMAGE_SIZE.
    IMAGE_MIN_DIM = IMAGE_SIZE
    IMAGE_MAX_DIM = IMAGE_SIZE    
    # NOTE(review): presumably "none" because resize_image already produces
    # IMAGE_SIZE x IMAGE_SIZE inputs - TODO confirm.
    IMAGE_RESIZE_MODE = "none"
    
#     RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)
    RPN_ANCHOR_SCALES = (4, 8, 16, 32, 64)
    DETECTION_MIN_CONFIDENCE = 0.7
    DETECTION_NMS_THRESHOLD = 0.7

    STEPS_PER_EPOCH = 1000
    VALIDATION_STEPS = 200

    MAX_GT_INSTANCES = 12
    DETECTION_MAX_INSTANCES = 12

    ## balance out losses
    # https://stackoverflow.com/questions/55360262/what-exactly-are-the-losses-in-matterport-mask-r-cnn
    # https://stackoverflow.com/questions/46272841/what-is-the-loss-function-of-the-mask-rcnn
    LOSS_WEIGHTS = {
          "rpn_class_loss": 1.0, # How well the Region Proposal Network separates background from objects
          "rpn_bbox_loss": 0.8, # How well the RPN localizes objects
          "mrcnn_class_loss": 6.0, # How well the Mask RCNN recognizes each class of object
          "mrcnn_bbox_loss": 6.0, # How well the Mask RCNN localizes objects
          "mrcnn_mask_loss": 6.0 # How well the Mask RCNN segments objects
    }
    
# Instantiate the training configuration and call its display() method.
config = FashionConfig()
config.display()

In [None]:
# resizing image to 512X512;
def resize_image(image_path):
    """Read an image from disk and return it as an IMAGE_SIZE x IMAGE_SIZE RGB array.

    Args:
        image_path: path of the image file, as a string.

    Returns:
        The image converted from OpenCV's BGR order to RGB and resized with
        INTER_AREA interpolation.

    Raises:
        FileNotFoundError: if OpenCV could not read the file.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None; fail here with the path
        # instead of with an opaque error inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_AREA)
    return img

In [None]:
#  MaskRCNN Class

class FashionDataset(utils.Dataset):
    """Mask R-CNN dataset built from the per-image annotation frame.

    Each row of the frame is expected to be indexed by the image file name and to
    carry 'CategoryId' and 'EncodedPixels' lists plus 'Height' and 'Width'.
    """

    def __init__(self, df):
        # super() already binds the instance; passing `self` again would hand it
        # to the base initializer as an extra positional argument.
        super().__init__()

        # Add classes: category i is registered under class id i+1.
        for i, name in enumerate(label_names):
            self.add_class("fashion", i+1, name)

        # Add images (the frame index is the image file name).
        for image_name, row in df.iterrows():
            self.add_image("fashion",
                           image_id=image_name,
                           path=str(TRAIN_IMAGE_DIR/image_name),
                           labels=row['CategoryId'],
                           annotations=row['EncodedPixels'],
                           height=row['Height'], width=row['Width'])

    def image_reference(self, image_id):
        """Return the image path and the label names of its annotations."""
        info = self.image_info[image_id]
        return info['path'], [label_names[int(x)] for x in info['labels']]

    def load_image(self, image_id):
        """Load the image resized to IMAGE_SIZE x IMAGE_SIZE."""
        return resize_image(self.image_info[image_id]['path'])

    def load_mask(self, image_id):
        """Decode the run-length encoded masks of one image.

        Returns:
            (mask, class_ids): mask is a uint8 array of shape
            (IMAGE_SIZE, IMAGE_SIZE, n_annotations); class_ids holds category + 1
            for each annotation.
        """
        info = self.image_info[image_id]
        # The stored sizes may be floats (they come from an aggregation); numpy
        # needs integer dimensions.
        height, width = int(info['height']), int(info['width'])

        mask = np.zeros((IMAGE_SIZE, IMAGE_SIZE, len(info['annotations'])), dtype=np.uint8)
        labels = []

        for m, (annotation, label) in enumerate(zip(info['annotations'], info['labels'])):
            sub_mask = np.zeros(height * width, dtype=np.uint8)
            rle = [int(x) for x in annotation.split()]

            # The encoding is a sequence of (start, length) pairs. Start positions
            # in the competition's run-length format are 1-based, hence the -1.
            for start_pixel, run_length in zip(rle[0::2], rle[1::2]):
                start = start_pixel - 1
                sub_mask[start: start + run_length] = 1

            # Pixels are numbered column by column, hence Fortran order.
            sub_mask = sub_mask.reshape((height, width), order='F')
            sub_mask = cv2.resize(sub_mask, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_NEAREST)

            mask[:, :, m] = sub_mask
            labels.append(int(label)+1)

        return mask, np.array(labels)

In [None]:
# Show the top masks of randomly chosen images from the full dataset.
dataset = FashionDataset(image_df)
dataset.prepare()

N_PREVIEWS = 1
for i in range(N_PREVIEWS):
    image_id = random.choice(dataset.image_ids)
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)

    print(dataset.image_reference(image_id))
    print(class_ids)
    print(dataset.class_names)
    visualize.display_top_masks(image, mask, class_ids, dataset.class_names, limit=4)

In [None]:
# Build and prepare the training and validation datasets.
train_dataset, valid_dataset = FashionDataset(train_df), FashionDataset(valid_df)
for ds in (train_dataset, valid_dataset):
    ds.prepare()

## Training Model

In [None]:
# Image augmentation pipeline applied during training.
rotate_quarter_turn = iaa.OneOf([iaa.Affine(rotate=angle) for angle in (0, 90, 180, 270)])
brightness_or_contrast = iaa.OneOf([
    iaa.Multiply((0.9, 1.1)),
    iaa.ContrastNormalization((0.9, 1.1)),
])
blur_or_sharpen = iaa.OneOf([
    iaa.GaussianBlur(sigma=(0.0, 0.3)),
    iaa.Sharpen(alpha=(0.0, 0.3)),
])

augmentation = iaa.Sequential([
    rotate_quarter_turn,
    iaa.Fliplr(0.5),
    iaa.Flipud(0.5),
    brightness_or_contrast,
    blur_or_sharpen,
])

In [None]:
# Preview: a 2 x 5 grid of augmented versions of the last visualized image.
imggrid = augmentation.draw_grid(image, cols=5, rows=2)
fig, ax = plt.subplots(figsize=(20, 10))
ax.imshow(imggrid.astype(int));

In [None]:
# Create the training model and load the COCO weights, leaving out the listed
# layers so they are not initialised from the COCO checkpoint.
coco_excluded_layers = ['mrcnn_class_logits', 'mrcnn_bbox_fc', 'mrcnn_bbox', 'mrcnn_mask']

model = modellib.MaskRCNN(mode='training', config=config, model_dir=ROOT_DIR)
model.load_weights(COCO_WEIGHTS_PATH, by_name=True, exclude=coco_excluded_layers)

In [None]:
# Base learning rate.
LR = 1e-4

In [None]:
## train head layer alone

# %%time
# model.train(train_dataset, valid_dataset,
#             learning_rate=LR*2,
#             epochs=2, # EPOCHS[0],
#             layers='heads',
#             augmentation=augmentation)
# history = model.keras_model.history.history
# history

In [None]:
# %%time
# model.train(train_dataset, valid_dataset,
#             learning_rate=LR/4,
#             epochs=1,
#             layers='all',
#             augmentation=augmentation)

# # new_history = model.keras_model.history.history
# # for k in new_history: history[k] = history[k] + new_history[k]
# history = model.keras_model.history.history

In [None]:
# epochs = range(1, len(history['loss'])+1)
# pd.DataFrame(history, index=epochs)

# # find best epoch
# best_epoch = np.argmin(history["val_loss"]) + 1
# print("Best epoch: ", best_epoch)
# print("Valid loss: ", history["val_loss"][best_epoch-1])

# glob_list = glob.glob(f'/kaggle/working/fashion*/mask_rcnn_fashion_{best_epoch:04d}.h5')
# model_path = glob_list[0] if glob_list else ''
# print(model_path)

In [None]:
# %%time
# model.train(train_dataset, valid_dataset,
#             learning_rate=LR/8,
#             epochs=15,
#             layers='all',
#             augmentation=augmentation)

# new_history = model.keras_model.history.history
# for k in new_history: history[k] = history[k] + new_history[k]

In [None]:
# epochs = range(1, len(history['loss'])+1)
# pd.DataFrame(history, index=epochs)

In [None]:
# find best epoch
# best_epoch = np.argmin(history["val_loss"]) + 1
# # best_epoch = 20
# print("Best epoch: ", best_epoch)
# print("Valid loss: ", history["val_loss"][best_epoch-1])

In [None]:
# os.chdir('/kaggle/working')
# !ls

In [None]:
# glob_list = glob.glob(f'/kaggle/working/fashion*/mask_rcnn_fashion_{best_epoch:04d}.h5')
# model_path = glob_list[0] if glob_list else ''
# print(model_path)
# Weights file loaded by the inference model below; the path is hard-coded rather
# than derived from a training run in this notebook.
model_path = "/kaggle/input/10-classes/mask_rcnn_fashion_0020.h5"


In [None]:
# model_path = '/content/fashion20191109T2055/mask_rcnn_fashion_0007.h5'

## Prediction

In [None]:
# import os
# print(os.listdir("/kaggle/input/10-classes/mask_rcnn_fashion_0020.h5"))

In [None]:
# Prediction, this cell defines InferenceConfig and loads the weights stored at model_path.

class InferenceConfig(FashionConfig):
    """FashionConfig variant with GPU_COUNT and IMAGES_PER_GPU both set to 1."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

# Rebuild the model in inference mode and load the weights stored at model_path.
inference_config = InferenceConfig()
model = modellib.MaskRCNN(mode='inference', config=inference_config, model_dir=ROOT_DIR)
model.load_weights(model_path, by_name=True)

In [None]:
# Since the submission system does not permit overlapped masks, we have to fix them
def refine_masks(masks, rois):
    """Remove overlaps between instance masks and refit each box to its mask.

    Masks are visited from smallest to largest area; each one keeps only the
    pixels not already claimed by a previously visited mask. Every non-empty mask
    then gets its box replaced by [y_min, x_min, y_max, x_max] of its pixels.

    Args:
        masks: array of shape (H, W, N), one channel per instance.
        rois: array of shape (N, 4).

    Returns:
        (masks, rois): the same two arrays, which are modified in place.
    """
    areas = masks.reshape(-1, masks.shape[-1]).sum(axis=0)
    claimed = np.zeros(masks.shape[:-1], dtype=bool)
    for m in np.argsort(areas):
        masks[:, :, m] = np.logical_and(masks[:, :, m], np.logical_not(claimed))
        claimed = np.logical_or(masks[:, :, m], claimed)

    for m in range(masks.shape[-1]):
        ys, xs = np.nonzero(masks[:, :, m])
        # Emptiness is decided by the number of pixels: testing the coordinate
        # values would treat a mask whose only pixel is (0, 0) as empty.
        if ys.size:
            rois[m, :] = [ys.min(), xs.min(), ys.max(), xs.max()]
    return masks, rois

In [None]:
# Run the model on a single test image and display the detections.
# Any other image path can be used here.
image_path = '/kaggle/input/imaterialist-fashion-2019-FGVC6/test/07960daf191c39d8a5c9ea31d0967b72.jpg'

# skimage returns the image in RGB order already, so it is used as-is; applying a
# BGR->RGB conversion to it would swap the red and blue channels in the plots.
img = skimage.io.imread(image_path)

# original image
plt.figure(figsize=(12,10))
skimage.io.imshow(img)

result = model.detect([resize_image(image_path)])
r = result[0]
if r['masks'].size > 0:
    # Scale the IMAGE_SIZE x IMAGE_SIZE predictions back to the original resolution.
    masks = np.zeros((img.shape[0], img.shape[1], r['masks'].shape[-1]), dtype=np.uint8)
    for m in range(r['masks'].shape[-1]):
        masks[:, :, m] = cv2.resize(r['masks'][:, :, m].astype('uint8'), 
                                    (img.shape[1], img.shape[0]), interpolation=cv2.INTER_NEAREST)
    
    y_scale = img.shape[0]/IMAGE_SIZE
    x_scale = img.shape[1]/IMAGE_SIZE
    rois = (r['rois'] * [y_scale, x_scale, y_scale, x_scale]).astype(int)
    
    masks, rois = refine_masks(masks, rois)
else:
    masks, rois = r['masks'], r['rois']

visualize.display_instances(img, rois, masks, r['class_ids'], 
                            ['bg']+label_names, r['scores'])

In [None]:
# Sample submission: provides the list of test image ids and the output column names.
# The path is built from DATA_DIR, like the other dataset files, instead of repeating the root.
submission_file = pd.read_csv(DATA_DIR / 'sample_submission.csv')
submission_file.head()

In [None]:
# Convert data to run-length encoding
def to_rle(bits):
    """Run-length encode the truthy runs of a flat sequence.

    Args:
        bits: iterable of values; consecutive equal truthy values form one run.

    Returns:
        Flat list [start0, length0, start1, length1, ...] where each start is the
        zero-based position of the first element of a run.
    """
    rle = []
    pos = 0
    for bit, group in itertools.groupby(bits):
        run_length = len(list(group))
        if bit:
            # The run length is the number of elements, not the sum of their
            # values, so masks holding values other than 0/1 encode correctly.
            rle.extend([pos, run_length])
        pos += run_length
    return rle

In [None]:
%%time
test_path = Path('/kaggle/input/imaterialist-fashion-2019-FGVC6/test')
sub_list = []
missing_count = 0
for i, row in tqdm(submission_file.iterrows(), total=len(submission_file)):
    image = resize_image(str(test_path/row['ImageId']))
    result = model.detect([image])[0]
    if result['masks'].size > 0:
        masks, _ = refine_masks(result['masks'], result['rois'])
        for m in range(masks.shape[-1]):
            mask = masks[:, :, m].ravel(order='F')
            rle = to_rle(mask)
            label = result['class_ids'][m] - 1
            sub_list.append([row['ImageId'], ' '.join(list(map(str, rle))), label])
    else:
        # The system does not allow missing ids, this is an easy way to fill them 
        sub_list.append([row['ImageId'], '1 1', 23])
        missing_count += 1

In [None]:
# Assemble the predictions into a frame with the sample submission's columns,
# report coverage, and write the CSV for upload to Kaggle.
submission_columns = submission_file.columns.values
submission_df = pd.DataFrame(sub_list, columns=submission_columns)

n_images_with_rows = submission_df['ImageId'].nunique()
print("Total image results: ", n_images_with_rows)
print("Missing Images: ", missing_count)

submission_path = ROOT_DIR/"submission.csv"
submission_df.to_csv(submission_path, index=False)

submission_df

In [None]:
# import time
# time.sleep(5)

# from google.colab import files
# files.download('/content/submission.csv') 