## Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
import os
import json

## Open folders

In [None]:
# Show the contents of the input root, then of the competition data folder.
for input_folder in ("../input", "../input/imaterialist-fashion-2019-FGVC6"):
    print(os.listdir(input_folder))

## Open and load label_descriptions file

In [None]:
# Read the label description file. The context manager closes the file handle
# even if reading fails (the bare open(...).read() left it to the garbage
# collector). JSON is UTF-8 by specification, so the encoding is explicit.
with open(
    "../input/imaterialist-fashion-2019-FGVC6/label_descriptions.json",
    encoding="utf-8",
) as label_file:
    json_data = label_file.read()
label_descriptions = json.loads(json_data)

# dataset info
label_descriptions['info']

## First look at the categories

In [None]:
# One row per entry of the "categories" list in the label descriptions.
categories_label_df = pd.DataFrame(label_descriptions['categories'])

# Count categories and the distinct values of the 'supercategory' column.
# The label previously said "subcategories" although the counted column is
# 'supercategory'.
print("The number of categories : ",len(categories_label_df))
print("The number of supercategories : ",len(categories_label_df['supercategory'].unique()))

# Print first 5 rows
categories_label_df.head()

### Count: Group by supercategory:

In [None]:
# Number of category names listed under each supercategory.
(
    categories_label_df
    .groupby('supercategory')['name']
    .count()
)

### Count attributes

In [None]:
# Build a table from the "attributes" entries of the label descriptions.
attributes_label_df = pd.DataFrame(data=label_descriptions['attributes'])
print("The number of attributes : ", attributes_label_df.shape[0])

# Preview the first five attribute rows.
display(attributes_label_df.head(5))

## Train data info: unique images, dataframe shape

In [None]:
# Load the training annotation table and report its size.
train_df = pd.read_csv(
    filepath_or_buffer="../input/imaterialist-fashion-2019-FGVC6/train.csv"
)
print("dataframe shape:", train_df.shape)
print("number of unique images :", len(set(train_df.ImageId)))

# Preview the first five rows.
train_df.head(n=5)

## Identify Attributes: split the `ClassId` column into Category and Attributes

In [None]:
# Split each ClassId string at its first underscore: the leading piece is
# converted to an integer category id, the remainder (possibly empty) is kept
# verbatim as the attribute string.
train_df['Category'] = train_df['ClassId'].apply(
    lambda class_id: int(class_id.partition("_")[0])
)
train_df['Attributes'] = train_df['ClassId'].apply(
    lambda class_id: class_id.partition("_")[2]
)
train_df.head()

## Percentage of objects and images with attributes:

In [None]:
# Share of rows whose attribute string is non-empty.
rows_with_attributes = len(train_df[train_df["Attributes"] != ""])
print(f'Class objects with attributes: {rows_with_attributes / len(train_df) * 100:.2f}%')

In [None]:
# Share of distinct images that have at least one row with a non-empty
# attribute string.
images_with_attributes = set(train_df.loc[train_df["Attributes"] != "", "ImageId"])
all_images = set(train_df["ImageId"])
print(f'Images with attributes: {len(images_with_attributes) / len(all_images) * 100:.2f}%')

## Simple statistics: histogram (number of images by category)

In [None]:
# Count rows per category.
# NOTE(review): count() tallies rows of train_df, so an image that has several
# rows of the same category contributes more than once -- confirm whether
# unique images (nunique) was intended for an "images by category" chart.
groupby_category = train_df.groupby('Category')['ImageId'].count()
# Assign a concrete list of ints rather than a one-shot map iterator.
groupby_category.index = [int(category) for category in groupby_category.index]
groupby_category = groupby_category.sort_index()

fig = plt.figure(figsize=(10, 4))
x = groupby_category.index
y = groupby_category.values

# seaborn 0.12+ accepts the data vectors only as keyword arguments; the
# positional form sns.barplot(x, y) raises a TypeError there.
sns.barplot(x=x, y=y)
plt.title("Number of images by category", fontsize=20)
plt.xlabel("Category", fontsize=20)
plt.ylabel("# of images", fontsize=20)
plt.show()

### Count images by Category

In [None]:
# The number of rows with attributes, by category.
# The variable name and the original comment both promise "with attributes",
# but the original counted every row; keep only rows whose attribute string is
# non-empty before grouping.
# NOTE(review): count() tallies rows, not distinct images, even though the
# column is labelled '# of imgs'.
groupby_category_Having_attributes = (
    train_df.loc[train_df['Attributes'] != "", ['ImageId', 'Category']]
    .groupby('Category')
    .count()
)
groupby_category_Having_attributes.columns = ['# of imgs']

# Print first 5 rows
groupby_category_Having_attributes.head()

## Function: Show image

In [None]:
def show_img(IMG_FILE):
    """Display one training image with matplotlib.

    Parameters
    ----------
    IMG_FILE : str
        File name of the image inside the competition's ``train`` folder.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file. ``cv2.imread`` signals this by
        returning ``None`` rather than raising, which would otherwise surface
        as an opaque error inside ``cv2.cvtColor``.
    """
    img_path = "../input/imaterialist-fashion-2019-FGVC6/train/" + IMG_FILE
    I = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if I is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # OpenCV loads channels as BGR; convert to RGB before handing to matplotlib.
    I = cv2.cvtColor(I, cv2.COLOR_BGR2RGB)
    plt.imshow(I)
    plt.tight_layout()
    plt.show()

## Open data: Image elements

In [None]:
# All rows of one example image, ordered by category id.
train_df.loc[
    train_df['ImageId'] == '000aac3870ea7c59ca0333ffa5327323.jpg'
].sort_values(by='Category')

### Image example:

In [None]:
# Render the example image.
show_img(IMG_FILE='000aac3870ea7c59ca0333ffa5327323.jpg')

## Function: Create mask, which defines items on the image

In [None]:
def make_mask(IMG_FILE):
    """Build a 2-D category mask for one image from its run-length annotations.

    Selects every row of the global ``train_df`` whose ``ImageId`` equals
    ``IMG_FILE``, decodes each row's ``EncodedPixels`` string (alternating
    "start length" pairs) and writes the row's ``Category`` into the covered
    pixels. Where runs of different rows overlap, the later row wins. The
    categories found in the rows and the values present in the finished mask
    are printed for a quick sanity check.

    Parameters
    ----------
    IMG_FILE : str
        Value of the ``ImageId`` column identifying the image.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(H, W)``; ``-1`` marks pixels that no run
        covers, any other value is a category id.

    Raises
    ------
    IndexError
        If ``train_df`` has no row for ``IMG_FILE``.
    """
    df = train_df[train_df.ImageId == IMG_FILE].reset_index(drop=True)
    if df.empty:
        # Same exception type the positional lookup below would raise, but
        # with a message that names the actual problem.
        raise IndexError(f"no annotation rows found for image {IMG_FILE!r}")

    # Positional lookup kept from the original code: columns 2 and 3 are
    # presumably the image height and width -- TODO confirm against train.csv.
    H = int(df.iloc[0, 2])
    W = int(df.iloc[0, 3])

    print("Correct Category :", sorted(set(df.Category.tolist())))

    # Flat mask, filled with -1 for "no category".
    mask = np.full(H * W, fill_value=-1, dtype='int')

    for encoded_pixels, category in zip(df['EncodedPixels'], df['Category']):
        tokens = encoded_pixels.split()
        run_starts = map(int, tokens[0::2])
        run_lengths = map(int, tokens[1::2])
        for start, length in zip(run_starts, run_lengths):
            # The competition's run-length encoding numbers pixels from 1,
            # so shift to a 0-based offset before slicing.
            begin = start - 1
            mask[begin:begin + length] = category

    print("Output :", np.unique(mask).tolist())

    # Runs are laid out column by column (top to bottom, then left to right),
    # so reshape to (W, H) and transpose to get the usual (H, W) image layout.
    mask = mask.reshape(W, H).T

    return mask

### Example:

In [None]:
# Decode the example image's annotations and draw the resulting category mask.
mask = make_mask(IMG_FILE='000aac3870ea7c59ca0333ffa5327323.jpg')
plt.imshow(X=mask, cmap='jet')

## Function: open image and mask, combine them

In [None]:
def Masking_Image(IMG_FILE):
    """Plot an image, its category mask, and the mask overlaid on the image.

    Draws three panels side by side: the RGB image, the mask returned by
    ``make_mask`` and the image with the semi-transparent mask on top.

    Parameters
    ----------
    IMG_FILE : str
        File name of the image inside the competition's ``train`` folder;
        also passed to ``make_mask`` to look up the annotations.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file. ``cv2.imread`` signals this by
        returning ``None`` rather than raising.
    """
    img_path = "../input/imaterialist-fashion-2019-FGVC6/train/" + IMG_FILE
    I = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if I is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # OpenCV loads channels as BGR; convert to RGB before handing to matplotlib.
    I = cv2.cvtColor(I, cv2.COLOR_BGR2RGB)
    mask = make_mask(IMG_FILE)

    _, ax = plt.subplots(nrows=1, ncols=3, figsize=(18, 6))

    ax[0].imshow(I)
    ax[1].imshow(mask, cmap='jet')
    # Overlay: draw the image first, then the mask with partial transparency.
    # Interpolation is disabled so mask values are not blended between pixels.
    ax[2].imshow(I, interpolation='none')
    ax[2].imshow(mask, cmap='jet', interpolation='none', alpha=0.6)

### Example:

In [None]:
# Show image, mask and overlay for the example image.
Masking_Image(IMG_FILE='000aac3870ea7c59ca0333ffa5327323.jpg')