# Import Libraries

In [1]:
from PIL import Image

import numpy as np
import os
import json
import imagesize
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import traceback
import sys
%matplotlib inline
sns.set_style()

# to divide our data into train and validation set
from sklearn.model_selection import train_test_split
#to encode our labels
from tensorflow.keras.utils import to_categorical
#to build our model 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv2D,MaxPool2D,Flatten,Dropout
# Our optimizer options
from keras.optimizers import RMSprop
from keras.optimizers import Adam
#Callback options
from tensorflow.keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau
#importing image data generator for data augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#for the final prediction report
from sklearn.metrics import classification_report ,confusion_matrix
from keras.applications.resnet50 import ResNet50
from tensorflow.keras.callbacks import ModelCheckpoint
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from tensorflow.keras.models import save_model, load_model

# Load Data

In [2]:
base_dir = '../input/til2020/'

with open(base_dir + r"train.json","r") as file:
    train_data = json.load(file)
    
with open(base_dir + r"val.json","r") as file:
    val_data = json.load(file)
    
train_annotations = train_data['annotations']
train_images = train_data['images']
categories = train_data['categories']

val_annotations = val_data['annotations']
val_images = val_data['images']

category_mapping = {}

for category_item in categories:
    category_mapping[category_item['id']] = category_item['name']

train_id_to_path_mapping = {}

for image_item in train_images:
    train_id_to_path_mapping[image_item['id']] = image_item['file_name']
    
val_id_to_path_mapping = {}

for image_item in val_images:
    val_id_to_path_mapping[image_item['id']] = image_item['file_name']
    
for annotation in train_annotations:
    annotation['image_path'] = '../input/til2020/train/train/'+train_id_to_path_mapping[annotation['image_id']]
    annotation['cat'] = category_mapping[annotation['category_id']]
    annotation['bbox'] = list(map(int,annotation['bbox']))
    
for annotation in val_annotations:
    annotation['image_path'] = '../input/til2020/val/val/'+val_id_to_path_mapping[annotation['image_id']]
    annotation['cat'] = category_mapping[annotation['category_id']]
    annotation['bbox'] = list(map(int,annotation['bbox']))
    
annotations = train_annotations + val_annotations

# Downsample Dresses Data

In [3]:
categories

[{'id': 1, 'name': 'tops'},
 {'id': 2, 'name': 'trousers'},
 {'id': 3, 'name': 'outerwear'},
 {'id': 4, 'name': 'dresses'},
 {'id': 5, 'name': 'skirts'}]

In [4]:
train_annotations[1]

{'area': 55769,
 'iscrowd': 0,
 'id': 2,
 'image_id': 1000,
 'category_id': 1,
 'bbox': [321, 332, 217, 257],
 'image_path': '../input/til2020/train/train/1000.jpg',
 'cat': 'tops'}

In [5]:
indexes = []
train_annotations_2 = []

counts = {'tops':0,'trousers':0,'outerwear':0,'dresses':0,'skirts':0,}

for idx, annot in enumerate(train_annotations):
    if annot['cat'] == 'dresses':
        indexes.append(idx)
    else:
        train_annotations_2.append(annot)
        
    counts[annot['cat']]+=1

print("Total Train Annotations:", len(train_annotations))
print("Dresses Count:", len(indexes))

print("Category Wise Count:")
print(counts)

max_samples = 8500

for i in indexes[:max_samples]:
    train_annotations_2.append(train_annotations[i])

print("New Train Annotations:", len(train_annotations_2))

Total Train Annotations: 13317
Dresses Count: 7585
Category Wise Count:
{'tops': 945, 'trousers': 1671, 'outerwear': 1486, 'dresses': 7585, 'skirts': 1630}
New Train Annotations: 13317


# Confirm Number of Noise Images

In [6]:
import os
print(len(os.listdir('../input/noise-image-generation')))

3704


# Prepare Images To Train a Model

In [7]:
# Adding Noise Category Id and Name
categories.append({'id':6, 'name':'noise'})
print(categories)

# Converting Categories to One-Hot Encoded Vectors 
new_categories = [x['name'] for x in categories]
print(new_categories)
encoded_categories = to_categorical(list(range(len(new_categories))), num_classes=len(new_categories))
print(encoded_categories)

category_mapping = {x:encoded_categories[i] for i,x in enumerate(new_categories)}
print(category_mapping)

# Add Noise Images to Annotations
for i, path in enumerate(os.listdir('../input/noise-image-generation')):
    record = {'area': None,
             'iscrowd': 0,
             'id': -1,
             'image_id': -1,
             'category_id': 6,
             'bbox': None,
             'image_path': '../input/image-classification/'+path,
             'cat': 'noise'}
    
    if i < 3200:
        train_annotations_2.append(record)
    else:
        val_annotations.append(record)
    
print(len(train_annotations_2))

[{'id': 1, 'name': 'tops'}, {'id': 2, 'name': 'trousers'}, {'id': 3, 'name': 'outerwear'}, {'id': 4, 'name': 'dresses'}, {'id': 5, 'name': 'skirts'}, {'id': 6, 'name': 'noise'}]
['tops', 'trousers', 'outerwear', 'dresses', 'skirts', 'noise']
[[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]]
{'tops': array([1., 0., 0., 0., 0., 0.], dtype=float32), 'trousers': array([0., 1., 0., 0., 0., 0.], dtype=float32), 'outerwear': array([0., 0., 1., 0., 0., 0.], dtype=float32), 'dresses': array([0., 0., 0., 1., 0., 0.], dtype=float32), 'skirts': array([0., 0., 0., 0., 1., 0.], dtype=float32), 'noise': array([0., 0., 0., 0., 0., 1.], dtype=float32)}
16517


In [8]:
def get_cropped_image(img, bbox):
    start_x, start_y, width, height = bbox
    cropped_img = img[start_y:start_y+height, start_x:start_x+width]
    return cropped_img
      
def get_reshaped_image(img, new_shape=(224,224)):
    resized_image = cv2.resize(img, new_shape, interpolation = cv2.INTER_NEAREST) 
    return resized_image

def rescale_bbox(bbox, current_img_shape, new_img_shape=(224,224)):
    x_ratio = new_img_shape[0] / current_img_shape[0]
    y_ratio = new_img_shape[1] / current_img_shape[1]
    
    new_x = bbox[0] * x_ratio
    new_y = bbox[1] * y_ratio
    new_width = bbox[2] * x_ratio
    new_height = bbox[3] * y_ratio
    
    return new_x, new_y, new_width, new_height

In [9]:
ignore_flip = ('dresses', 'noise')

def transform_data(annotations, samples_per_cat=None, cats=None):
    features = []
    labels = []
    max_check = False
    cat_count = {}
    
    if samples_per_cat is not None:
        max_check = True
        cat_count = {x:0 for x in cats}
    else:
        samples_per_cat = sys.maxsize
        
    
    for i, annotation in enumerate(annotations):
        img_path = annotation['image_path']
        cat = annotation['cat']
        bbox = annotation['bbox']

        try:
            if max_check:
                if cat in cats:
                    if cat_count[cat] >= samples_per_cat:
                        continue
                else:
                    continue

            img = cv2.imread(img_path)

            if img is None:
                continue
            
            if cat == 'noise':
                resized_image = get_reshaped_image(img, new_shape=(128,128))
            else:
                #x,y,w,h = rescale_bbox(bbox, (img.shape[0],img.shape[1]))
                cropped_image = get_cropped_image(img, bbox)
                resized_image = get_reshaped_image(cropped_image, new_shape=(128,128))

            features.append(resized_image)
            labels.append(category_mapping[cat])

            cat_count[cat] += 1
            
            if cat not in ignore_flip:
                features.append(cv2.flip(resized_image,1))
                labels.append(category_mapping[cat])
                cat_count[cat] += 1
            
            if i != 0 and i % 1000 == 0:
                print("Processed Images: ",i)

            #print(resized_image.shape)

            #plt.imshow(resized_image)
            #plt.title(cat)
            #plt.show()
        except:
            print(f"Error in image: bbox={bbox}, img_path={img_path}, cat={cat}")
            traceback.print_exc()
        
    return features, labels
    
    
max_samples = 10000
# cats = {'tops','trousers'}
cats = set(new_categories)
    
train_features, train_labels = transform_data(train_annotations_2, samples_per_cat=max_samples, cats=cats)
    
print(len(train_features))
print(len(train_labels))

#print(train_data[0])
#print(labels[0])

test_features, test_labels = transform_data(val_annotations, samples_per_cat=max_samples, cats=cats)

print(len(train_features))
print(len(train_labels))

train_features_2 = np.asarray(train_features)
print(train_features_2.shape)
train_labels_2 = np.asarray(train_labels)
print(train_labels_2.shape)

print(len(test_features))
print(len(test_labels))

test_features_2 = np.asarray(test_features)
print(test_features_2.shape)
test_labels_2 = np.asarray(test_labels)
print(test_labels_2.shape)

Processed Images:  1000
Processed Images:  2000
Processed Images:  3000
Processed Images:  4000
Processed Images:  5000
Processed Images:  6000
Processed Images:  7000
Processed Images:  8000
Processed Images:  9000
Processed Images:  10000
Processed Images:  11000
Processed Images:  12000
Processed Images:  13000
19049
19049
Processed Images:  1000
Processed Images:  2000
19049
19049
(19049, 128, 128, 3)
(19049, 6)
3578
3578
(3578, 128, 128, 3)
(3578, 6)


# Train a Model

In [10]:
batch_size = 64
input_shape = (128, 128, 3)
epoch = 100

In [11]:
# batch_size = 512
# input_shape = (128, 128, 3)
# epoch = 100

# resnet_50 = ResNet50(weights=None, input_shape=input_shape, classes=len(categories))

# summarize the model
# resnet_50.summary()

# resnet_50.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# early_stop= EarlyStopping(monitor='val_loss',patience=10)

# learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', 
#                                             patience=10, 
#                                             verbose=1, 
#                                             factor=0.5, 
#                                             min_lr=0.00001)

# mcp_save = ModelCheckpoint('.mdl_resnet50_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')

# resnet_50.fit(train_features_2, train_labels_2,
#           epochs=epoch,
#           batch_size=batch_size,
#           validation_data=(test_features_2,test_labels_2), 
#           callbacks=[early_stop, mcp_save, learning_rate_reduction])

# metrics=pd.DataFrame(resnet_50.history.history)
# metrics

## Inception V3

In [12]:
# inception_v3 = InceptionV3(weights=None, input_shape=input_shape, classes=len(categories))

# summarize the model
# inception_v3.summary()

# inception_v3.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# early_stop= EarlyStopping(monitor='val_loss',patience=10)

# learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', 
#                                             patience=10, 
#                                             verbose=1, 
#                                             factor=0.5, 
#                                             min_lr=0.00001)

# mcp_save = ModelCheckpoint('.mdl_inceptionv3_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')

# inception_v3.fit(train_features_2, train_labels_2,
#           epochs=epoch,
#           batch_size=batch_size,
#           validation_data=(test_features_2,test_labels_2), 
#           callbacks=[early_stop, mcp_save, learning_rate_reduction])

# metrics=pd.DataFrame(inception_v3.history.history)
# metrics

## Train VGG16

In [13]:
vgg16 = VGG16(weights=None, input_shape=input_shape, classes=len(categories))

# summarize the model
# vgg16.summary()

vgg16.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

early_stop= EarlyStopping(monitor='val_loss',patience=10)

mcp_save = ModelCheckpoint('mdl_vgg16_wts_2.hdf5', save_best_only=True, monitor='val_loss', mode='min')

learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', 
                                            patience=10, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)

vgg16.fit(train_features_2, train_labels_2,
          epochs=epoch,
          batch_size=batch_size,
          validation_data=(test_features_2,test_labels_2), 
          callbacks=[early_stop, learning_rate_reduction])

# Save the model
filepath = './vgg16_2'
save_model(vgg16, filepath)

metrics=pd.DataFrame(vgg16.history.history)
metrics

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


Unnamed: 0,loss,accuracy,val_loss,val_accuracy,lr
0,20.788681,0.385165,1.564201,0.373952,0.001
1,1.520821,0.396924,1.567742,0.373952,0.001
2,1.497449,0.398394,1.55822,0.373952,0.001
3,1.495225,0.398184,1.566278,0.373952,0.001
4,1.494835,0.398184,1.549967,0.373952,0.001
5,1.494762,0.398184,1.561114,0.373952,0.001
6,1.494678,0.398184,1.553253,0.373952,0.001
7,1.49448,0.398184,1.564214,0.373952,0.001
8,1.494635,0.398184,1.55461,0.373952,0.001
9,1.494267,0.398184,1.560419,0.373952,0.001
