In [102]:
from datetime import datetime
import json
import os
import numpy as np
from sklearn.utils import shuffle
import tensorflow as tf
import pandas as pd
import tensorflow_hub as hub
from keras import Sequential
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Activation, Dropout, Embedding, Conv1D, GlobalMaxPooling1D, Bidirectional, CuDNNLSTM, \
    SpatialDropout1D, MaxPooling1D,Conv2D,MaxPooling2D,Flatten, Lambda
import matplotlib.pyplot as plt

from utility.train_data_loader import load_train_data


In [157]:
epochs = 3
batch_size = 256
specialization = "fashion"
gen_test = True

categories_file = open("../data/categories.json", "r")
categories = json.load(categories_file)

all_subcategories = {k.lower(): v for k, v in categories['Mobile'].items()}
all_subcategories.update({k.lower(): v for k, v in categories['Fashion'].items()})
all_subcategories.update({k.lower(): v for k, v in categories['Beauty'].items()})

data_root = "../../"+specialization+"_image/"

datagen = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)

# feature_extractor_url = "https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1"
feature_extractor_url = "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/2"

trainData = load_train_data()
testData = pd.read_csv("../data/test.csv")

validation_data_specialized = trainData[trainData['image_path'].str.contains(specialization)][::100]
validation_data_specialized['image_path'] = validation_data_specialized['image_path']. \
    map(lambda x: x.replace(specialization + '_image/', ''))

test_data_specialized = testData[testData['image_path'].str.contains(specialization)]
test_data_specialized['image_path'] = test_data_specialized['image_path'].\
    map(lambda x: x.replace(specialization+'_image/', ''))

inverted_categories_specialized = {k.lower(): v for k, v in categories[specialization.capitalize()].items()}


custom train data used


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [150]:
train_data_specialized = trainData[trainData['image_path'].str.contains(specialization)][::]
df_train = pd.DataFrame()
df_valid = pd.DataFrame()
num_train=2000
num_valid=int(0.1*num_train)
for k,v in inverted_categories_specialized.items():
    print(v)
    rows = train_data_specialized.loc[train_data_specialized['Category'] == v]
    num_images = rows.shape[0]
    if(num_train+num_valid>num_images):
        nt=int(0.9*num_images)
        nv=int(0.1*num_images)
    else:
        nt=num_train
        nv=num_valid
    # print(nt,nv)
    rows= shuffle(rows)
    rows_train = rows[:nt]
    df_train = df_train.append(rows_train)
    rows_valid = rows[nt:(nt+num_valid)]
    df_valid = df_valid.append(rows_valid)

train_data_specialized = df_train
validation_data_specialized = df_valid

train_data_specialized['image_path'] = train_data_specialized['image_path']. \
    map(lambda x: x.replace(specialization + '_image/', ''))

validation_data_specialized['image_path'] = validation_data_specialized['image_path']. \
    map(lambda x: x.replace(specialization + '_image/', ''))



23
27
18
20
24
22
19
26
25
29
28
17
21
30


In [132]:
IMAGE_SIZE = hub.get_expected_image_size(hub.Module(feature_extractor_url))
image_generator = datagen.flow_from_dataframe(train_data_specialized,
                                              directory=os.path.join(data_root),
                                              x_col="image_path",
                                              y_col="item_category",
                                              target_size=IMAGE_SIZE,
                                              color_mode="rgb",
                                              class_mode="categorical",
                                              shuffle=False,
                                              batch_size=64,
                                              )

Found 26799 images belonging to 14 classes.


In [107]:
label_names = sorted(image_generator.class_indices.items(), key=lambda pair:pair[1])
label_names = np.array([key.title() for key, value in label_names])

def feature_extractor(x):
    feature_extractor_module = hub.Module(feature_extractor_url)
    return feature_extractor_module(x)


for image_batch, label_batch in image_generator:
    print("Image batch shape: ", image_batch.shape)
    print("Label batch shape: ", label_batch.shape)
    break

Image batch shape:  (64, 224, 224, 3)
Label batch shape:  (64, 14)


In [108]:
print(image_batch[0].dtype)
# print(image_batch[0].shape)
# from PIL import Image
# im = Image.fromarray(image_batch[0].astype('uint8'),'RGB')
# im.show()
# print(label_batch)

float32
[[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [

In [109]:
input_shape = IMAGE_SIZE+[3] 

model = Sequential()
model.add(Lambda(feature_extractor, input_shape=input_shape, trainable=True))
#model.add(Dense(1024, activation='relu'))
#model.add(Dropout(0.25))
#model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(inverted_categories_specialized), activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_3 (Lambda)            (None, 1280)              0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 1280)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               163968    
_________________________________________________________________
dropout_6 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 14)                1806      
Total params: 165,774
Trainable params: 165,774
Non-trainable params: 0
________________________

In [153]:
valid_generator = valid_datagen.flow_from_dataframe(validation_data_specialized,
                                                    directory=os.path.join(data_root),
                                                    x_col="image_path",
                                                    y_col="item_category",
                                                    target_size=IMAGE_SIZE,
                                                    color_mode="rgb",
                                                    class_mode="categorical",
                                                    shuffle=False,
                                                    batch_size=64,
                                                    )


test_generator = test_datagen.flow_from_dataframe(test_data_specialized,
                                                  directory=os.path.join(data_root),
                                                  x_col="image_path",
                                                  y_col=None,
                                                  target_size=IMAGE_SIZE,
                                                  color_mode="rgb",
                                                  class_mode=None,
                                                  shuffle=False,
                                                  batch_size=64,
                                                  )


Found 2713 images belonging to 14 classes.
Found 55440 images.


In [160]:
onehot_to_cat = {v: k for k, v in image_generator.class_indices.items()}


In [161]:
onehot_to_cat

{0: 'a line dress',
 1: 'big size dress',
 2: 'big size top',
 3: 'blouse',
 4: 'bodycon dress',
 5: 'casual dress',
 6: 'crop top',
 7: 'maxi dress',
 8: 'others',
 9: 'party dress',
 10: 'shirt',
 11: 'tanktop',
 12: 'tshirt',
 13: 'wedding dress'}

In [111]:
print(test_data_specialized.shape)

(55440, 3)


In [112]:
def gen_filename_h5():
    return 'epoch_'+str(epochs) + '_' + datetime.now().strftime("%m_%d_%Y_%H_%M_%S")


def gen_filename_csv():
    return 'epoch_'+str(epochs) + '_' + datetime.now().strftime("%m_%d_%Y_%H_%M_%S")


# Checkpoint auto
filepath = "../checkpoints/"+gen_filename_h5()+"v2.hdf5"
checkpointer = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')


steps_per_epoch = image_generator.samples//image_generator.batch_size
valid_steps_per_epoch = valid_generator.samples // valid_generator.batch_size
test_steps_per_epoch = test_generator.samples // test_generator.batch_size

history = model.fit_generator(generator=image_generator,
                              steps_per_epoch=steps_per_epoch,
                              validation_data=valid_generator,
                              validation_steps=valid_steps_per_epoch,
                              epochs=epochs,
                              callbacks=[checkpointer],
                              )


Epoch 1/3

Epoch 00001: val_acc improved from -inf to 0.15179, saving model to ../checkpoints/epoch_3_03_20_2019_22_47_15v2.hdf5
Epoch 2/3

Epoch 00002: val_acc improved from 0.15179 to 0.17214, saving model to ../checkpoints/epoch_3_03_20_2019_22_47_15v2.hdf5
Epoch 3/3

Epoch 00003: val_acc did not improve from 0.17214


In [128]:
def perform_test():
    prediction_specialized = model.predict_generator(test_generator, verbose=1, steps=test_steps_per_epoch+1)
    return prediction_specialized

if gen_test:
    prediction_specialized = perform_test()
    predicted_label_specialized = [onehot_to_cat[np.argmax(prediction_specialized[i])] for i in range(len(prediction_specialized))]
    print(prediction_specialized.shape)
    df = pd.DataFrame({'itemid': test_data_specialized['itemid'].astype(int), 'Category': predicted_label_specialized})
    df.to_csv(path_or_buf='res' + gen_filename_csv() + '.csv', index=False)


KeyboardInterrupt: 

In [85]:
print(prediction_specialized[10])

[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]


In [173]:
from PIL import Image
im = Image.fromarray(image_batch[3].astype('uint8'),'RGB')
im.show()

In [171]:
prediction_specialized = model.predict_generator(image_generator, verbose=1, steps=3)
predicted_label_specialized = [onehot_to_cat[np.argmax(prediction_specialized[i])] for i in range(len(prediction_specialized))]
for image_batch,image_label in image_generator:
    break
print(train_data_specialized[:5])
print(predicted_label_specialized[:10])
lab = image_label[:10]
ret = [onehot_to_cat[np.argmax(lab[i])] for i in range(len(lab))]
print(ret)

            itemid                                              title  \
405401    92743979  longdress brukat merah gaun pesta kondangan hi...   
301810  1453890528                                      kebaya brokat   
344330   970036999  gaun bodycon wanita dengan bahan lace dan gaya...   
330666   793293307  dress boat neck lengan panjang bahan lace warn...   
390305  1812417062  dress baju pesta anak perempuan kids flower gi...   

        Category                            image_path  item_category  
405401        23  4de71639faf6ce8d04a87a7fdd563bbc.jpg  wedding dress  
301810        23  3ae4501a0f19905678d99b34ac481a1b.jpg  wedding dress  
344330        23  cfbbbf94a405c4a0d01ccf52bc80ccf1.jpg  wedding dress  
330666        23  17d8525cc2d7747ebcf9ea604b189251.jpg  wedding dress  
390305        23  83f4e2e172102c42e1d4295c19fa54ac.jpg  wedding dress  
['maxi dress', 'maxi dress', 'wedding dress', 'others', 'bodycon dress', 'wedding dress', 'wedding dress', 'wedding dress', 'wedd