In [73]:
from datetime import datetime
import json
import os
import numpy as np

import tensorflow as tf
import pandas as pd
import tensorflow_hub as hub
from keras import Sequential
from keras.callbacks import ModelCheckpoint
from keras.layers import Lambda, Dense
import matplotlib.pyplot as plt

from utility.train_data_loader import load_train_data

In [98]:
epochs = 10
batch_size = 256
specialization = "mobile"
gen_test = False

categories_file = open("../data/categories.json", "r")
categories = json.load(categories_file)

all_subcategories = {k.lower(): v for k, v in categories['Mobile'].items()}
all_subcategories.update({k.lower(): v for k, v in categories['Fashion'].items()})
all_subcategories.update({k.lower(): v for k, v in categories['Beauty'].items()})

data_root = "../../"+specialization+"_image/"
data_root = pathlib.Path(data_root)

datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

feature_extractor_url = "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/2"

trainData = load_train_data()
testData = pd.read_csv("../data/test.csv")

train_data_specialized = trainData[trainData['image_path'].str.contains(specialization)][::10]
train_data_specialized['image_path'] = train_data_specialized['image_path']. \
    map(lambda x: x.replace(specialization + '_image/', ''))

validation_data_specialized = trainData[trainData['image_path'].str.contains(specialization)][::100]
validation_data_specialized['image_path'] = validation_data_specialized['image_path']. \
    map(lambda x: x.replace(specialization + '_image/', ''))

test_data_specialized = testData[testData['image_path'].str.contains(specialization)]
test_data_specialized['image_path'] = test_data_specialized['image_path'].\
    map(lambda x: x.replace(specialization+'_image/', ''))

inverted_categories_specialized = {k.lower(): v for k, v in categories[specialization.capitalize()].items()}

custom train data used


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [99]:
IMAGE_SIZE = hub.get_expected_image_size(hub.Module(feature_extractor_url))
image_generator = datagen.flow_from_dataframe(train_data_specialized,
                                              directory=os.path.join(data_root),
                                              x_col="image_path",
                                              y_col="item_category",
                                              target_size=IMAGE_SIZE,
                                              color_mode="rgb",
                                              class_mode="categorical",
                                              shuffle=True,
                                              batch_size=64,
                                              )

Found 16033 images belonging to 27 classes.


In [100]:
label_names = sorted(image_generator.class_indices.items(), key=lambda pair:pair[1])
label_names = np.array([key.title() for key, value in label_names])


def feature_extractor(x):
    feature_extractor_module = hub.Module(feature_extractor_url)
    return feature_extractor_module(x)


for image_batch, label_batch in image_generator:
    print("Image batch shape: ", image_batch.shape)
    print("Label batch shape: ", label_batch.shape)
    break

Image batch shape:  (64, 224, 224, 3)
Label batch shape:  (64, 27)


In [101]:
model = Sequential()
model.add(Lambda(feature_extractor, input_shape=IMAGE_SIZE+[3], trainable=True))
model.add(Dense(len(inverted_categories_specialized), activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

KeyboardInterrupt: 

In [None]:
def gen_filename_h5():
    return 'epoch_'+str(epochs) + '_' + datetime.now().strftime("%m_%d_%Y_%H_%M_%S")


def gen_filename_csv():
    return 'epoch_'+str(epochs) + '_' + datetime.now().strftime("%m_%d_%Y_%H_%M_%S")


# Checkpoint auto
filepath = "../checkpoints/"+gen_filename_h5()+"v2.hdf5"
checkpointer = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')


steps_per_epoch = image_generator.samples//image_generator.batch_size
valid_steps_per_epoch = valid_generator.samples // valid_generator.batch_size
test_steps_per_epoch = test_generator.samples // test_generator.batch_size


In [102]:
valid_generator = valid_datagen.flow_from_dataframe(validation_data_specialized,
                                                    directory=os.path.join(data_root),
                                                    x_col="image_path",
                                                    y_col="item_category",
                                                    target_size=IMAGE_SIZE,
                                                    color_mode="rgb",
                                                    class_mode="categorical",
                                                    shuffle=True,
                                                    batch_size=64,
                                                    )


test_generator = test_datagen.flow_from_dataframe(test_data_specialized,
                                                  directory=os.path.join(data_root),
                                                  x_col="image_path",
                                                  y_col=None,
                                                  target_size=IMAGE_SIZE,
                                                  color_mode="rgb",
                                                  class_mode=None,
                                                  shuffle=False,
                                                  batch_size=64,
                                                  )


Found 1604 images belonging to 25 classes.
Found 40417 images.


In [103]:
label_names = sorted(image_generator.class_indices.items(), key=lambda pair:pair[1])
label_names = np.array([key.title() for key, value in label_names])


def feature_extractor(x):
    feature_extractor_module = hub.Module(feature_extractor_url)
    return feature_extractor_module(x)


for image_batch, label_batch in image_generator:
    print("Image batch shape: ", image_batch.shape)
    print("Label batch shape: ", label_batch.shape)
    break


model = Sequential()
model.add(Lambda(feature_extractor, input_shape=IMAGE_SIZE+[3], trainable=True))
model.add(Dense(len(inverted_categories_specialized), activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()


Image batch shape:  (64, 224, 224, 3)
Label batch shape:  (64, 27)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_4 (Lambda)            (None, 1280)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 27)                34587     
Total params: 34,587
Trainable params: 34,587
Non-trainable params: 0
_________________________________________________________________


In [104]:
def gen_filename_h5():
    return 'epoch_'+str(epochs) + '_' + datetime.now().strftime("%m_%d_%Y_%H_%M_%S")


def gen_filename_csv():
    return 'epoch_'+str(epochs) + '_' + datetime.now().strftime("%m_%d_%Y_%H_%M_%S")


# Checkpoint auto
filepath = "../checkpoints/"+gen_filename_h5()+"v2.hdf5"
checkpointer = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')


steps_per_epoch = image_generator.samples//image_generator.batch_size
valid_steps_per_epoch = valid_generator.samples // valid_generator.batch_size
test_steps_per_epoch = test_generator.samples // test_generator.batch_size

history = model.fit_generator(generator=image_generator,
                              steps_per_epoch=steps_per_epoch,
                              validation_data=valid_generator,
                              validation_steps=valid_steps_per_epoch,
                              epochs=epochs,
                              callbacks=[checkpointer],
                              )


Epoch 1/10

ValueError: Error when checking target: expected dense_3 to have shape (27,) but got array with shape (25,)