### This notebook contains code to prepare dataset for training to recognise apparel attributes.


In [1]:
#Importing tensorflow and numpy
import tensorflow as tf
import numpy as np
import sklearn 
import tensorflow.python.platform
from tensorflow.python.platform import gfile
import os
import re

In [2]:
#initializing the imagenet directory and training images directory
model_dir = "imagenet"
attribute_training_images = "ClothingAttributeDataset/images/"

In [3]:
#Getting the list of training images names
training_images = [attribute_training_images+f for f in os.listdir(attribute_training_images) if re.search('jpg|JPG', f)]

In [4]:
#This function is from classify_image.py which creates graph from saved GraphDef file and returns a saver
def create_graph():
  """Creates a graph from saved GraphDef file and returns a saver."""
  # Creates graph from saved graph_def.pb.
  with tf.gfile.FastGFile(os.path.join(
      model_dir, 'classify_image_graph_def.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')

In [5]:
#Function to generate bottleneck values from classifyimage.py
def get_bottleneck_values(images):  # modifying name of function
  """Runs inference on an image.

  Args:
    image: Image file name.

  Returns:
    Nothing
  """
  # Creates graph from saved GraphDef.
  create_graph()
  feature_vector_size = 2048 # pool_3:0 contains a float description vector of size 2048  
  with tf.Session() as sess:
    # Some useful tensors:
    # 'softmax:0': A tensor containing the normalized prediction across
    #   1000 labels.
    # 'pool_3:0': A tensor containing the next-to-last layer containing 2048
    #   float description of the image.
    # 'DecodeJpeg/contents:0': A tensor containing a string providing JPEG
    #   encoding of the image.
    # Runs the softmax tensor by feeding the image_data as input to the graph.
    bottleneck_tensor = sess.graph.get_tensor_by_name('pool_3:0') #changing from softmax:0 to pool_3:0
    feature_vectors = np.empty([len(images),2048])
    image_names = []
    for i,image in enumerate(images): # Iterating through images 
        image_data = tf.gfile.FastGFile(image, 'rb').read()
        feature_vector = sess.run(bottleneck_tensor,
                           {'DecodeJpeg/contents:0': image_data})
        feature_vector = np.squeeze(feature_vector)
        image_names.append(image)
        feature_vectors[i,:] = feature_vector 
        if(i % 10 == 0): #Print out just to see the function is processing 
            print("Processing image %d  %s"%(i,image))
    return feature_vectors,image_names

In [6]:
attribute_features,image = get_bottleneck_values(training_images)

Processing image 0  ClothingAttributeDataset/images/000848.jpg
Processing image 10  ClothingAttributeDataset/images/001335.jpg
Processing image 20  ClothingAttributeDataset/images/000962.jpg
Processing image 30  ClothingAttributeDataset/images/000127.jpg
Processing image 40  ClothingAttributeDataset/images/000828.jpg
Processing image 50  ClothingAttributeDataset/images/000939.jpg
Processing image 60  ClothingAttributeDataset/images/000549.jpg
Processing image 70  ClothingAttributeDataset/images/001625.jpg
Processing image 80  ClothingAttributeDataset/images/001157.jpg
Processing image 90  ClothingAttributeDataset/images/001500.jpg
Processing image 100  ClothingAttributeDataset/images/001686.jpg
Processing image 110  ClothingAttributeDataset/images/001195.jpg
Processing image 120  ClothingAttributeDataset/images/001638.jpg
Processing image 130  ClothingAttributeDataset/images/000545.jpg
Processing image 140  ClothingAttributeDataset/images/001411.jpg
Processing image 150  ClothingAttrib

In [12]:
image_names = image
feature_vectors_sorted = np.empty([len(training_images),2048])
image_name_sorted = []

In [13]:
#Since features are generated in random order data has to be sorted first. So that each row corresponds 
#to correct output in the attributes list.
def sort():
    k = 0
    for i in range(1,1857):
        num_string = str(i)
        l = len(num_string)
        num_string = ("0" * (6-l)) + num_string
        for index, name in enumerate(image_names):
            if re.search(num_string, name):
                feature_vectors_sorted[k,:] = attribute_features[index,:]
                image_name_sorted.append(image_names[index])
                k = k+1

In [14]:
sort()

In [20]:
import pickle
pickle.dump(feature_vectors_sorted,open('feature_vectors_sorted1','wb'))
pickle.dump(image_name_sorted,open('image_names_sorted1','wb'))
