Mounting Drive

In [0]:
# Mount Google Drive so the files under "My Drive" are reachable from this runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

You can download the dataset from here: https://data.world/crowdflower/categorization-dress-patterns

Reading the data

In [0]:
import pandas as pd

# Load the dress-pattern annotation table from Drive and preview its first rows
csv_path = "/content/drive/My Drive/internship/dress_patterns.csv"
data = pd.read_csv(csv_path)
data.head()

## **Downloading images from the urls**

In [0]:
from tqdm import tqdm
# `import urllib` alone does not guarantee the `request` submodule is loaded,
# so import it explicitly.
import urllib.request
import os

path = "/content/drive/My Drive/internship/images/"
# makedirs(exist_ok=True) is safe to re-run and also creates missing parents
os.makedirs(path, exist_ok=True)

# creating an empty list to store image names
img_name = []
for url in tqdm(data.image_url):
  # the file name is the last component of the URL
  name = url.split('/')[-1]
  p = os.path.join(path, name)

  # Skip images that are already on disk so an interrupted run can be resumed
  # by simply re-running the cell.
  if not os.path.exists(p):
    # Download to a temporary ".part" file and rename it only once the
    # transfer has finished. An interrupted download therefore never leaves a
    # truncated image under the final name, where the check above would
    # treat it as complete and skip it forever.
    partial = p + ".part"
    urllib.request.urlretrieve(url, partial)
    os.replace(partial, p)

  # appending image names to the list (one entry per row, in row order)
  img_name.append(name)

# creating a new column containing image names
data["image_name"] = img_name

In [0]:
# Preview the first rows of the dataframe
data.head()

Unnamed: 0,_unit_id,category,category:confidence,image_url
0,851505458,ikat,0.3487,http://s3-eu-west-1.amazonaws.com/we-attribute...
1,851505459,plain,1.0,http://s3-eu-west-1.amazonaws.com/we-attribute...
2,851505460,polka dot,0.6709,http://s3-eu-west-1.amazonaws.com/we-attribute...
3,851505461,plain,1.0,http://s3-eu-west-1.amazonaws.com/we-attribute...
4,851505462,geometry,0.7035,http://s3-eu-west-1.amazonaws.com/we-attribute...


## **Data Exploration**

In [0]:
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [0]:
# Display one randomly selected image together with its labelled category
rng = np.random.RandomState()
idx = rng.choice(len(data))

# look the row up once and reuse it for both the file name and the label
sample_row = data.iloc[idx]
image = plt.imread("/content/drive/My Drive/internship/images/" + sample_row.image_name)
cat = sample_row.category

print("category is ", cat)
plt.imshow(image)


In [0]:
# Count how many rows fall into each pattern category
print(data['category'].value_counts())

## **Preparing Data**

I am using roughly 65% of the data for training and validation in this project; the remaining ~35% is held out as test data.

In [0]:
# Splitting data into train (~65% of rows) and test (the rest).
# The mask is drawn from a seeded generator so the split is identical after a
# kernel restart. Training can be resumed from a saved checkpoint, and an
# unseeded split would move former test rows into the training set on every
# new session.
split_rng = np.random.RandomState(12)
msk = split_rng.rand(len(data)) < 0.65
train_data = data[msk]
test_data = data[~msk]
print(train_data.shape, test_data.shape)



In [0]:
# Hold out 30% of the training rows as a validation set (fixed seed for a repeatable split)
from sklearn.model_selection import train_test_split

train_x, val_x, train_y, val_y = train_test_split(
    train_data,
    train_data["category"],
    test_size=0.3,
    random_state=12,
)
print(train_x.shape, val_x.shape, train_y.shape, val_y.shape)

## **Model Building**

In [0]:
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D
from keras.layers import Dense, InputLayer, Dropout, Flatten
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping
from sklearn.metrics import f1_score
from keras.optimizers import RMSprop, SGD
from keras.models import Model
import warnings
warnings.filterwarnings("ignore")

In [0]:
# Shared settings: network input size, image folder and training schedule
img_width = 256
img_height = 256
image_path = "/content/drive/My Drive/internship/images/"
batch_size = 32
epochs = 20

Using VGG16 as the base model 

In [0]:
# ImageNet-pretrained VGG16 without its fully connected classifier head.
# Keras expects input_shape as (height, width, channels), the same ordering
# the data generators use for target_size=(img_height, img_width). The
# original (width, height, 3) order only worked because both values are equal.
base_model = VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(img_height, img_width, 3),
)

In [0]:
# Classification head stacked on top of the convolutional base:
# flatten -> dense(1024) -> dropout(0.5) -> dense(1024) -> softmax over 17 classes
x = base_model.output
for head_layer in (
    Flatten(),
    Dense(1024, activation="relu"),
    Dropout(0.5),
    Dense(1024, activation="relu"),
):
    x = head_layer(x)
predictions = Dense(17, activation="softmax")(x)

In [0]:
# creating the final model
# `inputs` / `outputs` are the Keras 2 keyword names; the singular
# `input` / `output` spellings are legacy aliases that newer releases reject.
model_final = Model(inputs=base_model.input, outputs=predictions)

In [0]:
# Freeze the first few layers of the pretrained base so their weights are not updated
N_FROZEN_LAYERS = 5
for frozen_layer in base_model.layers[:N_FROZEN_LAYERS]:
    frozen_layer.trainable = False

In [0]:
# Compile for multi-class classification, optimised with low-learning-rate SGD plus momentum
sgd = SGD(lr=0.0001, momentum=0.9)
model_final.compile(
    loss="categorical_crossentropy",
    optimizer=sgd,
    metrics=["accuracy"],
)


In [0]:
# Training generator: rescale pixels to [0, 1] and apply random augmentation
aug = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    fill_mode="nearest",
    zoom_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
    rotation_range=30)

# Validation generator: rescaling only. Validation images must not be randomly
# augmented, otherwise val_acc (which drives checkpointing and early stopping)
# is measured on distorted inputs and changes from epoch to epoch for reasons
# unrelated to the model.
val_aug = ImageDataGenerator(rescale=1./255)

train_generator = aug.flow_from_dataframe(
    train_x, directory=image_path, x_col="image_name", y_col="category",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical")

# Reuse the training label order so both generators encode categories
# identically even if a category is missing from one of the two splits.
class_names = sorted(train_generator.class_indices,
                     key=train_generator.class_indices.get)

validation_generator = val_aug.flow_from_dataframe(
    val_x, directory=image_path, x_col="image_name", y_col="category",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    classes=class_names,
    class_mode="categorical",
    shuffle=False)  # order is irrelevant for evaluation; keep it deterministic


In [0]:
# saving class indices for future use
import csv

# The `with` block closes (and therefore flushes) the file as soon as the
# mapping is written; the original left the handle open, so the rows were not
# guaranteed to reach disk. newline="" is what the csv module requires for
# files it writes.
with open("/content/drive/My Drive/internship/class_mapping.csv", "w", newline="") as mapping_file:
  w = csv.writer(mapping_file)
  # one row per class: category name, integer index used by the generator
  for key, val in train_generator.class_indices.items():
    w.writerow([key, val])

Creating model checkpoint to resume training from the previous checkpoint

In [0]:
# Callbacks: keep the best model on Drive and stop once validation accuracy stalls.
# NOTE(review): some Keras releases report this metric as 'val_accuracy' rather
# than 'val_acc' -- confirm the name against the installed version.
checkpoint_path = "/content/drive/My Drive/internship/vgg16_2.h5"

# Write the full model (not just weights) whenever the monitored metric improves
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)

# Abort training after 10 epochs without improvement of the monitored metric
early = EarlyStopping(
    monitor='val_acc',
    min_delta=0,
    patience=10,
    verbose=1,
    mode='auto',
)

### **Model Training**

Load the last model checkpoint if one exists; otherwise training starts from the beginning.

In [0]:

# `load_model` is not among the names imported from keras.models earlier in
# the notebook, so import it here to avoid a NameError.
from keras.models import load_model

# If a previously trained checkpoint exists, continue from it; otherwise keep
# the freshly built `model_final` and train from scratch.
if os.path.exists(checkpoint_path):
  # loading the checkpoint
  model_final = load_model(checkpoint_path)


In [0]:
# Training the model
# len(generator) is the integer number of batches needed to see every image
# the generator actually found once. The original passed
# len(train_x) / batch_size, which is a float and also counts rows whose image
# file the generator may have dropped.
model_final.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=[checkpoint, early])

# **Model Testing**

If you don't want to train the model, you can directly download the trained model from here: https://drive.google.com/open?id=1J7yvBZTtM-MrSiAIWymwt_CbVbdxBgMj and class mapping file from here: https://drive.google.com/open?id=1-EHTb2MxShqOlcrDvXcByxdb7tbQUvK_ 

In [0]:
# loading model and reading class mapping csv file
# Both imports are local to this cell so that it also works when the training
# cells were skipped: `csv` is otherwise only imported in a training cell and
# `load_model` is not imported anywhere else in the notebook.
import csv
from keras.models import load_model

model = load_model('/content/drive/My Drive/internship/vgg16_2.h5')
with open('/content/drive/My Drive/internship/class_mapping.csv', mode='r', newline='') as infile:
  reader = csv.reader(infile)
  # category name -> integer class index; blank rows are skipped
  class_indices = {row[0]: int(row[1]) for row in reader if row}

The cell below takes the path to the image you want to test and shows the predicted category along with images of 5 other items from the same category.

In [0]:
import os

image_file = input("enter image path ")


# Defining a function to make prediction on the input image
def pred(path, n_similar=5, image_dir='/content/drive/My Drive/internship/images/'):
  """Predict the dress-pattern category of an image and show similar dresses.

  Prints the predicted category, displays the (resized) query image and then
  a row of randomly chosen dataset images that carry the same category label.

  Args:
    path: Path of the image file to classify.
    n_similar: Maximum number of same-category dataset images to display.
    image_dir: Folder that holds the downloaded dataset images.

  Raises:
    FileNotFoundError: If the file at `path` cannot be read as an image.
  """
  # cv2.imread with IMREAD_COLOR always yields a 3-channel uint8 array, so
  # grayscale and RGBA files are handled and pixel values are always 0-255.
  # The original used plt.imread(path, cv2.IMREAD_COLOR): that second argument
  # is a file-format hint, not a colour flag, and PNG files came back already
  # scaled to [0, 1] before being divided by 255 a second time.
  bgr = cv2.imread(path, cv2.IMREAD_COLOR)
  if bgr is None:  # cv2.imread signals failure by returning None
    raise FileNotFoundError("could not read image: " + str(path))
  image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)   # OpenCV loads BGR; convert to RGB
  image = cv2.resize(image, (256, 256))          # resizing image
  image = image.astype("float32") / 255          # rescaling image to [0, 1]
  np_image = np.expand_dims(image, axis=0)       # the model takes a 4-d batch

  # index of the most probable class for the single image in the batch
  class_id = int(model.predict(np_image).argmax(axis=-1)[0])

  # invert the name -> index mapping to get the label of the predicted class
  label = {index: name for name, index in class_indices.items()}
  prednames = label[class_id]
  print("Dress pattern category is", prednames)
  plt.imshow(image)

  # Fetching the names of dataset images labelled with the predicted category
  fetch = data.loc[data['category'] == prednames, 'image_name']
  n_show = min(n_similar, len(fetch))
  if n_show == 0:
    print("No images of this category are available in the dataset")
    return
  # replace=False so the same image is never shown twice
  similar_images = np.random.choice(fetch, size=n_show, replace=False)

  # reading the similar images
  images = [plt.imread(os.path.join(image_dir, name)) for name in similar_images]

  # Plotting the similar images; squeeze=False keeps `axes` 2-d even for one image
  fig, axes = plt.subplots(1, n_show, figsize=(15, 5), squeeze=False)
  fig.suptitle('Here are some dress with similar pattern', fontsize=20)
  for ax, img in zip(axes[0], images):
    ax.imshow(img)


# calling the function
pred(image_file)