### **Mounting Google Drive....**

In [1]:
# Install a Drive FUSE wrapper.
# https://github.com/astrada/google-drive-ocamlfuse
!apt-get update -qq 2>&1 > /dev/null
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

E: Package 'python-software-properties' has no installation candidate
Selecting previously unselected package google-drive-ocamlfuse.
(Reading database ... 131352 files and directories currently installed.)
Preparing to unpack .../google-drive-ocamlfuse_0.7.1-0ubuntu3~ubuntu18.04.1_amd64.deb ...
Unpacking google-drive-ocamlfuse (0.7.1-0ubuntu3~ubuntu18.04.1) ...
Setting up google-drive-ocamlfuse (0.7.1-0ubuntu3~ubuntu18.04.1) ...
Processing triggers for man-db (2.8.3-2ubuntu0.1) ...


In [0]:
# Generate auth tokens for Colab
from google.colab import auth
auth.authenticate_user()

In [3]:
# Generate creds for the Drive FUSE library.
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# Work around misordering of STREAM and STDIN in Jupyter.
# https://github.com/jupyter/notebook/issues/3159
prompt = !google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass(prompt[0] + '\n\nEnter verification code: ')
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force

Enter verification code: ··········
Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
Please enter the verification code: Access token retrieved correctly.


In [0]:
# Create a directory and mount Google Drive using that directory.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [5]:
print ('Files in Drive:')
!ls drive/

Files in Drive:
 checkpoint						     'Notes 5th sem'
'Colab Notebooks'					      Novels
'Getting started'					      ProductTagging
 gtav-0.001-alexnet-10-epochs-new.model.data-00000-of-00001   Spreadsheet.ods
 gtav-0.001-alexnet-10-epochs-new.model.index		      test.csv
 gtav-0.001-alexnet-10-epochs-new.model.meta		      TOEFL.rar
 gtav_training_data_new					      train.csv


In [6]:
import os
a='drive/ProductTagging'
b='dataset/train'
os.listdir(a+'/'+b)

['ACCESSORIES_DUFFLE',
 'ACCESSORIES_BELTS',
 'ACCESSORIES_CAPS',
 'ACCESSORIES_GROOMING',
 'ACCESSORIES_EYEWEAR',
 'ACCESSORIES_BRACELETS',
 'ACCESSORIES_KEYCHAINS',
 'ACCESSORIES_BACKPACKS',
 'ACCESSORIES_BAGS',
 'ACCESSORIES_GLOVES',
 'ACCESSORIES_WALLETS',
 'ACCESSORIES_SOCKS',
 'ACCESSORIES_SUNGLASSES',
 'BOTTOMS_DENIM',
 'ACCESSORIES_SCARVES',
 'ACCESSORIES_TIES',
 'ACCESSORIES_WATCHES',
 'BLAZERS_FORMALWEAR',
 'ACCESSORIES_TOTE',
 'ACCESSORIES_SALE',
 'FOOTWEAR_SNEAKERS',
 'BOTTOMS_PANTS',
 'BOTTOMS_SWEATS',
 'BOTTOMS_SHORTS',
 'FOOTWEAR_SALE',
 'BOTTOMS_SWIM',
 'FOOTWEAR_SANDALS',
 'FOOTWEAR_TIES',
 'FOOTWEAR_LOAFERS',
 'FOOTWEAR_BOOTS',
 'JACKETS_FORMALWEAR',
 'PANTS_FORMALWEAR',
 'OUTERWEAR_VESTS',
 'OUTERWEAR_COATS',
 'OUTERWEAR_DENIM',
 'FORMALWEAR_SUITS',
 'OUTERWEAR_SALE',
 'OUTERWEAR_JACKETS',
 'OUTERWEAR_BOMBERS',
 'OUTERWEAR_BLAZERS',
 'TOPS_POLOS',
 'TOPS_TANKS',
 'TOPS_SWEATS',
 'SALE_SWIM',
 'SALE_PANTS',
 'TOPS_TEES',
 'TOPS_HOODIE',
 'TOPS_KNITWEAR',
 'TOPS_SHIRTS

### Installing the required libraries

In [7]:
!pip install keras
!pip install scikit-learn
!pip install numpy
!pip install glob
!pip install h5py
!pip install os
!pip install json
!pip install Pillow
!pip install pickle

Collecting glob
[31m  Could not find a version that satisfies the requirement glob (from versions: )[0m
[31mNo matching distribution found for glob[0m
Collecting os
[31m  Could not find a version that satisfies the requirement os (from versions: )[0m
[31mNo matching distribution found for os[0m
Collecting json
[31m  Could not find a version that satisfies the requirement json (from versions: )[0m
[31mNo matching distribution found for json[0m
Collecting pickle
[31m  Could not find a version that satisfies the requirement pickle (from versions: )[0m
[31mNo matching distribution found for pickle[0m


### **Extracting features from images in dataset....**

In [8]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model
from sklearn.preprocessing import LabelEncoder
import numpy as np
import glob
import h5py
import os
import json

Using TensorFlow backend.


In [9]:
train_path = "drive/ProductTagging/dataset/train"
features_path = "drive/ProductTagging/output/features.h5"
labels_path = "drive/ProductTagging/output/labels.h5"
test_size = 0.30
model_path = "drive/ProductTagging/output/model"
base_model = VGG16(weights="imagenet")                                              #loading pretrained vgg16 model
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)  #retraining the last fully connected layer
image_size = (224, 224)

Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [10]:
print ("Loaded the model")

train_labels = os.listdir(train_path)

le = LabelEncoder()                                                                 #encode the labels
le.fit([z for z in train_labels])

Loaded the model


LabelEncoder()

In [11]:
# variables to hold features and labels
features = []
labels   = []

# loop over all the labels in the folder
count = 1
for i, label in enumerate(train_labels):
    cur_path = train_path + "/" + label
    count = 1
    for image_path in glob.glob(cur_path + "/*.jpg"):
        img = image.load_img(image_path, target_size=image_size)
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        feature = model.predict(x)
        flat = feature.flatten()
        features.append(flat)
        labels.append(label)
        print ("Processed image %s in this category"%count)
        count += 1

Processed image 1 in this category
Processed image 2 in this category
Processed image 3 in this category
Processed image 4 in this category
Processed image 1 in this category
Processed image 2 in this category
Processed image 3 in this category
Processed image 4 in this category
Processed image 1 in this category
Processed image 2 in this category
Processed image 3 in this category
Processed image 4 in this category
Processed image 5 in this category
Processed image 6 in this category
Processed image 7 in this category
Processed image 8 in this category
Processed image 9 in this category
Processed image 10 in this category
Processed image 11 in this category
Processed image 12 in this category
Processed image 13 in this category
Processed image 14 in this category
Processed image 15 in this category
Processed image 16 in this category
Processed image 17 in this category
Processed image 18 in this category
Processed image 19 in this category
Processed image 20 in this category
Processed

In [0]:
le = LabelEncoder()                                                                 #encode the labels
le_labels = le.fit_transform(labels)

h5f_data = h5py.File(features_path, 'w')
h5f_data.create_dataset('dataset_1', data=np.array(features))                       #saving the features

h5f_label = h5py.File(labels_path, 'w')
h5f_label.create_dataset('dataset_1', data=np.array(le_labels))                     #saving the labels

h5f_data.close()
h5f_label.close()

In [13]:
model_json = model.to_json()                                                        #saving model
with open(model_path + str(test_size) + ".json", "w") as json_file:
    json_file.write(model_json)
print("Saved model")

model.save_weights(model_path + str(test_size) + ".h5")                             #saving weights
print("Saved weights")

Saved model
Saved weights


### **Retraining the top layer of pretrained VGG16 model....**

In [0]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import numpy as np
import h5py
import os
import pickle

In [0]:
test_size = 0.30
seed = 9
features_path = "drive/ProductTagging/output/features.h5"
labels_path = "drive/ProductTagging/output/labels.h5"
classifier_path = "drive/ProductTagging/output/classifier.pickle"
train_path = "drive/ProductTagging/dataset/train"

In [0]:
h5f_data  = h5py.File(features_path, 'r')               # import features and labels
h5f_label = h5py.File(labels_path, 'r')

features_string = h5f_data['dataset_1']
labels_string   = h5f_label['dataset_1']

features = np.array(features_string)
labels   = np.array(labels_string)

h5f_data.close()
h5f_label.close()

In [25]:
print("Training started")
(trainData, testData, trainLabels, testLabels) = train_test_split(np.array(features),np.array(labels),test_size=test_size,random_state=seed) # split the training and testing data

model = LogisticRegression(random_state=seed)
print ("Model created")
model.fit(trainData, trainLabels)


print ()
accuracy = 0

for (label, features) in zip(testLabels, testData):							    #loop over test data
	predictions = model.predict_proba(np.atleast_2d(features))[0]			#predict the probability of each class label
	predictions = np.argsort(predictions)[::-1][:5]

	if label == predictions[0]:
		accuracy += 1

accuracy = (accuracy / float(len(testLabels))) * 100						    #convert accuracies to percentages

print('Final Accuracy -> {:.2f}%\n'.format(accuracy))

preds = model.predict(testData)                                     #evaluate the model on test data

print("Saving model")
pickle.dump(model, open(classifier_path, 'wb'))                     #save the classifier

Training started
Model created





Final Accuracy -> 82.46%

Saving model


### **Predicting labels of test images....**

In [1]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model
import numpy as np
import os
import pickle
from PIL import Image

Using TensorFlow backend.


In [2]:
train_path = "drive/ProductTagging/dataset/train"
test_path = "drive/ProductTagging/dataset/test"
classifier_path = "drive/ProductTagging/output/classifier.pickle"

print("Loaded the classifier")                           #loaded the trained logistic regression classifier
classifier = pickle.load(open(classifier_path, 'rb'))

Loaded the classifier


In [3]:
base_model = VGG16(weights="imagenet")                                              #loaded the pretrained vgg16 model
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)  #retraining the last fully connected layer
image_size = (224, 224)

train_labels = os.listdir(train_path)

test_images = os.listdir(test_path)

In [4]:
f=open("drive/ProductTagging/predicted_labels.txt",'w+')

for image_path in test_images:
    path = test_path + "/" + image_path
    try:
        Image.open(path).verify()                                                   #checking if image is corrupt or not
        img = image.load_img(path, target_size=image_size)
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        feature = model.predict(x)
        flat = feature.flatten()
        flat = np.expand_dims(flat, axis=0)
        preds = classifier.predict(flat)
        print(image_path + " -> " + train_labels[preds[0]])                         #Predicting label of test images
        f.write(image_path + " -> " + train_labels[preds[0]]+'\n')
    except Exception:
        print(image_path + " -> Corrupt Image")
        f.write(image_path + " -> Corrupt Image"+'\n')
f.close()

70.jpg -> Corrupt Image
bag.jpg -> ACCESSORIES_BAGS
glass.jpg -> ACCESSORIES_SUNGLASSES
