### **Mounting Google Drive....**

In [0]:
# Install a Drive FUSE wrapper (google-drive-ocamlfuse) from its PPA.
# https://github.com/astrada/google-drive-ocamlfuse
# NOTE: the shell applies redirections left to right, so `2>&1 > /dev/null`
# points stderr at the cell output first and then discards only stdout;
# error messages from these commands therefore remain visible.
!apt-get update -qq 2>&1 > /dev/null
# Packages installed ahead of registering the PPA below.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
# Refresh the package index again so the newly added PPA is picked up.
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

In [0]:
# Generate auth tokens for Colab.
# Runs Colab's user authentication for this session; the following cell reads
# application-default credentials, presumably made available by this step.
from google.colab import auth
auth.authenticate_user()

In [0]:
# Generate creds for the Drive FUSE library.
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# Work around misordering of STREAM and STDIN in Jupyter.
# https://github.com/jupyter/notebook/issues/3159
# First pass: run the tool headless with stdin closed and keep only the output
# line(s) containing "URL"; `prompt` receives the captured lines as a list.
prompt = !google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# NOTE(review): prompt[0] raises IndexError if grep matched no line (e.g. the
# tool printed no URL) -- confirm whether that case needs a clearer message.
# getpass keeps the typed verification code out of the cell output.
vcode = getpass.getpass(prompt[0] + '\n\nEnter verification code: ')
# Second pass: feed the verification code to the tool on stdin.
# NOTE(review): {vcode} is interpolated into a shell command line unquoted.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
# Create a directory and mount Google Drive using that directory.
# `mkdir -p` does not fail when `drive` already exists, so this cell can be re-run.
!mkdir -p drive
# The mount point is relative to the current working directory; later cells
# read and write paths under "drive/ProductTagging/".
!google-drive-ocamlfuse drive

In [0]:
print ('Files in Drive:')
!ls drive/

In [0]:
import os

# Project root on the mounted Drive and the training folder inside it.
a = 'drive/ProductTagging'
b = 'dataset/train'

# Display the entries of the training folder (one entry per class folder).
os.listdir('/'.join((a, b)))

### Installing the required libraries

In [0]:
!pip install keras
!pip install scikit-learn
!pip install numpy
!pip install glob
!pip install h5py
!pip install os
!pip install json
!pip install Pillow
!pip install pickle

### **Extracting features from images in dataset....**

In [0]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model
from sklearn.preprocessing import LabelEncoder
import numpy as np
import glob
import h5py
import os
import json

Using TensorFlow backend.


In [0]:
# Locations on the mounted Drive for the input images and the generated artifacts.
train_path = "drive/ProductTagging/dataset/train"
features_path = "drive/ProductTagging/output/features.h5"
labels_path = "drive/ProductTagging/output/labels.h5"
# Appended to the saved model's file names in a later cell.
test_size = 0.30
model_path = "drive/ProductTagging/output/model"
# Load VGG16 with ImageNet-pretrained weights.
base_model = VGG16(weights="imagenet")
# Truncate the network at the 'fc1' layer: `model` outputs that layer's
# activations, which are used as image features. Nothing is trained here.
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)
# Target size passed to image.load_img when images are read.
image_size = (224, 224)

In [0]:
print("Loaded the model")

# Every entry of the training folder is treated as one class name.
train_labels = os.listdir(train_path)

# Fit a label encoder on the class names.
le = LabelEncoder()
le.fit(list(train_labels))

In [0]:
# Accumulators: one flattened feature vector and one class name per image.
features = []
labels   = []

# File patterns to pick up inside each class folder. They are combined with
# os.path.join so the separator matches the platform; the previous hard-coded
# "\\*.jpg" only works with Windows separators and matches nothing on
# Linux/Colab, which left `features` empty.
image_patterns = ("*.jpg", "*.png")

# Loop over all the labels (class folders) in the training folder.
for label in train_labels:
    cur_path = os.path.join(train_path, label)
    count = 1  # 1-based number of the image being processed in this category
    for pattern in image_patterns:
        # sorted() makes the processing order reproducible between runs.
        for image_path in sorted(glob.glob(os.path.join(cur_path, pattern))):
            img = image.load_img(image_path, target_size=image_size)
            x = image.img_to_array(img)
            x = np.expand_dims(x, axis=0)   # add a leading batch axis
            x = preprocess_input(x)
            feature = model.predict(x)
            features.append(feature.flatten())
            labels.append(label)
            print("Processed image %s in this category" % count)
            count += 1

In [0]:
# Fail loudly instead of writing empty datasets when no image was processed.
if len(features) == 0:
    raise ValueError("No features were extracted; check train_path and the image file patterns.")

# Encode the collected class names as integers.
le = LabelEncoder()
le_labels = le.fit_transform(labels)

# Context managers close each HDF5 file even if writing a dataset fails.
with h5py.File(features_path, 'w') as h5f_data:
    h5f_data.create_dataset('dataset_1', data=np.array(features))      # saving the features

with h5py.File(labels_path, 'w') as h5f_label:
    h5f_label.create_dataset('dataset_1', data=np.array(le_labels))    # saving the labels

In [0]:
# Serialise the truncated network's architecture to JSON. The file name is
# model_path followed by the test_size value and the ".json" extension.
model_json = model.to_json()
json_name = "".join([model_path, str(test_size), ".json"])
with open(json_name, "w") as json_file:
    json_file.write(model_json)
print("Saved model")

# Store the weights next to the architecture, using the same name stem.
weights_name = "".join([model_path, str(test_size), ".h5"])
model.save_weights(weights_name)
print("Saved weights")

### **Training a logistic-regression classifier on the extracted VGG16 features**

In [0]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import numpy as np
import h5py
import os
import pickle

In [0]:
# Inputs: the training image folder and the HDF5 files holding features and labels.
train_path = "drive/ProductTagging/dataset/train"
features_path = "drive/ProductTagging/output/features.h5"
labels_path = "drive/ProductTagging/output/labels.h5"

# Output: where the fitted classifier is pickled.
classifier_path = "drive/ProductTagging/output/classifier.pickle"

# Train/test split settings: held-out fraction and the random seed.
seed = 9
test_size = 0.30

In [0]:
# Import features and labels. Each dataset is copied into an in-memory NumPy
# array while its file is open; the context managers then close the files even
# if reading fails.
with h5py.File(features_path, 'r') as h5f_data:
    features = np.array(h5f_data['dataset_1'])

with h5py.File(labels_path, 'r') as h5f_label:
    labels = np.array(h5f_label['dataset_1'])

In [0]:
print("Training started")
# Split the features and labels into training and held-out test subsets.
(trainData, testData, trainLabels, testLabels) = train_test_split(
    np.array(features), np.array(labels), test_size=test_size, random_state=seed)

model = LogisticRegression(random_state=seed)
print("Model created")
model.fit(trainData, trainLabels)

print()

# Rank-1 accuracy on the held-out data. model.predict returns class labels
# (it maps the best-scoring column through model.classes_), so the comparison
# stays correct even when a class is absent from the training split. The
# previous loop compared the column index with the label value and also reused
# the name `features` as its loop variable, overwriting the feature matrix.
preds = model.predict(testData)                                     # evaluate the model on test data
accuracy = float(np.mean(preds == testLabels)) * 100                # convert accuracy to a percentage

print('Final Accuracy -> {:.2f}%\n'.format(accuracy))

print("Saving model")
# The context manager closes the file once the classifier has been written.
with open(classifier_path, 'wb') as classifier_file:
    pickle.dump(model, classifier_file)                             # save the classifier

### **Predicting labels of test images....**

In [0]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model
import numpy as np
import os
import pickle
from PIL import Image

In [0]:
train_path = "drive/ProductTagging/dataset/train"
test_path = "drive/ProductTagging/dataset/test"
classifier_path = "drive/ProductTagging/output/classifier.pickle"

# Load the trained logistic regression classifier.
# NOTE: pickle.load can execute arbitrary code; only load classifier files
# that were produced by this notebook or another trusted source.
with open(classifier_path, 'rb') as classifier_file:
    classifier = pickle.load(classifier_file)
# Report success only after the load has actually completed.
print("Loaded the classifier")

Loaded the classifier


In [0]:
# Load VGG16 with ImageNet-pretrained weights and truncate it at 'fc1', so
# `model` outputs the same kind of feature vector that was used for training.
base_model = VGG16(weights="imagenet")
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)
image_size = (224, 224)

# Class names, indexed by the integer ids the classifier predicts. LabelEncoder
# assigns ids in sorted order of the class names, whereas os.listdir returns
# entries in arbitrary order, so the listing must be sorted for
# train_labels[id] to name the right class.
# NOTE(review): this assumes every class folder contributed at least one image
# during feature extraction; a folder with no images would shift the ids.
train_labels = sorted(os.listdir(train_path))

test_images = os.listdir(test_path)

In [0]:
# Predict a label for every file in the test folder, echoing each result and
# writing it to predicted_labels.txt. The context manager closes the output
# file even if a prediction raises.
with open("drive/ProductTagging/predicted_labels.txt", 'w+') as f:
    for image_path in test_images:
        path = test_path + "/" + image_path

        # Only reading/decoding the image is treated as a "corrupt image"
        # failure; errors from the network or the classifier are left to
        # propagate instead of being misreported as a corrupt file.
        try:
            with Image.open(path) as probe:
                probe.verify()                                  # checking if image is corrupt or not
            img = image.load_img(path, target_size=image_size)
            x = image.img_to_array(img)
        except Exception:
            line = image_path + " -> Corrupt Image"
            print(line)
            f.write(line + '\n')
            continue

        x = np.expand_dims(x, axis=0)                           # add a leading batch axis
        x = preprocess_input(x)
        feature = model.predict(x)
        flat = np.expand_dims(feature.flatten(), axis=0)        # one row per sample for the classifier
        preds = classifier.predict(flat)

        # Predicting label of test images: map the predicted id to its class name.
        line = image_path + " -> " + train_labels[preds[0]]
        print(line)
        f.write(line + '\n')