In [1]:
from keras.applications.resnet50 import ResNet50
model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3), pooling="avg")

Using TensorFlow backend.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.



In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import cv2
import numpy as np

def get_image(fname, show=False):
    img = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB)
    if img is None:
         return None
    if show:
         plt.imshow(img)
         plt.axis('off')
    img = cv2.resize(img, (224, 224))
    #img = np.swapaxes(img, 0, 2)
    #img = np.swapaxes(img, 1, 2)
    img = img[np.newaxis, :]
    
    #img = img.astype('float32') / 255.
    
    return img

In [3]:
import glob
import os

def process_files(folder_name, start_index=0):
    
    products = []
    index = start_index
    dim = 2048
    xb = np.empty(shape=[0, dim], dtype=np.float32)
    
    for imgfolderpath in glob.glob(folder_name + '/*'):
        productTitle = os.path.basename(imgfolderpath)
    
        for file in glob.glob(imgfolderpath + '/*'):
        #with open(file_name) as file:
        #for line in file:
            try:
                img = get_image(file)
                product = {}
                #fields = line.split(',')
                product['id'] = str(index)
                product['productTitle'] = productTitle #fields[0].strip()
                product['imageUrl'] = file #fields[1].strip()
                # download image to be featurized and preprocess it
                #file = mx.test_utils.download(product['imageUrl'])
                product['imageFileName'] = file
                # extract features

                features_batch = model.predict([img])
                features = features_batch[0]

                # the Knn algorithm we'll use requires float32 rather than the default float64
                xb = np.append(xb, [features.astype(np.float32)], axis=0)

                products.append(product)
                index += 1
            except:
                print("Something went wrong with the file: " + file)            
    return (products, xb)

In [4]:
products, train_features = process_files('/dataset/10classes')




In [5]:
import sklearn
import sklearn.preprocessing

In [6]:
Y = []
for product in products:
    Y.append(product['productTitle'])

In [7]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics                


x_train, x_test, y_train, y_test = train_test_split(train_features, Y, test_size=0.2, random_state=42)

In [25]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=100)

In [26]:
clf.fit(x_train,y_train)


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [27]:
y_pred=clf.predict(x_test)

In [28]:
acc = metrics.accuracy_score(y_test, y_pred)
print("Accuracy=", acc)

Accuracy= 0.889273356401384


In [None]:
for n in np.arange(100,10000,50):
    clf=RandomForestClassifier(n_estimators=n)
    clf.fit(x_train,y_train)
    y_pred=clf.predict(x_test)
    acc = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy=", acc, " with n=", n)

Accuracy= 0.8961937716262975  with n= 100
Accuracy= 0.903114186851211  with n= 150
Accuracy= 0.9065743944636678  with n= 200
Accuracy= 0.9100346020761245  with n= 250
Accuracy= 0.9204152249134948  with n= 300
Accuracy= 0.9100346020761245  with n= 350
Accuracy= 0.9134948096885813  with n= 400
Accuracy= 0.9134948096885813  with n= 450
Accuracy= 0.9100346020761245  with n= 500
Accuracy= 0.9204152249134948  with n= 550
Accuracy= 0.916955017301038  with n= 600
Accuracy= 0.9100346020761245  with n= 650
Accuracy= 0.9134948096885813  with n= 700
Accuracy= 0.9134948096885813  with n= 750
Accuracy= 0.9134948096885813  with n= 800
Accuracy= 0.9238754325259516  with n= 850
Accuracy= 0.9134948096885813  with n= 900
Accuracy= 0.903114186851211  with n= 950
Accuracy= 0.916955017301038  with n= 1000
Accuracy= 0.9100346020761245  with n= 1050
Accuracy= 0.9100346020761245  with n= 1100
Accuracy= 0.9065743944636678  with n= 1150
Accuracy= 0.916955017301038  with n= 1200
Accuracy= 0.9204152249134948  with

Accuracy= 0.9134948096885813  with n= 9750
Accuracy= 0.916955017301038  with n= 9800
Accuracy= 0.916955017301038  with n= 9850
Accuracy= 0.9134948096885813  with n= 9900
Accuracy= 0.9134948096885813  with n= 9950
