# Upload the classifier model (`classifier.h5`) to the session storage before running

Clone the github repo

In [1]:
!git clone --branch main https://github.com/cssaivishnu/IR_Repo.git

Cloning into 'IR_Repo'...
remote: Enumerating objects: 63783, done.[K
remote: Counting objects: 100% (96/96), done.[K
remote: Compressing objects: 100% (59/59), done.[K
remote: Total 63783 (delta 49), reused 71 (delta 37), pack-reused 63687[K
Receiving objects: 100% (63783/63783), 2.34 GiB | 23.38 MiB/s, done.
Resolving deltas: 100% (68/68), done.
Updating files: 100% (76488/76488), done.


Import the essential libraries

In [2]:
import numpy as np
import os
import shutil
import cv2
from google.colab.patches import cv2_imshow
import torchvision.transforms as transforms
from PIL import Image
import json
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd

# Seed TensorFlow's global random generator.
tf.random.set_seed(20)
# Seed Python's and NumPy's global generators by *calling* seed(). Assigning
# to the attribute (`random.seed = 20`) replaces the function with an int and
# leaves the generator unseeded, so random.shuffle() would differ on every run.
random.seed(20)
np.random.seed(20)

Make the cloned repo the current working directory

In [3]:
# Switch into the cloned repository so the relative paths used later
# (e.g. 'dataset') resolve against it, then print the directory to confirm.
os.chdir('/content/IR_Repo')
cwd = os.getcwd()
print(cwd)

/content/IR_Repo


From the complete dataset of approximately 63285 images from 35 categories, we consider only the product categories with at least 150 images.

In [4]:
def func(name):
    """Return ``name`` with every '&' and '-' replaced by '_'.

    All other characters are kept, so the result has the same length as the
    input.
    """
    return name.translate(str.maketrans({'&': '_', '-': '_'}))

Create the label encodings mapping the product categories

In [5]:
# Build the two-way mapping between product-category names and integer labels.
# Labels are assigned in the sorted order of the directory names under
# 'dataset'; names are normalised with func() ('&' and '-' become '_').
dir_path = 'dataset'
name_list = sorted(os.listdir(dir_path))

name_to_label = dict()
label_to_name = dict()

# Only directories are categories; any loose file in 'dataset' is ignored.
category_dirs = (
    entry for entry in name_list
    if os.path.isdir(os.path.join(dir_path, entry))
)
for i, name in enumerate(map(func, category_dirs)):
    name_to_label[name] = i
    label_to_name[i] = name

Load 500 of the 600 images from each product category and resize them to (56, 56) for training and evaluating the model

In [7]:
# Load the images of every category directory into `dataset` as
# (pixel_array, label) tuples.
dir_path = 'dataset'
dataset = []
image_shape = (56, 56)
max_image_number = 500  # files whose numeric name exceeds this are skipped

for name in name_list:
    path = os.path.join(dir_path, name)
    if not os.path.isdir(path):
        continue
    label = name_to_label[func(name)]
    # sorted() makes the load order independent of the filesystem's listing
    # order, which os.listdir() does not guarantee.
    for f in sorted(os.listdir(path)):
        img_path = os.path.join(path, f)
        if not os.path.isfile(img_path):
            continue
        try:
            num = int(f.split('.')[0])
        except ValueError:
            # Not named '<number>.<ext>' (e.g. a hidden or system file): skip
            # it instead of aborting the whole load.
            continue
        if num > max_image_number:
            continue
        # The context manager closes the underlying file once the pixels have
        # been read, so thousands of handles are not left open.
        with Image.open(img_path) as raw_img:
            # convert('RGB') guarantees 3 channels (grayscale / RGBA inputs
            # would otherwise break the array shape).
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            img = raw_img.convert('RGB').resize(image_shape, Image.LANCZOS)
        # np.asarray yields [row][col][channel]. The previous per-pixel loop
        # built the array as [x][y][channel], i.e. with the two spatial axes
        # swapped, so transpose to keep the exact layout the saved classifier
        # is fed. astype(int) keeps the default integer dtype that building
        # the array from nested Python lists produced.
        lst = np.asarray(img).transpose(1, 0, 2).astype(int)
        dataset.append((lst, label))

Shuffle the dataset for randomness and split it into features (X) and labels (y)

In [8]:
# Shuffle the (image, label) pairs in place, then separate them into a feature
# array X and a label column y.
random.shuffle(dataset)
X = np.array([image for image, _ in dataset])
# reshape(-1, 1) infers the number of samples from the data instead of
# hard-coding 11000, so the cell keeps working if the dataset size changes.
y = np.array([label for _, label in dataset]).reshape(-1, 1)

# Just clear the space
# dataset = []

Initialisation

In [9]:
# Shape of one input sample: 56 x 56 pixels with 3 colour channels.
input_shape = (56, 56, 3)
# Count the categories that actually received a label. name_list holds every
# entry of the dataset folder (including any non-directory files), whereas
# labels are only assigned to directories.
num_categories = len(label_to_name)

Split the dataset into train and test data

In [10]:
# Hold out 20% of the samples for testing. stratify=y keeps the class
# proportions equal in both parts; random_state fixes the split for a given
# input order.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Report the shape of each part.
for split_name, split in (('X_train', X_train), ('X_test', X_test),
                          ('y_train', y_train), ('y_test', y_test)):
    print(f'{split_name}:', split.shape)

X_train: (8800, 56, 56, 3)
X_test: (2200, 56, 56, 3)
y_train: (8800, 1)
y_test: (2200, 1)


Load the model uploaded to the session and get the prediction probabilities

In [16]:
# Load the saved Keras model from 'classifier.h5' (resolved against the
# current working directory).
classifier = keras.models.load_model('classifier.h5')
# One row of per-class scores per test image; the next cell reduces each row
# to a label with argmax.
y_pred = classifier.predict(X_test)



Get all the statistical metrics

In [17]:
# Flatten the (n, 1) label column to 1-D; reshape(-1) infers n instead of
# hard-coding 2200.
y_test = y_test.reshape(-1)

# Reduce the per-class scores to the predicted label of each sample.
# y_pred is overwritten here, so the ndim guard keeps the cell safe to re-run:
# taking argmax of the already 1-D labels element by element would silently
# turn every prediction into class 0.
if y_pred.ndim > 1:
    y_pred = np.argmax(y_pred, axis=1)

# Overall accuracy plus per-class precision / recall / F1 (average=None
# returns one value per class).
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average=None)
reca = recall_score(y_test, y_pred, average=None)
f1_s = f1_score(y_test, y_pred, average=None)
print(f'Test Accuracy: {round(ac*100,2)}%')

Test Accuracy: 76.32%


Create two pandas DataFrames to record the results and save them as a single Excel file


In [18]:
# Confusion matrix as a table: one row per true class, one 'label_<k>' column
# per predicted class, preceded by the row's label id and category name.
# The class count comes from the matrix itself rather than a hard-coded 22.
# NOTE: row i is taken to be label i, which holds when every label occurs in
# y_test or y_pred (confusion_matrix only reports labels it has seen).
n_classes = cm.shape[0]
lst = [f'label_{i}' for i in range(n_classes)]

cm_df = pd.DataFrame(cm, columns=lst)
# Insert in reverse so the final column order is
# 'Labels', 'Product_Category', 'label_0', ...
cm_df.insert(0, 'Product_Category', [label_to_name[i] for i in range(n_classes)])
cm_df.insert(0, 'Labels', lst)

In [None]:
# Per-class precision / recall / F1 (rounded to 3 decimals) next to the
# category name, followed by three summary rows: Average, Minimum, Maximum.
metric_values = {'Precision': prec, 'Recall': reca, 'f1_score': f1_s}

# .copy() yields an independent frame; adding columns to a slice of cm_df
# triggers pandas' SettingWithCopyWarning.
per_class_df = cm_df[['Product_Category']].copy()
for column, values in metric_values.items():
    per_class_df[column] = np.round(values, 3)

# Summary statistics are computed from the rounded per-class values.
# They are built as a small frame and appended once, without binding the
# names `min` / `max` (which would shadow the built-ins for the rest of the
# session).
summary_df = per_class_df[list(metric_values)].agg(['mean', 'min', 'max'])
summary_df.insert(0, 'Product_Category', ['Average', 'Minimum', 'Maximum'])

metrics_df = pd.concat([per_class_df, summary_df], ignore_index=True)

In [20]:
# The context manager saves and closes the workbook on exit. The explicit
# writer.save() it replaces is deprecated and was removed in pandas 2.0.
with pd.ExcelWriter('Results.xlsx') as writer:
    # Write each dataframe to a different sheet
    metrics_df.to_excel(writer, sheet_name='Metrics', index=False)
    cm_df.to_excel(writer, sheet_name='Confusion_Matrix', index=False)

  writer.save()
