In [2]:
import os
import cv2
import pickle
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from numpy.linalg import norm
from tensorflow.keras import Sequential, Model
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.applications.resnet_v2 import ResNet50V2,preprocess_input
from tensorflow.keras.applications.vgg19 import VGG19

In [3]:
# Frozen ImageNet ResNet50V2 backbone without its classification head.
backbone = ResNet50V2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
backbone.trainable = False

# Feature extractor: backbone followed by global max pooling over the spatial axes.
model = Sequential([backbone, GlobalMaxPooling2D()])

In [61]:
# Load the precomputed features and the file names stored next to them.
# `with` closes each file handle as soon as the pickle has been read.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('D:/Project Local/feature-extraction/models/resnet50v2/features.pkl', 'rb') as features_file:
    feature_list = np.array(pickle.load(features_file))
with open('D:/Project Local/feature-extraction/models/resnet50v2/filenames.pkl', 'rb') as filenames_file:
    filenames = pickle.load(filenames_file)

# Each entry is a one-element list holding the last '/'-separated path component
# (the `[-1:]` slice keeps the list wrapper that the query cell indexes with `[0]`).
new_filenames = [data.split('/')[-1:] for data in filenames]

In [9]:
# Count the entries loaded from filenames.pkl.
len(filenames)

4260

In [53]:
import pandas as pd

In [72]:
query_img = 'D:/Project Local/feature-extraction/test/test12.jpg'

# Embed the query image and scale the embedding to unit length.
img = load_img(query_img,target_size=(224,224))
img_array = img_to_array(img)
expanded_img_array = np.expand_dims(img_array, axis=0)
preprocessed_img = preprocess_input(expanded_img_array)
result = model.predict(preprocessed_img).flatten()
normalized_result = result / norm(result)

# Brute-force cosine search over the stored feature vectors.
neighbors = NearestNeighbors(n_neighbors=10,algorithm='brute',metric='cosine')
neighbors.fit(feature_list)

distances,indices = neighbors.kneighbors([normalized_result])

pred = []

# Walk the returned neighbours directly instead of repeating the neighbour count
# in a separate range().
for index, distance in zip(indices[0], distances[0]):
    # new_filenames entries are one-element lists; strip the '.jpg' suffix to get the name.
    product_name = new_filenames[index][0].split('.jpg')[0]
    # Cosine distance -> percentage score (0 distance == 100).
    score = round((1 - distance) * 100, 2)
    # NOTE: get_product_id re-reads the CSV on every call.
    id_product = get_product_id(data_path='D:/Project Local/feature-extraction/data/all_product.csv', product=product_name)
    # A name with no match in the CSV yields an empty list; record None instead
    # of failing with IndexError.
    pred.append([id_product[0] if id_product else None, product_name, score])
df = pd.DataFrame(pred, columns=['id_product', 'product_name', 'score'])



In [73]:
# Display the ranked matches (id, product name, score).
df

Unnamed: 0,id_product,product_name,score
0,6145,Wipro Bor Listrik SDS W6261 26mm,100.0
1,6144,Wipro Bor Listrik SDS W6240 24mm,79.14
2,6143,Wipro Bor Listrik Impact W6137 13mm,78.79
3,6163,Wipro Bor Listrik Impact W6132 13mm,78.11
4,6142,Wipro Bor Listrik Impact W6130 13mm,75.19
5,6152,"Maktec Bor 6,5mm MT 653",74.16
6,1837,Mesin Bor Beton 13 mm Bosch GSB 550,73.05
7,1838,Mesin Bor Baterai Cordless Drill Makita DF 331,72.59
8,6151,Wipro Gerinda 3600 6 inch,72.42
9,6155,Makita Bor Beton 16mm HP 1630,71.38


In [74]:
import json

In [77]:
# Round-trip the results through a JSON file: write the frame as a list of
# records, then read it back as plain Python objects.
temp_json = 'D:/Project Local/feature-extraction/temp/result.json'
df.to_json(temp_json, orient='records')
# `with` guarantees the file handle is closed once the JSON has been parsed.
with open(temp_json) as f:
    data_json = json.load(f)

In [78]:
# Show the records parsed back from the JSON file.
data_json

[{'id_product': 6145,
  'product_name': 'Wipro Bor Listrik SDS W6261 26mm',
  'score': 100.0},
 {'id_product': 6144,
  'product_name': 'Wipro Bor Listrik SDS W6240 24mm',
  'score': 79.14},
 {'id_product': 6143,
  'product_name': 'Wipro Bor Listrik Impact W6137 13mm',
  'score': 78.79},
 {'id_product': 6163,
  'product_name': 'Wipro Bor Listrik Impact W6132 13mm',
  'score': 78.11},
 {'id_product': 6142,
  'product_name': 'Wipro Bor Listrik Impact W6130 13mm',
  'score': 75.19},
 {'id_product': 6152,
  'product_name': 'Maktec Bor 6,5mm MT 653',
  'score': 74.16},
 {'id_product': 1837,
  'product_name': 'Mesin Bor Beton 13 mm Bosch GSB 550',
  'score': 73.05},
 {'id_product': 1838,
  'product_name': 'Mesin Bor Baterai Cordless Drill Makita DF 331',
  'score': 72.59},
 {'id_product': 6151,
  'product_name': 'Wipro Gerinda 3600 6 inch',
  'score': 72.42},
 {'id_product': 6155,
  'product_name': 'Makita Bor Beton 16mm HP 1630',
  'score': 71.38}]

In [69]:
def get_product_id(data_path, product):
    """Return the first-column values of the CSV rows whose sanitised name equals ``product``.

    The CSV at ``data_path`` is read, a ``NewName`` column is derived from the
    value at position 1 of every row (converted to ``str``, with each '/'
    replaced by '_'), the ``PdNama`` column is dropped, and ``product`` is then
    compared against position 1 of the remaining columns.

    NOTE(review): the lookup is positional -- presumably the CSV is laid out as
    (id, PdNama) so that ``NewName`` ends up at position 1 after the drop; confirm.

    Args:
        data_path: path of the product CSV (must contain a ``PdNama`` column).
        product: sanitised product name to look for.

    Returns:
        list with the position-0 value of every matching row (empty when none match).
    """
    catalogue = pd.read_csv(data_path)
    sanitised_names = [str(row[1]).replace('/', '_') for row in catalogue.values]
    catalogue = catalogue.assign(NewName=sanitised_names).drop(columns=['PdNama'])
    return [row[0] for row in catalogue.values if product == row[1]]

In [24]:
# Print the distance row of each query; use the loop variable rather than
# re-printing distances[0] on every pass.
for row_distances in distances:
    print(row_distances)

[1.1920929e-07 2.0862925e-01 2.1213269e-01 ... 7.9899073e-01 8.0128855e-01
 8.0128855e-01]


In [3]:
# Run main3.py as a separate process through the shell; its output is shown below the cell.
!python main3.py


1978 0.0
1980 0.20862919
1981 0.21213251
1976 0.2189169
1974 0.24806082
['Wipro Bor Listrik SDS W6261 26mm', 'Wipro Bor Listrik SDS W6240 24mm', 'Wipro Bor Listrik Impact W6137 13mm', 'Wipro Bor Listrik Impact W6132 13mm', 'Wipro Bor Listrik Impact W6130 13mm']


2023-08-30 20:29:17.064896: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE SSE2 SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
import os
import cv2
import pickle
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from numpy.linalg import norm
from tensorflow.keras import Sequential, Model
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.applications.resnet_v2 import ResNet50V2,preprocess_input
from tensorflow.keras.applications.vgg19 import VGG19

In [None]:
# Frozen ImageNet ResNet50V2 backbone without its classification head.
backbone = ResNet50V2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
backbone.trainable = False

# Feature extractor: backbone followed by global max pooling over the spatial axes.
model = Sequential([backbone, GlobalMaxPooling2D()])

In [None]:
# Anchor paths at the directory the kernel is currently running in.
base_path = os.getcwd()
# Location of the 'pickle-group' folder under base_path
# (not referenced by any other cell visible in this notebook).
pickle_path = os.path.join(base_path, 'pickle-group')

In [None]:
# Load the stored features and their file names; `with` closes each handle
# as soon as the pickle has been read.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('models/resnet/resv2features.pkl', 'rb') as features_file:
    feature_list = np.array(pickle.load(features_file))
with open('models/resnet/resv2filenames.pkl', 'rb') as filenames_file:
    filenames = pickle.load(filenames_file)

In [None]:
# Inspect the raw entries loaded from resv2filenames.pkl.
filenames

In [None]:
# Reduce every stored path to "<second-to-last component>/<last component>".
# Each path is split once; a path without any '/' still raises IndexError.
new_filenames = [
    f"{tail[0]}/{tail[1]}"
    for tail in (data.split('/')[-2:] for data in filenames)
]
new_filenames

In [None]:
import matplotlib.pyplot as plt

In [None]:
query_img = 'test/test12.jpg'

# Embed the query image and scale the embedding to unit length.
img = load_img(query_img, target_size=(224, 224))
img_array = img_to_array(img)
expanded_img_array = np.expand_dims(img_array, axis=0)
preprocessed_img = preprocess_input(expanded_img_array)
result = model.predict(preprocessed_img).flatten()
normalized_result = result / norm(result)

# Brute-force cosine search for the five closest stored feature vectors.
neighbors = NearestNeighbors(n_neighbors=5, algorithm='brute', metric='cosine')
neighbors.fit(feature_list)
distances, indices = neighbors.kneighbors([normalized_result])

# Re-read the query with OpenCV; only the (currently disabled) matplotlib
# preview of the query image consumed this.
img = cv2.imread(query_img)

pred = []

# One pass per returned neighbour: log index and distance, collect the stored
# path with everything from the first '.jpg' onwards removed.
# (A matplotlib preview of each match used to live here and is disabled.)
for index, distance in zip(indices[0], distances[0]):
    name = filenames[index].split('.jpg')[0]
    print(index, distance)
    pred.append(name)
print(pred)

In [None]:
# Run main2.py as a separate process through the shell.
!python main2.py

In [None]:
# Look up the product name stored at one fixed index: take the part after the
# last '/' of the "<group>/<file>" entry and cut it at the first '.jpg'.
# NOTE(review): the index never changes, so the same name is printed five times
# -- confirm whether iterating over a list of indices was intended.
index = 3206
name = new_filenames[index].split('/')[-2:][1].split('.jpg')[0]
for i in range(5):
    print(name)
#     print(index)

In [None]:
# resnet -- presumably the neighbour indices returned with the ResNet features (TODO confirm)
3368
3206
2733
4128
3227

# vgg -- presumably the neighbour indices returned with the VGG features (TODO confirm)
1978
1981
1980
1976
1970

In [None]:
# data_pkl = pickle.load(open('models/1 embeddings.pkl', 'rb'))

In [None]:
# filename = []
# features = []
# for file in data_pkl.values:
#     filename.append(file[0])
#     features.append(file[1][0])
# features_arr = np.array(features)

In [None]:
# def load_model(feature_list_path, filenames_path):
#     model = ResNet50V2(weights='imagenet',include_top=False,input_shape=(224,224,3))
#     model.trainable = False

#     model = Sequential([
#         model,
#         GlobalMaxPooling2D()
#     ])
    
#     feature_list = np.array(pickle.load(open(feature_list_path, 'rb')))
#     filenames = pickle.load(open(filenames_path, 'rb'))
    
#     new_filenames = [data.split('/')[-2:][1] for data in filenames]

#     return model, feature_list, new_filenames

In [None]:
# feature_list = np.array(pickle.load(open('models/1 embeddings.pkl', 'rb')))
# filenames = pickle.load(open('models/1 filenames.pkl', 'rb'))

In [None]:
# feature_list

In [None]:
# Use this variant with the old ResNet V1 model: keep only the final
# '/'-separated component of each path (each path must contain at least one '/').
new_filenames = [data.split('/')[-2:][1] for data in filenames]

In [None]:
# Use this variant with the newer model when the dataset has no group folders:
# prefix every name with 'tokoquick/'.
# NOTE(review): `filename` (singular) is only assigned in a commented-out cell of
# the visible notebook, so this raises NameError on a fresh kernel -- confirm
# whether `filenames` was intended.
new_filenames = ['tokoquick' + '/' + file for file in filename]

In [None]:
# Inspect whichever new_filenames variant was computed last.
new_filenames

In [None]:
def get_prediction(query_image, model, feature_list, filenames, n_neighbors=5):
    """Return the stored file names whose features are closest to a query image.

    Args:
        query_image: image path (anything ``load_img`` accepts) to search with.
        model: object whose ``predict`` returns the embedding for a batch of one
            preprocessed 224x224 image.
        feature_list: stored embeddings to search; assumed to be row-aligned
            with ``filenames`` (TODO confirm against the code that built them).
        filenames: sequence indexed by the neighbour indices.
        n_neighbors: number of matches to return; defaults to 5, the value that
            used to be hard-coded.

    Returns:
        list of ``filenames`` entries in the order ``kneighbors`` returns them
        (increasing cosine distance).
    """
    # Load the image passed by the caller. Previously the notebook-global
    # `query_img` was read here, so the `query_image` argument was ignored.
    img = load_img(query_image, target_size=(224, 224))
    img_array = img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)

    # Flatten the single-image batch output and scale it to unit length.
    result = model.predict(preprocessed_img).flatten()
    normalized_result = result / norm(result)

    neighbors = NearestNeighbors(n_neighbors=n_neighbors, algorithm='brute', metric='cosine')
    neighbors.fit(feature_list)

    # Distances are not needed for the returned value.
    _, indices = neighbors.kneighbors([normalized_result])

    # Iterate over what was actually returned instead of a second hard-coded count.
    return [filenames[index] for index in indices[0]]