In [1]:
import cv2
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from io_minio import get_single_object_img
from db_common import select_data

In [2]:
def process_image_pdi_concat(image):
    """Build a colour/intensity histogram descriptor for one BGR image.

    Four 256-bin histograms are computed (grayscale, then the B, G and R
    channels), each one is normalised independently with ``cv2.normalize``
    using its default settings, and the results are concatenated in that
    order into a single flat vector.

    Parameters
    ----------
    image : numpy.ndarray
        Image in OpenCV BGR channel order, e.g. the value returned by
        ``cv2.imread``.

    Returns
    -------
    numpy.ndarray
        1-D array of length 1024 laid out as ``[gray | blue | green | red]``.

    Raises
    ------
    ValueError
        If ``image`` is ``None`` (``cv2.imread`` returns ``None`` instead of
        raising when a file cannot be read).
    """
    if image is None:
        # Fail early with a readable message instead of an opaque OpenCV
        # assertion raised from inside cvtColor.
        raise ValueError(
            "image is None: the source file or object could not be loaded or decoded"
        )

    def _normalized_hist(img, channel):
        # 256 bins over the value range [0, 256); normalised in place and flattened.
        hist = cv2.calcHist([img], [channel], None, [256], [0, 256])
        return cv2.normalize(hist, hist).flatten()

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Order matters for downstream feature columns: gray first, then B, G, R.
    parts = [_normalized_hist(gray, 0)]
    parts.extend(_normalized_hist(image, channel) for channel in range(3))
    return np.concatenate(parts)

In [5]:
# Load every row of the DATA table through the project helper.
df = select_data('SELECT * FROM DATA')
# Fetch the image for each stored path. Applying over the single column avoids
# building a full row Series per record, and it also works when the query
# returns no rows (row-wise DataFrame.apply yields a DataFrame in that case,
# which cannot be assigned to one column).
df["img"] = df["path_data"].apply(get_single_object_img)
df

Unnamed: 0,id_data,path_data,tp_data,dt_inclusion,img
0,1,6991d71c229338e96edda28cd,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
1,2,d690a57297e2febe812fcf1ab,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
2,3,325b8ba8fd664dfe6f3fc043f,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
3,4,7222b60f9716b3f7e550e44d0,IMG,2025-10-28,"[[[254, 253, 255], [254, 253, 255], [254, 254,..."
4,5,08b3ef1df43b5e5c4586ed4d4,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
...,...,...,...,...,...
7005,7006,d448e783bc5e7ba14989346b0,IMG,2025-10-28,"[[[255, 255, 254], [255, 255, 254], [255, 255,..."
7006,7007,62158fdd835a3c7a91342cbde,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
7007,7008,5206ed10be9954db3f2d6c04e,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
7008,7009,1780569b3e22efad5f5804be1,IMG,2025-10-28,"[[[253, 254, 255], [255, 254, 255], [255, 254,..."


In [9]:
# One histogram feature vector per image, stacked into an
# (n_images, n_features) matrix.
features = df['img'].apply(process_image_pdi_concat)
feature_matrix = np.vstack(features.values)

num_cols = feature_matrix.shape[1]
feature_cols = [f"feat_{i}" for i in range(num_cols)]

# Reuse df's index: pd.concat(axis=1) aligns on index labels, so a fresh
# RangeIndex would misalign rows (and introduce NaNs) if df is not 0..n-1.
df_features = pd.DataFrame(feature_matrix, columns=feature_cols, index=df.index)
# Drop feat_* columns left over from a previous execution so that re-running
# this cell does not append duplicate feature columns.
df = pd.concat([df.drop(columns=feature_cols, errors="ignore"), df_features], axis=1)

# Ask for as many neighbours as there are fitted rows so a query returns a
# full ranking of the data set; the count is taken from the matrix that is
# actually fitted.
knn = NearestNeighbors(n_neighbors=feature_matrix.shape[0], metric="euclidean")
knn.fit(feature_matrix)
df

Unnamed: 0,id_data,path_data,tp_data,dt_inclusion,img,feat_0,feat_1,feat_2,feat_3,feat_4,...,feat_1014,feat_1015,feat_1016,feat_1017,feat_1018,feat_1019,feat_1020,feat_1021,feat_1022,feat_1023
0,1,6991d71c229338e96edda28cd,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0.0,0.000000,0.000000,0.000000,0.000000,...,0.001191,0.001985,0.003970,0.009131,0.024216,0.026598,0.048829,0.078207,0.174278,0.954755
1,2,d690a57297e2febe812fcf1ab,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0.0,0.000000,0.000000,0.000000,0.000000,...,0.001717,0.002289,0.003433,0.007725,0.012303,0.021459,0.037768,0.050072,0.137912,0.976539
2,3,325b8ba8fd664dfe6f3fc043f,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000300,0.002402,0.005105,0.017118,0.024625,0.040542,0.062464,0.160365,0.972400
3,4,7222b60f9716b3f7e550e44d0,IMG,2025-10-28,"[[[254, 253, 255], [254, 253, 255], [254, 254,...",0.0,0.000000,0.000000,0.000000,0.000000,...,0.001213,0.003235,0.006066,0.012132,0.025477,0.049741,0.059851,0.089372,0.213117,0.944265
4,5,08b3ef1df43b5e5c4586ed4d4,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0.0,0.000000,0.000000,0.000000,0.000000,...,0.001024,0.002389,0.003753,0.007166,0.018767,0.033439,0.051183,0.062784,0.204731,0.958822
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7005,7006,d448e783bc5e7ba14989346b0,IMG,2025-10-28,"[[[255, 255, 254], [255, 255, 254], [255, 255,...",0.0,0.000000,0.000000,0.000370,0.000370,...,0.000325,0.001950,0.000975,0.001950,0.005200,0.010400,0.036725,0.031200,0.132600,0.974024
7006,7007,62158fdd835a3c7a91342cbde,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0.0,0.000000,0.000000,0.000000,0.000000,...,0.001520,0.000380,0.001520,0.004940,0.006840,0.013680,0.044079,0.027739,0.147436,0.959853
7007,7008,5206ed10be9954db3f2d6c04e,IMG,2025-10-28,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0.0,0.001577,0.000946,0.003786,0.007887,...,0.000845,0.000564,0.000564,0.001127,0.003945,0.008734,0.028739,0.025358,0.145667,0.979097
7008,7009,1780569b3e22efad5f5804be1,IMG,2025-10-28,"[[[253, 254, 255], [255, 254, 255], [255, 254,...",0.0,0.000000,0.000000,0.000612,0.001225,...,0.002128,0.003725,0.003725,0.004257,0.005853,0.019155,0.053208,0.037778,0.176651,0.915710


In [12]:
query_path = '/dataset/Banana/r0_3_100.jpg'
query_img = cv2.imread(query_path)
# cv2.imread reports failure by returning None rather than raising, so check
# explicitly before the image reaches the feature extractor.
if query_img is None:
    raise FileNotFoundError(
        f"Could not read query image (missing or not a decodable image): {query_path}"
    )

query_vec = process_image_pdi_concat(query_img).reshape(1, -1)

# Single query row, so both results have shape (1, n_neighbors).
distances, indices = knn.kneighbors(query_vec)

# Pull the path column once; indexing df.iloc[idx] inside the loop would build
# a full row (all feature columns plus the image array) for every neighbour.
paths = df['path_data'].to_numpy()

results = []
for rank, idx in enumerate(indices[0]):
    image_name = paths[idx]
    distance = round(float(distances[0][rank]), 5)
    # Map distance in [0, inf) to a similarity score in (0, 1].
    similarity = round(float(1 / (1 + distance)), 2)

    results.append({
        "rank": rank + 1,
        "image_path": image_name,
        "distance": distance,
        "similarity": similarity
    })

# A stable sort keeps the original rank order among neighbours whose rounded
# distances tie.
df_results = (
    pd.DataFrame(results)
    .sort_values(by="distance", ascending=True, kind="stable")
    .reset_index(drop=True)
)

indices

array([[4653, 4687, 4683, ..., 4100, 4016, 4044]], shape=(1, 7010))

In [13]:
# kneighbors returns one row per query; show the neighbour positions for the single query.
indices[0]

array([4653, 4687, 4683, ..., 4100, 4016, 4044], shape=(7010,))