# In [None]:

import csv
import pickle
import random

import cv2
import numpy as np
from PIL import Image
from ultralytics import YOLO
import os
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image as keras_image
from ultralytics import YOLO


# --- Module-level configuration -------------------------------------------
# Weights file for the YOLO detector (resolved relative to the working dir).
model_path = 'last_288.pt'
# Not used in any computation visible in this file.
coordinate = 0.8

# --- Models (loaded once when the module/cell is executed) ----------------
# YOLO detector used to locate the object of interest in an image.
model = YOLO(model_path)

# VGG16 without its classification head; its output is flattened and used
# as the feature vector for similarity search.
base_model = VGG16(include_top=False, weights='imagenet')

def return_class_name(class_name):
    """Translate a numeric class-id string into its readable label.

    Args:
        class_name: Class id as a string (e.g. ``'0'``, ``'27'``).

    Returns:
        The label for a known id (``'0'`` -> ``'star'``, ``'27'`` ->
        ``'circle'``, ...). Any value that is not a known id is returned
        unchanged.
    """
    labels_by_id = {
        '0': 'star',
        '2': 'cloud',
        '3': 'water drop',
        '4': 'man head',
        '5': 'man picture',
        '9': 'horse',
        '19': 'house',
        '25': 'crown',
        '27': 'circle',
        '28': 'triangle',
    }
    # Fall back to the input itself so unknown ids pass through untouched.
    return labels_by_id.get(class_name, class_name)

# Annotate an image in place with its first bounding box and class label
def draw_boxes(image, boxes, names, conf):
    """Draw the first box in ``boxes`` and its label onto ``image`` in place.

    Only the first box is drawn; any further boxes are ignored. When
    ``boxes`` is empty the image is left untouched.

    Args:
        image: Image array handed to ``cv2.rectangle`` / ``cv2.putText``.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes; values are truncated
            to ``int`` before drawing.
        names: Lookup from integer class id to class name.
        conf: Despite the name, this is indexed as per-box class ids:
            ``int(conf[0])`` is the key looked up in ``names``.

    Returns:
        None.
    """
    nothing = object()
    first_box = next(iter(boxes), nothing)
    if first_box is nothing:
        return

    left, top, right, bottom = (int(value) for value in first_box)
    label = return_class_name(names[int(conf[0])])

    # Box outline, then the label just inside the box's top-left corner.
    cv2.rectangle(image, (left, top), (right, bottom), (0, 0, 255), 2)
    cv2.putText(image, label, (left + 5, top + 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)

def _first_prediction(source):
    """Run the module-level YOLO ``model`` on ``source`` and return its first result."""
    return next(iter(model.predict(source)))


def _crop_first_box(image, xyxy):
    """Return the region of ``image`` covered by the first row of ``xyxy``."""
    x1, y1, x2, y2 = (int(value) for value in xyxy[0])
    return image[y1:y2, x1:x2]


def find_most_similar_image(new_image_path, load_features_path, result):
    """Find the stored image whose features are closest to a query image.

    The query image is run through the YOLO ``model``; the first detected
    box is cropped, resized to 128x128 and passed through ``base_model``
    (VGG16) to obtain a feature vector. That vector is compared by cosine
    similarity against every entry of the pickled feature store, and the
    best-scoring stored image is re-detected, annotated with ``draw_boxes``
    and saved under ``result/``.

    Args:
        new_image_path: Path of the query image.
        load_features_path: Path of a pickle file holding a mapping from
            image file name (relative to ``sample1000/``) to a feature array.
            The file is unpickled, so it must come from a trusted source.
        result: Currently unused; the output directory is hard-coded to
            ``'result'``. NOTE(review): this was presumably meant to be the
            output directory - confirm before wiring it in.

    Returns:
        A tuple ``(most_similar_image_path, similarity, cropped_img)``:

        * ``most_similar_image_path`` - path of the annotated copy written
          under ``result/``.
        * ``similarity`` - cosine similarity between query and best match.
        * ``cropped_img`` - crop of the first detected box in the matched
          image; if nothing is detected there, the query image's crop.

        When the query image has no detection, or the feature store is
        empty, ``('', 0.0, None)`` is returned so callers can always unpack
        three values.
    """
    print('new_image_path = ',new_image_path)

    # --- 1. Detect the object in the query image --------------------------
    query = _first_prediction(new_image_path)
    cls = query.boxes.cls.cpu().numpy()  # class ids of the query detections
    box_coordinates = query.boxes.xyxy.cpu().numpy()

    if box_coordinates.shape[0] == 0:
        return '', 0.0, None

    # np.fromfile + imdecode instead of cv2.imread (presumably to cope with
    # non-ASCII paths - TODO confirm).
    query_image = cv2.imdecode(np.fromfile(new_image_path, dtype=np.uint8),
                               cv2.IMREAD_UNCHANGED)
    cropped_img = _crop_first_box(query_image, box_coordinates)

    # --- 2. Extract VGG16 features from the crop ---------------------------
    # NOTE(review): the crop is in OpenCV channel order and is fed to
    # preprocess_input unchanged. This is kept as-is because the stored
    # features were presumably produced the same way - verify before changing.
    img_data = cv2.resize(cropped_img, (128, 128))
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    query_vector = base_model.predict(img_data).reshape(1, -1)

    # --- 3. Compare against the stored features ----------------------------
    # SECURITY: pickle.load can execute arbitrary code; only load feature
    # files that this project produced itself.
    with open(load_features_path, 'rb') as f:
        loaded_features = pickle.load(f)

    if not loaded_features:
        # Nothing to compare against; report "no match" rather than trying
        # to open the bare 'sample1000' directory as an image.
        return '', 0.0, None

    similarity_by_key = {
        key: cosine_similarity(query_vector, item.reshape(1, -1))[0][0]
        for key, item in loaded_features.items()
    }
    # max() returns the first best entry, matching the previous stable
    # descending sort, without sorting the whole store.
    best_key = max(similarity_by_key, key=similarity_by_key.get)
    similarity_percentage = similarity_by_key[best_key]

    # --- 4. Annotate and save the best match -------------------------------
    match_image = cv2.imdecode(
        np.fromfile(os.path.join('sample1000', best_key), dtype=np.uint8),
        cv2.IMREAD_UNCHANGED)
    match = _first_prediction(match_image)
    match_xyxy = match.boxes.xyxy.cpu().numpy()

    if match_xyxy.shape[0] > 0:
        # Copy before drawing so the returned crop carries no annotation.
        cropped_img = _crop_first_box(match_image, match_xyxy).copy()

    # NOTE(review): the label is looked up with the *query* image's class
    # ids (cls), not the matched image's - confirm this is intended.
    draw_boxes(match_image, match_xyxy, match.names, cls)

    most_similar_image_path = os.path.join('result', best_key)
    # The key may contain sub-directories; make sure the target exists.
    os.makedirs(os.path.dirname(most_similar_image_path), exist_ok=True)
    Image.fromarray(cv2.cvtColor(match_image, cv2.COLOR_BGR2RGB)).save(most_similar_image_path)

    return most_similar_image_path, similarity_percentage, cropped_img

# most_similar_image_path: path of the most similar image
# similarity_percentage: similarity score (0 to 1)
# cropped_img: image array cropped to the box predicted by the YOLO model

# Example run: look up the stored image most similar to a sample query.
new_image_path = 'test sample/circle.jpg'   # query image
load_features_path = 'features.pkl'         # pickled feature store
result = 'result'                           # passed through as the third argument
find_most_similar_image(
    new_image_path=new_image_path,
    load_features_path=load_features_path,
    result=result,
)


