# In [None]:
import csv
import os
import pickle

import cv2
import h5py
import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image as keras_image
import pandas as pd

from ultralytics import YOLO

# Module-level models shared by the functions below; both are loaded once,
# as a side effect of running/importing this file.

# YOLO detector loaded from the checkpoint file 'last_288.pt'
# (path is resolved relative to the current working directory).
model_path = 'last_288.pt'
model = YOLO(model_path)

# VGG16 with ImageNet weights and without its top (classifier) layers,
# used as the feature extractor in extract_and_save_features.
base_model = VGG16(weights='imagenet', include_top=False)

# Function to draw bounding boxes on an image
def draw_boxes(image, boxes, names, conf):
    """Annotate *image* in place with the first entry of *boxes*.

    Only the first box is handled: the function returns right after drawing
    it, and does nothing at all when *boxes* is empty.

    Args:
        image: Image passed to ``cv2.rectangle`` / ``cv2.putText``.
        boxes: Iterable of 4-value boxes ``(x1, y1, x2, y2)``; each value is
            cast to ``int``.
        names: Container indexed by class id that yields the raw class name.
        conf: Indexable whose first entry is cast to ``int`` and used as the
            class id (despite the parameter name, it is used as an index
            into *names*, not as a confidence score).
    """
    box_color = (0, 0, 255)
    text_color = (255, 0, 0)

    for first_box in boxes:
        left, top, right, bottom = (int(value) for value in first_box)
        label = return_class_name(names[int(conf[0])])

        cv2.rectangle(image, (left, top), (right, bottom), box_color, 2)
        # The label is placed just inside the top-left corner of the box.
        cv2.putText(image, label, (left + 5, top + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, text_color, 2)
        return

# Raw class-id strings and the readable label each one is replaced with.
_CLASS_LABELS = {
    '0': 'star',
    '3': 'water drop',
    '4': 'man head',
    '5': 'man picture',
    '9': 'horse',
    '28': 'triangle',
    '2': 'cloud',
    '19': 'house',
    '27': 'circle',
    '25': 'crown',
}


def return_class_name(class_name):
    """Return the readable label for *class_name*.

    Known id strings (e.g. ``'0'``, ``'28'``) are translated to their label;
    any other value is returned unchanged.
    """
    return _CLASS_LABELS.get(class_name, class_name)

def _load_bgr_image(image_path):
    """Decode *image_path* into a 3-channel array, or return None if unusable."""
    # np.fromfile + imdecode is kept from the original code; presumably chosen
    # over cv2.imread so that non-ASCII paths work -- TODO confirm.
    raw = np.fromfile(image_path, dtype=np.uint8)
    if raw.size == 0:
        return None

    decoded = cv2.imdecode(raw, cv2.IMREAD_UNCHANGED)
    if decoded is None:
        return None

    # IMREAD_UNCHANGED can yield grayscale (2-D) or 4-channel arrays, which
    # VGG16 cannot consume; normalise those to 3 channels. Plain 3-channel
    # images pass through untouched.
    if decoded.ndim == 2:
        return cv2.cvtColor(decoded, cv2.COLOR_GRAY2BGR)
    if decoded.shape[2] == 4:
        return cv2.cvtColor(decoded, cv2.COLOR_BGRA2BGR)
    if decoded.shape[2] != 3:
        return None
    return decoded


def _first_detection_crop(img, threshold):
    """Return the crop of the first box YOLO reports for *img*, or None."""
    results = model.predict(img, conf=threshold)
    if not results:
        return None

    box_coordinates = results[0].boxes.xyxy.cpu().numpy()
    if box_coordinates.shape[0] == 0:
        return None

    x1, y1, x2, y2 = (int(value) for value in box_coordinates[0])
    crop = img[y1:y2, x1:x2]
    # A degenerate box gives an empty slice, which cv2.resize would reject.
    return crop if crop.size else None


def _vgg16_feature_vector(crop):
    """Return the flattened VGG16 output for *crop* resized to 128x128."""
    batch = np.expand_dims(cv2.resize(crop, (128, 128)), axis=0)
    # NOTE(review): the crop comes from OpenCV (BGR order) while the Keras
    # VGG16 preprocess_input is documented to convert RGB -> BGR itself.
    # Left as-is so new features stay comparable with ones already saved by
    # this pipeline -- confirm before changing.
    batch = preprocess_input(batch)
    return base_model.predict(batch).flatten()


def extract_and_save_features(samples_dir, output_path, threshold=0.2):
    """Extract one VGG16 feature vector per image and pickle them.

    For every regular file in *samples_dir* the image is decoded, run through
    the module-level YOLO ``model``, cropped to the first reported box, and
    the crop is fed to the module-level VGG16 ``base_model``. The result is a
    dict ``{file name: flattened feature vector}`` written to *output_path*
    with ``pickle``.

    Entries that cannot produce a feature vector are skipped with a printed
    message instead of crashing the run or reusing the previous image's crop:
    sub-directories, files that cannot be decoded as an image, images with no
    detection at *threshold*, and detections whose crop is empty.

    Args:
        samples_dir: Directory containing the sample images.
        output_path: Path of the pickle file to write.
        threshold: Confidence threshold passed to ``model.predict``.

    Raises:
        ValueError: If *samples_dir* is not an existing directory.
    """
    if not os.path.isdir(samples_dir):
        raise ValueError(f"'{samples_dir}' is not a valid directory.")

    all_features = {}

    for image_name in os.listdir(samples_dir):
        image_path = os.path.join(samples_dir, image_name)

        # os.listdir also returns sub-directories (e.g. notebook checkpoint
        # folders); those are not samples.
        if not os.path.isfile(image_path):
            continue

        img = _load_bgr_image(image_path)
        if img is None:
            print('skipped, not a decodable image = ', image_name)
            continue

        crop = _first_detection_crop(img, threshold)
        if crop is None:
            print('skipped, no usable detection = ', image_name)
            continue

        all_features[image_name] = _vgg16_feature_vector(crop)

    # Report what was actually stored, not how many entries were visited.
    print('sample features save count = ', len(all_features))
    with open(output_path, 'wb') as f:
        pickle.dump(all_features, f)
# Script entry: extract features for every image in 'sample1000' and pickle
# them to 'features.pkl', using the function's default threshold. Both paths
# are relative to the current working directory. This runs as soon as the
# file is executed.
samples_directory = 'sample1000'
result_path = 'features.pkl'
extract_and_save_features(samples_directory, result_path)

