In [None]:
import ast
import os
import pickle

import numpy as np
import pandas as pd
from google.colab import drive
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
# Mount Google Drive at /content/drive; every data and output path used in
# this notebook lives under that mount point.
drive.mount('/content/drive')

In [None]:
# Load VGG16 with ImageNet weights and build a model that outputs the
# activations of its 'fc1' layer, so predictions are feature vectors taken
# from that layer rather than the network's final output.
base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

In [None]:
def extract_features(img_path, model):
    """Return the flattened feature vector that `model` produces for one image.

    Args:
        img_path: Path of the image file to load.
        model: Keras model used for inference. It receives a batch of one
            224x224 image that has been passed through VGG16's
            `preprocess_input`.

    Returns:
        1-D numpy array containing the model output for the image.
    """
    img = image.load_img(img_path, target_size=(224, 224))
    img_data = image.img_to_array(img)
    # Add the batch axis: the model is fed a batch containing this one image.
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    # verbose=0: this function runs once per image, and the default progress
    # bar would otherwise print a line for every image in the dataset.
    features = model.predict(img_data, verbose=0)
    return features.flatten()

In [None]:
def build_feature_database(df, image_folder):
    """Extract a feature vector for every existing image referenced in `df`.

    Each row's 'image_filenames' value may be a list/tuple of filenames or the
    string representation of one (as produced by a CSV round trip). Rows with
    a missing, empty or unparsable value are reported and skipped, as are
    filenames that do not exist inside `image_folder`.

    Args:
        df: DataFrame with an 'image_filenames' column.
        image_folder: Directory that contains the image files.

    Returns:
        numpy array built from the extracted feature vectors, in row order and
        then filename order. Skipped rows and files contribute no entry, so
        the array can be shorter than the total number of listed filenames.
    """
    features_list = []
    for index, row in df.iterrows():
        filenames = row['image_filenames']
        if isinstance(filenames, str):
            # literal_eval only accepts Python literals, so text coming from
            # the CSV cannot execute arbitrary code the way eval() could.
            try:
                filenames = ast.literal_eval(filenames)
            except (ValueError, SyntaxError):
                print(f"Could not parse filenames for index {index}, skipping.")
                continue
        # Missing CSV cells arrive as NaN (a truthy float); treat anything
        # that is not a non-empty list/tuple as "no filenames".
        if not isinstance(filenames, (list, tuple)) or not filenames:
            print(f"No filenames found for index {index}, skipping.")
            continue
        for filename in filenames:
            img_path = os.path.join(image_folder, filename)
            if os.path.exists(img_path):
                # `model` is the notebook-level feature extractor.
                features_list.append(extract_features(img_path, model))
            else:
                print(f"File {img_path} not found, skipping.")
    return np.array(features_list)

In [None]:
# Folder holding the product images, and the product table whose
# 'image_filenames' column lists the image files of each product.
image_folder = '/content/drive/MyDrive/DS_KLTN/data/images/'
updated_df = pd.read_csv('/content/drive/MyDrive/DS_KLTN/data/fashion_vector_with_filenames.csv')

In [None]:
# Extract features for every image referenced in the product table
# (one model inference per image file found in image_folder).
features_array = build_feature_database(updated_df, image_folder)

#### **Save the extracted feature vectors (pickle)**

In [None]:
# Persist the extracted feature array as a pickle file on Drive.
with open('/content/drive/MyDrive/DS_KLTN/model/image_search.pkl', 'wb') as pickle_file:
    pickle.dump(features_array, pickle_file)

#### **Save the vectors to a binary `.npy` file**

In [None]:
# Destination of the .npy copy of the feature array.
vector_save_path = '/content/drive/MyDrive/DS_KLTN/data/image_vectors.npy'

In [None]:
# Write the feature array to the .npy file, then report where it went.
np.save(file=vector_save_path, arr=features_array)
print(f"Vectors saved successfully to {vector_save_path}")

### **Pair image vectors with product IDs**

In [None]:
# Inputs for the pairing step. The vectors path points at the file this
# notebook writes via `vector_save_path` (under data/); the previous value
# pointed into model/, where no cell of this notebook saves image_vectors.npy.
product_data_path = '/content/drive/MyDrive/DS_KLTN/data/fashion_vector_with_filenames.csv'
image_vectors_path = '/content/drive/MyDrive/DS_KLTN/data/image_vectors.npy'

In [None]:
# Read the product table and expose its 'id' column under the name 'product_id'.
products_df = pd.read_csv(product_data_path)
products_df = products_df.rename(columns={'id':'product_id'})
# Load the image feature vectors from disk.
image_vectors = np.load(image_vectors_path, allow_pickle=True)

In [None]:
# Build two parallel lists: ids[k] is the product that vectors[k] belongs to.
vectors = []
ids = []

# image_vectors holds one row per image that build_feature_database actually
# processed, in table order. The walk below must therefore apply the same
# skip rules (unparsable/missing filename lists, files absent from
# image_folder); otherwise every id after the first skipped file would be
# attached to the wrong vector.
vector_index = 0
for _, row in products_df.iterrows():
    product_id = row['product_id']
    image_filenames = row['image_filenames']
    if isinstance(image_filenames, str):
        # Parse the list literal stored in the CSV; literal_eval cannot run
        # arbitrary code the way eval() could.
        try:
            image_filenames = ast.literal_eval(image_filenames)
        except (ValueError, SyntaxError):
            print(f"Could not parse filenames for product {product_id}, skipping.")
            continue
    if not isinstance(image_filenames, (list, tuple)):
        # Missing CSV cells arrive as NaN: this product has no images.
        continue

    # Attach the product id to each of its image vectors.
    for filename in image_filenames:
        # `image_folder` is the images directory defined earlier in the
        # notebook; files missing from it never produced a vector.
        if not os.path.exists(os.path.join(image_folder, filename)):
            continue
        if vector_index >= len(image_vectors):
            raise ValueError(
                f"Ran out of image vectors at product {product_id}: "
                f"only {len(image_vectors)} vectors are available."
            )
        ids.append(product_id)
        vectors.append(image_vectors[vector_index])
        vector_index += 1

# Every stored vector must have been consumed, otherwise the table and the
# vector file are out of sync and the id/vector pairing cannot be trusted.
if vector_index != len(image_vectors):
    raise ValueError(
        f"Paired {vector_index} images but {len(image_vectors)} vectors were loaded; "
        "the product table and the vector file do not match."
    )

In [None]:
# Bundle the ids and vectors into one dictionary and store it in a .npy file.
# The payload is a dict rather than a plain array, so it is saved with
# allow_pickle=True.
data_to_save = {
    "id": ids,
    "vector": vectors,
}
np.save(
    '/content/drive/MyDrive/DS_KLTN/model/product_image_vectors.npy',
    data_to_save,
    allow_pickle=True,
)

In [None]:
# Load the stored object back from the .npy file, then unwrap it with .item()
# to get the saved dictionary.
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files that
# this notebook (or another trusted source) produced.
loaded_data = np.load('/content/drive/MyDrive/DS_KLTN/model/product_image_vectors.npy', allow_pickle=True)
loaded_data = loaded_data.item()

In [None]:
# Pull the parallel id and vector collections out of the loaded dictionary.
ids, vectors = loaded_data["id"], loaded_data["vector"]

In [None]:
# Example: preview the first few (product id, vector) pairs. Printing every
# pair would emit one entry per image in the dataset and bury the notebook
# in output.
for i, vector in zip(ids[:5], vectors[:5]):
    print(f"Product ID: {i}, Vector: {vector}")