# **List valid images**

In [1]:
import os
from PIL import Image

# Root folder that holds the image files (scanned recursively).
data_dir = r'data\images'

# Paths of the files that could not be validated as images.
invalid_images = []

# Walk through every file under the data directory.
for root, dirs, files in os.walk(data_dir):
    for file in files:
        file_path = os.path.join(root, file)
        try:
            # Open inside a context manager so the file handle is released
            # for every file, including the ones that fail verification.
            with Image.open(file_path) as img:
                img.verify()  # check the file for corruption
        except (IOError, SyntaxError):
            # Opening or verification failed: remember the file so that the
            # following cells can leave it out.
            print('Invalid file:', file_path)
            invalid_images.append(file_path)

In [2]:
# Build the list of valid image paths: every file under data_dir that was not
# reported as invalid by the verification cell.
# A set gives constant-time membership checks instead of scanning the whole
# invalid_images list once per file.
invalid_lookup = set(invalid_images)

list_images = []
for root, dirs, files in os.walk(data_dir):
    dirs.sort()   # sorting in place makes os.walk visit sub-folders in sorted order
    files.sort()  # sorted file order within each folder
    for file in files:
        file_path = os.path.join(root, file)
        if file_path not in invalid_lookup:
            list_images.append(file_path)
# Number of images in the dataset
len(list_images)

10500

In [3]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import DirectoryIterator
import os
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics.pairwise import cosine_similarity
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing import image
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pickle

# **Extract features**

In [4]:
# Input resolution used for every image: 224x224 pixels.
img_width = img_height = 224

# ResNet50 with ImageNet weights and without the fully connected top layer,
# so the network outputs convolutional feature maps.
model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, 3),
)

In [5]:
# Trích xuất đặc trưng từ mô hình ResNet50
def extract_features(image_path):
    """Return the ResNet50 features of one image as a flat array.

    The file at ``image_path`` is loaded at ``(img_width, img_height)``,
    converted to an array, wrapped into a single-item batch, passed through
    ``preprocess_input`` and fed to the module-level ``model``. The prediction
    is flattened to one dimension before it is returned.
    """
    pil_img = image.load_img(image_path, target_size=(img_width, img_height))
    # The model expects a batch, so add a leading axis of length 1.
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    activations = model.predict(preprocess_input(batch), verbose=0)
    return activations.flatten()

In [6]:
# Feature vector of the first image. extract_features() already returns a
# flattened NumPy array, so wrapping it in np.array() only made an extra copy.
feature = extract_features(list_images[0])
feature.shape

(100352,)

In [7]:
# Size of a single element of the feature vector, in bytes
element_size = feature.itemsize

# Number of elements in the feature vector
num_elements = feature.size

# Memory used by the vector's elements in bytes (itemsize * size)
feature.nbytes

401408

# **Data Storage**

In [19]:
import mysql.connector

# Connection settings are read from environment variables so credentials do
# not have to be written into the notebook; the fallbacks reproduce the
# original local setup (root user, empty password).
conn = mysql.connector.connect(
    host=os.environ.get("FOOD_DB_HOST", "localhost"),
    user=os.environ.get("FOOD_DB_USER", "root"),
    password=os.environ.get("FOOD_DB_PASSWORD", ""),
    database=os.environ.get("FOOD_DB_NAME", "food_img_search"),
)

**Get food ID**

In [9]:
def get_food_id(conn, food_name):
    """Look up the id of a row in ``food`` whose name contains ``food_name``.

    Args:
        conn: Open database connection; only ``conn.cursor(buffered=True)``
            is used.
        food_name: Text searched for with ``LIKE '%food_name%'`` under the
            ``utf8_general_ci`` collation.

    Returns:
        The ``id`` of the first row returned by the query, or ``None`` when
        no row matches or a ``mysql.connector.Error`` occurs (the error is
        printed).

    NOTE(review): this is a substring match, so a short name can match a
    longer, different food name; confirm that this is intended.
    """
    cursor = conn.cursor(buffered=True)
    try:
        query_select = "SELECT id FROM food WHERE name LIKE %s COLLATE utf8_general_ci"
        cursor.execute(query_select, (f'%{food_name}%',))
        row = cursor.fetchone()
        return row[0] if row else None
    except mysql.connector.Error as err:
        print(f"Error: {err}")
        return None
    finally:
        # Release the cursor on every path; it was previously never closed.
        cursor.close()

**Get description**

In [10]:
def get_description(img_path):
    """Read the description of an image from its food's CSV file.

    The image file name is expected to look like ``<name>_<no>.<ext>``.
    ``<name>`` selects the file ``data\\urls\\<name>.csv`` and ``<no>`` is the
    1-based row number inside it; the description is read from the fourth
    column (position 3).

    Args:
        img_path: Path of the image file.

    Returns:
        The description, or ``None`` when the cell is empty/NaN or when
        anything goes wrong (bad file name, missing CSV, row out of range).
        Failures are printed, not raised.
    """
    try:
        file_name = os.path.basename(img_path)
        name, _ = os.path.splitext(file_name)
        name_parts = name.rsplit('_', 1)

        name = name_parts[0]
        no = int(name_parts[1])
        if no < 1:
            # Without this guard ``no - 1`` becomes negative and iloc would
            # silently return a row counted from the end of the file.
            raise IndexError(f"image number must be >= 1, got {no}")
        csv_file = rf"data\urls\{name}.csv"
        df = pd.read_csv(csv_file)
        description = df.iloc[no - 1, 3]
        # Empty CSV cells are read as NaN; report them as None.
        return description if pd.notna(description) else None
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return None

**Store to db**

In [11]:
def insert_image_data(conn, img_path):
    """Store one image (and, if needed, its food) in the database.

    The food name is derived from the file name: the trailing ``_<no>`` is
    dropped and the remaining underscores become spaces. When no matching
    ``food`` row exists, one is inserted with the title-cased name. A row is
    then inserted into ``food_imgs`` with the food name, the food id (as
    ``label``), the image path, the description from the CSV file and the
    pickled feature vector.

    Both inserts are committed together. On any error the transaction is
    rolled back and the error is printed; nothing is raised.

    Args:
        conn: Open database connection.
        img_path: Path of the image file.
    """
    file_name = os.path.basename(img_path)
    stem, _ = os.path.splitext(file_name)
    food_name = stem.rsplit('_', 1)[0].replace('_', ' ')

    cursor = conn.cursor(buffered=True)

    try:
        # Do the slow, failure-prone work before writing anything, so a
        # failure here cannot leave a half-finished write behind.
        description = get_description(img_path)
        feature_vector = np.asarray(extract_features(img_path))
        # NOTE: the vector is stored as a pickle; only unpickle blobs that
        # come from a trusted database.
        feature_blob = pickle.dumps(feature_vector)

        food_id = get_food_id(conn, food_name)
        if food_id is None:
            # Add the new food; it is committed together with the image row.
            query_insert = "INSERT INTO food (name) VALUES (%s)"
            cursor.execute(query_insert, (food_name.title(),))

            # Id of the food row that was just inserted.
            food_id = cursor.lastrowid

        # Insert the image row into food_imgs.
        query = """
        INSERT INTO food_imgs (food_name, label, img_path, description, feature)
        VALUES (%s, %s, %s, %s, %s)
        """
        values = (food_name, food_id, img_path, description, feature_blob)
        cursor.execute(query, values)
        # Single commit: the food row and the image row succeed or fail together.
        conn.commit()
    except Exception as e:
        conn.rollback()
        print(f"Error: {e}")
    finally:
        cursor.close()

In [12]:
# Store every path collected in list_images through insert_image_data().
# NOTE(review): insert_image_data() always issues an INSERT into food_imgs, so
# re-running this cell presumably adds the rows again unless the table
# enforces uniqueness — confirm against the schema.
for image_path in list_images:
    insert_image_data(conn,image_path)

# *Update a record when its image or description changes*

In [33]:
import mysql.connector
def update_record(img_path):
    """Recompute and store the feature vector and description of one image.

    The description is read again from the CSV file and the feature vector is
    extracted again from the image; both are written to the ``food_imgs`` row
    whose ``img_path`` equals ``img_path``.

    A dedicated connection is opened for the update and is always closed
    afterwards. Connection settings come from the ``FOOD_DB_*`` environment
    variables, falling back to the original local values.

    Args:
        img_path: Image path exactly as stored in ``food_imgs.img_path``.

    Raises:
        Any exception from connecting or from the UPDATE is propagated after
        the transaction has been rolled back.
    """
    # Compute the new values first so the connection is not held open while
    # the model runs.
    description = get_description(img_path)
    new_vector = np.array(extract_features(img_path))
    # Serialise the feature vector so it can be stored in the feature column.
    nfeature_blob = pickle.dumps(new_vector)

    # Only the feature and description columns are updated.
    query_update = """
    UPDATE food_imgs
    SET feature = %s, description = %s
    WHERE img_path = %s
    """
    values = (nfeature_blob, description, img_path)

    conn = mysql.connector.connect(
        host=os.environ.get("FOOD_DB_HOST", "localhost"),
        user=os.environ.get("FOOD_DB_USER", "root"),
        password=os.environ.get("FOOD_DB_PASSWORD", ""),
        database=os.environ.get("FOOD_DB_NAME", "food_img_search"),
    )
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query_update, values)
            conn.commit()
        except Exception:
            # Undo the partial work, then let the caller see the failure.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        # The connection was previously left open after every call.
        conn.close()

# Refresh the stored feature vector and description of a single image.
update_record(r'data\images\foie_gras_28.jpg')
