In [26]:
import torch
import clip
from PIL import Image
import numpy as np
import os
from torch.utils.data import DataLoader, Dataset
import faiss


# Artifacts produced by the indexing step and read back for evaluation.
index_path = r"C:\Users\user\Documents\code\korean_food_detection\tools\faiss_index_cheat.index"
labels_path = r"C:\Users\user\Documents\code\korean_food_detection\tools\labels_cheat.npy"
# Root folder of the images that get embedded into the index.
train_path = r"C:\Users\user\Documents\code\korean_food_detection\korean_cheat\train"
# Use the GPU when torch reports one as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [27]:
class FoodImageDataset(Dataset):
    """Image-folder dataset whose labels are the names of the class sub-folders.

    Expected layout: ``root_dir/<class_name>/<image file>``. Every regular file
    found inside a class folder becomes one sample labelled ``<class_name>``.

    Args:
    - root_dir: Directory containing one sub-folder per class.
    - transform: Optional callable applied to the PIL image before it is returned.
    - image_size: ``(width, height)`` every image is resized to before
      ``transform`` runs, or ``None`` to skip the resize. Defaults to
      ``(512, 512)``.

    NOTE(review): ``classify_image`` in this notebook hands query images to the
    transform without this resize, so indexed and query images go through
    different preprocessing. Confirm whether that mismatch is intended; pass
    ``image_size=None`` to make both paths identical.
    """

    def __init__(self, root_dir, transform=None, image_size=(512, 512)):
        self.root_dir = root_dir
        self.transform = transform
        self.image_size = image_size
        self.image_paths = []
        self.labels = []

        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and anything derived from it, e.g. a saved index) reproducible.
        for class_folder in sorted(os.listdir(root_dir)):
            class_path = os.path.join(root_dir, class_folder)
            if not os.path.isdir(class_path):
                continue
            for img_file in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, img_file)
                # A nested directory is not an image; skip it here instead of
                # failing later inside __getitem__.
                if not os.path.isfile(img_path):
                    continue
                self.image_paths.append(img_path)
                self.labels.append(class_folder)

    def __len__(self):
        """Return the number of samples discovered under ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened, converted to RGB, optionally resized to
        ``image_size`` and passed through ``transform`` when one was given.
        ``label`` is the name of the class folder the file was found in.
        """
        img_path = self.image_paths[idx]
        # convert() returns an independent in-memory copy, so the underlying
        # file can be closed as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.image_size is not None:
            image = image.resize(self.image_size)

        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label
    


def create_and_save_faiss_index_with_labels(model, dataloader, transform, device, index_path, labels_path):
    """
    Embed every image from ``dataloader`` and persist a FAISS index plus labels.

    Args:
    - model: Object exposing ``eval()`` and ``encode_image(batch)`` (the CLIP model).
    - dataloader: Iterable yielding ``(images, label_batch)`` pairs; ``images``
      must already be preprocessed tensors.
    - transform: Unused here (preprocessing happens before batching); kept so
      existing callers do not break.
    - device: The device (cuda or cpu) the image batches are moved to.
    - index_path: Path to save the FAISS index.
    - labels_path: Path to save the labels.

    Returns:
    - ``(index, labels)``: the ``faiss.IndexFlatL2`` that was written and the
      list of labels, where row ``i`` of the index belongs to ``labels[i]``.

    Raises:
    - ValueError: if ``dataloader`` yields no batches, since no index can be
      built without at least one embedding.
    """
    model.eval()

    embeddings = []
    labels = []

    with torch.no_grad():
        for images, label_batch in dataloader:
            images = images.to(device)
            # CLIP can return half-precision features (e.g. on CUDA) while FAISS
            # operates on float32, so upcast before leaving torch.
            image_features = model.encode_image(images).float().cpu().numpy()

            embeddings.append(image_features)
            labels.extend(label_batch)  # one label per embedding row, same order

    if not embeddings:
        # np.concatenate([]) would fail with an opaque message; be explicit.
        raise ValueError("dataloader yielded no batches; cannot build a FAISS index without embeddings.")

    # FAISS expects a C-contiguous float32 matrix of shape (n_samples, dim).
    embeddings = np.ascontiguousarray(np.concatenate(embeddings, axis=0), dtype=np.float32)

    # Exact (brute-force) L2 index over the raw, un-normalised embeddings.
    d = embeddings.shape[1]  # Dimensionality of embeddings
    index = faiss.IndexFlatL2(d)
    index.add(embeddings)

    # Save FAISS index to disk
    faiss.write_index(index, index_path)
    print(f"FAISS index saved to {index_path}")

    # Save labels to disk in index-row order
    np.save(labels_path, np.array(labels))
    print(f"Labels saved to {labels_path}")

    return index, labels

if __name__ == "__main__":
    # Run on the GPU when torch can see one, otherwise on the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # CLIP backbone (ViT-L/14@336px) together with its preprocessing callable.
    model, preprocess = clip.load("ViT-L/14@336px", device=device)

    # Training images are read folder by folder, in fixed order, 64 per batch.
    dataset = FoodImageDataset(train_path, transform=preprocess)
    dataloader = DataLoader(dataset=dataset, batch_size=64, shuffle=False)

    # Embed the whole training set and write the index and labels to disk.
    faiss_index, labels = create_and_save_faiss_index_with_labels(
        model=model,
        dataloader=dataloader,
        transform=preprocess,
        device=device,
        index_path=index_path,
        labels_path=labels_path,
    )



FAISS index saved to C:\Users\user\Documents\code\korean_food_detection\tools\faiss_index_cheat.index
Labels saved to C:\Users\user\Documents\code\korean_food_detection\tools\labels_cheat.npy


In [28]:
def load_faiss_index(index_path):
    """
    Read a FAISS index from disk and report where it came from.

    Args:
    - index_path: Path of the index file to read.

    Returns:
    - The index object returned by ``faiss.read_index``.
    """
    loaded_index = faiss.read_index(index_path)
    print(f"FAISS index loaded from {index_path}")
    return loaded_index
def clip_transform(image):
    """
    Run ``image`` through the notebook-level ``preprocess`` callable.

    ``preprocess`` is looked up as a global at call time, so the cell that
    defines it must have been executed before this function is used.
    """
    processed = preprocess(image)
    return processed

In [29]:
def classify_image(model, faiss_index, image, transform, device, labels):
    """
    Classify an image by nearest-neighbour lookup of its CLIP embedding.

    Args:
    - model: Object exposing ``eval()`` and ``encode_image(batch)`` (the CLIP model).
    - faiss_index: Index exposing ``ntotal`` and ``search(queries, k)``.
    - image: The input image to classify; it is passed to ``transform`` as-is.
    - transform: Callable turning ``image`` into a tensor without a batch axis.
    - device: The device to run the model on (cuda or cpu).
    - labels: Sequence where ``labels[i]`` is the label of index row ``i``.

    Returns:
    - The label of the closest indexed embedding, or None when the index is
      empty, the search yields no usable neighbour, or the neighbour id has no
      entry in ``labels``.
    """
    # Nothing to compare against.
    if faiss_index.ntotal == 0:
        print("FAISS index is empty, no embeddings available for search.")
        return None

    model.eval()

    with torch.no_grad():
        # Add the batch axis the encoder expects and move to the target device.
        batch = transform(image).unsqueeze(0).to(device)

        # Upcast before leaving torch: the encoder can return half-precision
        # features (e.g. on CUDA) while FAISS operates on float32.
        image_features = model.encode_image(batch).float().cpu().numpy()

    # FAISS expects a C-contiguous float32 query matrix.
    image_features = np.ascontiguousarray(image_features, dtype=np.float32)

    # k=1: only the single closest neighbour is needed.
    distances, indices = faiss_index.search(image_features, k=1)

    if len(indices) == 0 or len(indices[0]) == 0:
        print("No match found in the FAISS index.")
        return None

    closest_idx = int(indices[0][0])

    # FAISS reports -1 when it has no neighbour for a slot. Without this check
    # a negative id would silently index labels from the end.
    if closest_idx < 0:
        print("No match found in the FAISS index.")
        return None

    # Guard against an index/labels pair that do not belong together.
    if closest_idx >= len(labels):
        print("Closest index is out of bounds for labels.")
        return None

    return labels[closest_idx]

In [30]:
def load_labels(labels_path):
    """
    Read the label array stored at ``labels_path`` and return it as a list.

    Args:
    - labels_path: Path to the ``.npy`` file holding the labels.

    Returns:
    - The stored array converted with ``ndarray.tolist()``.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code if the file is not trusted. Only point this at
    # label files you created yourself.
    stored = np.load(labels_path, allow_pickle=True)
    label_list = stored.tolist()
    return label_list

In [31]:
from sklearn.metrics import accuracy_score

import os
from PIL import Image
from sklearn.metrics import accuracy_score

def evaluate_model_on_test_data(model, faiss_index, test_root_dir, transform, device, labels):
    """
    Score nearest-neighbour predictions on a folder-per-class test set.

    Every entry inside each class sub-folder of ``test_root_dir`` is opened as
    an image, classified with ``classify_image`` and compared against the name
    of its folder. Each mismatch is printed as it is found.

    NOTE: a later cell of this notebook defines a function with the same name
    (adding an ``output_file`` parameter); once that cell runs it replaces this
    definition.

    Args:
    - model: The pre-trained CLIP model.
    - faiss_index: The FAISS index with image embeddings.
    - test_root_dir: Root directory of the test data, where each subfolder is a class.
    - transform: The preprocessing transform handed to ``classify_image``.
    - device: The device to run the model on (cuda or cpu).
    - labels: The list of labels corresponding to the FAISS index embeddings.

    Returns:
    - accuracy: The value returned by ``accuracy_score`` for the collected labels.
    """
    def iter_samples():
        # Lazily walk root/<class>/<file>, yielding (file path, class name).
        for folder_name in os.listdir(test_root_dir):
            folder_path = os.path.join(test_root_dir, folder_name)
            if not os.path.isdir(folder_path):
                continue
            for file_name in os.listdir(folder_path):
                yield os.path.join(folder_path, file_name), folder_name

    expected = []
    guesses = []

    for sample_path, expected_label in iter_samples():
        rgb_image = Image.open(sample_path).convert('RGB')

        # The folder name is the ground truth for this sample.
        expected.append(expected_label)

        guess = classify_image(model, faiss_index, rgb_image, transform, device, labels)
        guesses.append(guess)

        # Report every sample whose prediction disagrees with its folder.
        if guess != expected_label:
            print(f"Misclassified image: {sample_path}")
            print(f"True label: {expected_label}, Predicted label: {guess}\n")

    accuracy = accuracy_score(expected, guesses)
    print(f"Model accuracy: {accuracy * 100:.2f}%")

    return accuracy


In [32]:
def evaluate_model_on_test_data(model, faiss_index, test_root_dir, transform, device, labels, output_file="misclassified_images_1.csv"):
    """
    Evaluate the CLIP model using the FAISS index on test images organized in folders by class.
    Logs the misclassified samples to a CSV file.

    Args:
    - model: The pre-trained CLIP model.
    - faiss_index: The FAISS index with image embeddings.
    - test_root_dir: Root directory of the test data, where each subfolder is a class.
    - transform: The preprocessing transform handed to ``classify_image``.
    - device: The device to run the model on (cuda or cpu).
    - labels: The list of labels corresponding to the FAISS index embeddings.
    - output_file: Path of the CSV that receives one row per misclassified
      image (default is 'misclassified_images_1.csv'). The file is overwritten.

    Returns:
    - accuracy: The value returned by ``accuracy_score`` for the collected labels.
    """
    import csv  # local import so this cell does not rely on another cell's imports

    true_labels = []
    predicted_labels = []

    with open(output_file, 'w', encoding='utf-8') as file:
        # csv.writer quotes fields containing commas or quotes, which a
        # hand-built f-string row would silently corrupt. lineterminator="\n"
        # keeps the line endings identical to plain text-mode writes.
        writer = csv.writer(file, lineterminator="\n")
        writer.writerow(["Image Path", "True Label", "Predicted Label"])

        # Sorted traversal makes the row order reproducible across runs.
        for class_folder in sorted(os.listdir(test_root_dir)):
            class_path = os.path.join(test_root_dir, class_folder)
            if not os.path.isdir(class_path):
                continue
            for img_file in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, img_file)
                # convert() returns an in-memory copy, so the file handle can
                # be released right away.
                with Image.open(img_path) as raw_image:
                    image = raw_image.convert('RGB')

                # The folder name is the true label.
                true_label = class_folder
                true_labels.append(true_label)

                predicted_label = classify_image(model, faiss_index, image, transform, device, labels)
                predicted_labels.append(predicted_label)

                if predicted_label != true_label:
                    # str() keeps a missing prediction visible as "None";
                    # csv.writer would otherwise write None as an empty field.
                    writer.writerow([img_path, true_label, str(predicted_label)])

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)
    print(f"Model accuracy: {accuracy * 100:.2f}%")

    return accuracy

In [35]:
# Held-out images; each sub-folder name is used as the true class label.
test_root_dir = r'C:\Users\user\Documents\code\korean_food_detection\korean_cheat\test'
# Read back the index and labels from the paths configured at the top.
faiss_index = load_faiss_index(index_path)
labels = load_labels(labels_path)
# Compare nearest-neighbour predictions against the folder labels.
accuracy = evaluate_model_on_test_data(
    model=model,
    faiss_index=faiss_index,
    test_root_dir=test_root_dir,
    transform=clip_transform,
    device=device,
    labels=labels,
)

FAISS index loaded from C:\Users\user\Documents\code\korean_food_detection\tools\faiss_index_cheat.index
Model accuracy: 88.71%
