In [None]:
import os
import cv2
import numpy as np
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# Names of every feature set handled by this notebook, in processing order.
# Colour-histogram entries are spelled "<colour space>_<N>x<N>", where N is the
# number of grid cells along each image axis.
feature_names = [
    # BGR colour histograms
    "bgr_1x1",
    "bgr_2x2",
    "bgr_3x3",
    # HSV colour histograms
    "hsv_1x1",
    "hsv_2x2",
    "hsv_3x3",
    # Luv colour histograms
    "luv_1x1",
    "luv_2x2",
    "luv_3x3",
    # Gabor filter response
    "gabor",
    # Deep-network activations
    "dnn",
]

def extract_color_histogram(image, color_space, grid=(1, 1)):
    """Compute concatenated 8x8x8 colour histograms over a grid of image cells.

    The image is split into ``grid[0]`` rows by ``grid[1]`` columns. For every
    cell a 3-D histogram with 8 bins per channel (512 values) is computed,
    normalised with ``cv2.normalize`` (default arguments) and flattened. Cells
    are visited row by row and their histograms are concatenated.

    Args:
        image: Image array in BGR channel order (the conversions used here are
            ``cv2.COLOR_BGR2HSV`` / ``cv2.COLOR_BGR2Luv``). Assumed to be an
            8-bit, 3-channel array such as ``cv2.imread`` returns.
        color_space: ``"hsv"`` or ``"luv"`` to convert before histogramming;
            any other value histograms the channels as they are.
        grid: ``(rows, cols)`` number of cells along the vertical and
            horizontal axes.

    Returns:
        1-D array of length ``rows * cols * 512``.

    Raises:
        ValueError: If ``image`` is ``None`` or the grid does not fit the image.
    """
    if image is None:
        raise ValueError("image is None (was the file read successfully?)")

    rows, cols = grid
    height, width = image.shape[:2]
    if rows < 1 or cols < 1 or rows > height or cols > width:
        raise ValueError(
            f"grid {tuple(grid)} does not fit an image of size {height}x{width}"
        )

    # Colour conversion is per-pixel, so convert the whole image once instead
    # of once per cell.
    if color_space == "hsv":
        converted = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        # For 8-bit images OpenCV stores hue as H/2, i.e. in [0, 180). Binning
        # hue over [0, 256) would leave the top hue bins permanently empty.
        ranges = [0, 180, 0, 256, 0, 256]
    elif color_space == "luv":
        converted = cv2.cvtColor(image, cv2.COLOR_BGR2Luv)
        ranges = [0, 256, 0, 256, 0, 256]
    else:
        converted = image
        ranges = [0, 256, 0, 256, 0, 256]

    # Evenly spaced cell boundaries whose last edge is exactly the image edge,
    # so no trailing rows/columns are dropped when the size is not divisible
    # by the grid.
    y_edges = [height * i // rows for i in range(rows + 1)]
    x_edges = [width * j // cols for j in range(cols + 1)]

    histograms = []
    for y in range(rows):
        for x in range(cols):
            cell = np.ascontiguousarray(
                converted[y_edges[y]:y_edges[y + 1], x_edges[x]:x_edges[x + 1]]
            )
            hist = cv2.calcHist([cell], [0, 1, 2], None, [8, 8, 8], ranges)
            histograms.append(cv2.normalize(hist, hist).flatten())

    return np.concatenate(histograms)

def extract_gabor_features(image, ksize=31, sigma=1.0, theta=np.pi/4, lambd=15.0, gamma=0.02, psi=0):
    """Filter the grayscale image with one Gabor kernel and flatten the response.

    Args:
        image: Image array in BGR channel order (converted with
            ``cv2.COLOR_BGR2GRAY``).
        ksize: Side length of the square Gabor kernel.
        sigma, theta, lambd, gamma, psi: Forwarded unchanged to
            ``cv2.getGaborKernel`` (Gaussian envelope width, orientation,
            wavelength, aspect ratio and phase offset, per the OpenCV docs).

    Returns:
        1-D array holding the flattened filter response. Its length follows the
        input image size, so images of different sizes give vectors of
        different lengths.
    """
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gabor_kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lambd, gamma, psi, ktype=cv2.CV_32F)
    # ``ddepth`` expects a depth constant, not a channel type such as CV_8UC3.
    # CV_8U keeps the 8-bit output the previous code effectively produced.
    # NOTE(review): an 8-bit response is saturated by OpenCV; confirm that this
    # is intended before switching to a floating-point depth.
    filtered_image = cv2.filter2D(image_gray, cv2.CV_8U, gabor_kernel)
    return filtered_image.flatten()

def extract_dnn_features(image, model):
    """Run ``model`` on a single BGR image and return its output as a NumPy array.

    The image is converted from BGR to RGB, wrapped in a PIL image, resized to
    256, centre-cropped to 224, converted to a tensor and normalised with the
    mean/std values listed below, then passed through ``model`` as a batch of
    one with gradients disabled.

    Args:
        image: Image array in BGR channel order.
        model: Callable network. If it exposes ``parameters()``, the input is
            moved to the device of its first parameter before the forward pass.

    Returns:
        The model output with singleton dimensions squeezed out, as a NumPy
        array on the CPU.
    """
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    input_tensor = transform(pil_image).unsqueeze(0)  # add the batch dimension

    # Feed the input on whichever device the model lives on; a CPU tensor
    # cannot be passed to a model that has been moved to an accelerator.
    parameters = getattr(model, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    if first_param is not None:
        input_tensor = input_tensor.to(first_param.device)

    with torch.no_grad():
        features = model(input_tensor)

    # ``.numpy()`` only works on CPU tensors, so bring the result back first.
    return features.squeeze().detach().cpu().numpy()

def _make_extractor(feature_name, model):
    """Return a one-argument callable that computes ``feature_name`` for a BGR image."""
    if feature_name.startswith(("bgr", "hsv", "luv")):
        # Names look like "<colour space>_<rows>x<cols>", e.g. "hsv_2x2".
        color_space, grid_spec = feature_name.split("_")
        rows, cols = (int(n) for n in grid_spec.split("x"))
        return lambda image: extract_color_histogram(image, color_space, grid=(rows, cols))
    if feature_name == "gabor":
        return extract_gabor_features
    if feature_name == "dnn":
        return lambda image: extract_dnn_features(image, model)
    raise ValueError(f"unknown feature name: {feature_name!r}")


def main(prefix='/export/space0/tanabe-h/data/kadai3a/'):
    """Extract every feature set in ``feature_names`` and save one archive per set.

    Reads all ``*.jpg`` files from ``<prefix>/img`` and, for each feature name,
    writes ``<prefix>/features/<feature_name>_features.npz``. Each archive maps
    the image file name without its extension to that image's feature array.
    Images that cannot be read are reported and skipped.

    Args:
        prefix: Root data directory containing the ``img`` sub-directory. The
            ``features`` sub-directory is created if it does not exist.

    Raises:
        ValueError: If ``feature_names`` contains a name no extractor handles.
    """
    img_directory = os.path.join(prefix, "img")
    features_directory = os.path.join(prefix, "features")
    os.makedirs(features_directory, exist_ok=True)

    # Sorted so the processing order (and archive layout) is reproducible.
    img_files = sorted(f for f in os.listdir(img_directory) if f.endswith(".jpg"))

    # VGG16 with the last classifier layer removed, so the network outputs the
    # activations of the preceding layer instead of class scores.
    # NOTE: newer torchvision releases prefer ``weights=`` over ``pretrained=True``.
    model = models.vgg16(pretrained=True)
    model.classifier = model.classifier[:-1]
    # Switch to inference mode after the classifier has been replaced.
    model.eval()

    # One feature set at a time, so only a single set is held in memory.
    for feature_name in feature_names:
        extract = _make_extractor(feature_name, model)
        feature_data = {}

        for img_file in img_files:
            file_path = os.path.join(img_directory, img_file)
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                print(f"warning: could not read {file_path}; skipping")
                continue

            file_index = os.path.splitext(img_file)[0]
            feature_data[file_index] = extract(image)

        np.savez(os.path.join(features_directory, f"{feature_name}_features.npz"), **feature_data)

# Entry point: run the full feature extraction when this code is executed
# directly (``__name__`` is "__main__") rather than imported as a module.
if __name__ == "__main__":
    main()


In [None]:
# Directory holding the "<feature_name>_features.npz" archives that
# load_features reads.
FEATURES_DIR = '/export/space0/tanabe-h/data/kadai3a/features/'
def load_features(feature_name, features_dir=None):
    """Load one saved feature archive into a plain dictionary.

    Args:
        feature_name: Name of the feature set; the file read is
            ``<features_dir>/<feature_name>_features.npz``.
        features_dir: Directory containing the archive. Defaults to the
            module-level ``FEATURES_DIR``.

    Returns:
        dict mapping every key stored in the archive to its array. All arrays
        are read eagerly, so the result stays valid after the file is closed.
    """
    if features_dir is None:
        features_dir = FEATURES_DIR
    features_path = os.path.join(features_dir, f"{feature_name}_features.npz")
    # np.load keeps the .npz file open until closed; the context manager
    # releases the handle once every array has been read.
    with np.load(features_path) as npz_data:
        return {key: npz_data[key] for key in npz_data.files}

# Example usage: bgr_features = load_features('bgr_1x1')