In [None]:
import json
import os
import h5py
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [None]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

# Baseline (simple): VGG-19 fc7 features from 224×224 inputs — disabled, kept for reference

In [None]:

# print("Loading VGG-19 model...")
# model = models.vgg19(pretrained=True).to(device)
# model.eval()

# feature_extractor = torch.nn.Sequential(
#     model.features,  # CNN part
#     torch.nn.Flatten(),  # Flatten the tensor into a vector
#     *list(model.classifier.children())[:6]  # Keep the classifier layers up to fc7
# ).to(device)

In [None]:
# baseline simple
# transform = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])

# Attention: VGG-19 convolutional feature maps from 448×448 inputs

In [None]:
# Preprocessing for the attention variant: every image becomes a normalized
# 3-channel tensor with a fixed 448x448 spatial size.
ATT_IMAGE_SIZE = 448
# Per-channel statistics torchvision documents for its ImageNet-pretrained models.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

_attention_steps = [
    transforms.Resize((ATT_IMAGE_SIZE, ATT_IMAGE_SIZE)),
    # NOTE(review): the resize above already yields exactly 448x448, so this crop
    # looks like a no-op; it is kept so the pipeline stays identical.
    transforms.CenterCrop(ATT_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
transform = transforms.Compose(_attention_steps)

In [None]:
# Keep only the convolutional `features` sub-module of the pretrained VGG-19
# (the classifier head is discarded), move it to the selected device and switch
# it to evaluation mode.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; confirm the installed version before switching.
vgg = models.vgg19(pretrained=True).features
vgg = vgg.to(device)
vgg.eval()

In [None]:
class ImageDataset(Dataset):
    """Map-style dataset that loads an image from disk and returns it transformed.

    Args:
        image_list: Indexable collection of image file paths; item ``i`` of the
            dataset is built from ``image_list[i]``.
        transform: Callable applied to the RGB ``PIL.Image``; whatever it returns
            is what ``__getitem__`` returns.
    """

    def __init__(self, image_list, transform):
        self.image_list = image_list
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Load the image at position ``idx``, force RGB mode, and apply the transform."""
        img_path = self.image_list[idx]
        # Pillow opens files lazily; the context manager releases the file handle
        # deterministically instead of leaving it to garbage collection.
        # `convert` returns a new, fully loaded image, so it is safe to use after
        # the source file has been closed.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        return self.transform(image)


In [None]:
# Input/output locations and batching for the feature-extraction run.
input_json = '/content/cocoqa_data_prepro.json'  # Replace with the appropriate path for your setup
image_root = '/content/drive/MyDrive/vqa_data'  # Root directory containing the images
out_name = 'data_img_att.h5'  # Name of the output HDF5 file
batch_size = 16  # Number of images per DataLoader batch

In [None]:
print("Loading JSON file:", input_json)
with open(input_json, 'r') as json_file:
    json_data = json.load(json_file)


def _under_image_root(relative_paths):
    """Return the given image paths with ``image_root`` joined in front of each."""
    return [os.path.join(image_root, rel_path) for rel_path in relative_paths]


# The JSON stores the unique image paths of each split relative to `image_root`.
train_list = _under_image_root(json_data['unique_img_train'])
test_list = _under_image_root(json_data['unique_img_test'])


In [None]:
# Both splits use the same loader settings. `shuffle=False` keeps batches in the
# same order as the path lists, so row i of the extracted features corresponds
# to image i of the list.
loader_kwargs = dict(batch_size=batch_size, shuffle=False, num_workers=4)

train_dataset = ImageDataset(train_list, transform)
train_loader = DataLoader(train_dataset, **loader_kwargs)

test_dataset = ImageDataset(test_list, transform)
test_loader = DataLoader(test_dataset, **loader_kwargs)

In [None]:
# def extract_features(dataloader, model):
#     """ Extract features from the DataLoader """
#     features = []
#     for batch in tqdm(dataloader, desc="Processing images"):
#         batch = batch.to(device)
#         with torch.no_grad():
#             output = model(batch)
#         features.append(output.cpu().numpy())  # Convert to a numpy array

#     return np.vstack(features)  # Concatenate all batches

In [None]:
def extract_features(dataloader, model, h5file, dataset_name):
    """Run ``model`` over every batch and stream the outputs into an HDF5 dataset.

    The target dataset is created lazily from the first batch's output, so no
    image goes through the model twice and the loader is iterated only once.
    Results are written batch by batch instead of being accumulated in memory.

    Args:
        dataloader: Loader yielding image batches; ``len(dataloader.dataset)``
            gives the number of rows to allocate. It must not shuffle if row
            order is meant to match the underlying dataset order.
        model: Callable mapping a batch tensor to a feature tensor whose first
            axis is the batch axis.
        h5file: Open, writable HDF5 file (or group) the dataset is created in.
        dataset_name: Name of the dataset to create inside ``h5file``.

    Raises:
        ValueError: If the loader's dataset is empty, because the per-sample
            feature shape cannot be determined without at least one batch.
    """
    total = len(dataloader.dataset)
    if total == 0:
        raise ValueError(
            f"Cannot extract features for '{dataset_name}': the dataset is empty."
        )

    dset = None  # created once the per-sample feature shape is known
    idx = 0
    for batch in tqdm(dataloader, desc=f"Processing {dataset_name}"):
        # Under no_grad the output carries no autograd graph, so it can be
        # converted to numpy directly.
        with torch.no_grad():
            output = model(batch.to(device)).cpu().numpy()

        if dset is None:
            # Everything after the batch axis is the per-sample feature shape.
            dset = h5file.create_dataset(
                dataset_name, shape=(total, *output.shape[1:]), dtype='float32'
            )

        # The last batch may be smaller than the others, so use its real size.
        bsize = output.shape[0]
        dset[idx:idx + bsize] = output
        idx += bsize


In [None]:
# print("Extracting training image features...")
# feat_train = extract_features(train_loader, vgg)

# print("Extracting testing image features...")
# feat_test = extract_features(test_loader, vgg)

# print(f"Saving extracted features to {out_name}...")
# with h5py.File(out_name, "w") as f:
#     f.create_dataset("images_train", data=feat_train)
#     f.create_dataset("images_test", data=feat_test)

# print("Feature extraction completed successfully!")

In [None]:
# Write both splits into one HDF5 file; mode "w" truncates any existing file.
with h5py.File(out_name, "w") as h5_out:
    for split_name, split_loader in (
        ("images_train", train_loader),
        ("images_test", test_loader),
    ):
        extract_features(split_loader, vgg, h5_out, split_name)