In [2]:
! pip install gdown

[0mCollecting gdown
  Using cached gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting PySocks!=1.5.7,>=1.5.6 (from requests[socks]->gdown)
  Using cached PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)
Using cached gdown-5.2.0-py3-none-any.whl (18 kB)
Using cached PySocks-1.7.1-py3-none-any.whl (16 kB)
[0mInstalling collected packages: PySocks, gdown
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [gdown]
[0mSuccessfully installed PySocks-1.7.1 gdown-5.2.0


In [3]:
import gdown
import zipfile
import os

# Google Drive file ID taken from the share URL
file_id = "1ziUZxYxyyjN5UCr1JfpLQQv3OEAKmAHX"
output_zip = "file.zip"
extract_folder = "/home/vlai-vqa-nle/minhtq/vqa-nle/data/raw"

# Direct-download URL built from the file ID
url = f'https://drive.google.com/uc?id={file_id}'

# Download the archive
print("Đang tải tệp zip...")
downloaded_path = gdown.download(url, output_zip, quiet=False)

# Fail loudly here instead of with a confusing zipfile error below.
# NOTE(review): some gdown releases appear to return None on failure rather
# than raising, so both the return value and the file itself are checked.
if not downloaded_path or not os.path.isfile(output_zip):
    raise RuntimeError(f"Download failed, no archive was written for: {url}")

try:
    # Create the extraction directory (no-op when it already exists)
    os.makedirs(extract_folder, exist_ok=True)

    # Unpack the archive
    print("Đang giải nén tệp...")
    with zipfile.ZipFile(output_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)
finally:
    # Always remove the temporary zip, even when extraction fails, so a
    # partial or corrupt archive is not left behind in the notebook directory
    os.remove(output_zip)

print(f"Tệp đã được tải xuống và giải nén thành công vào thư mục: '{extract_folder}'")


Đang tải tệp zip...


Downloading...
From (original): https://drive.google.com/uc?id=1ziUZxYxyyjN5UCr1JfpLQQv3OEAKmAHX
From (redirected): https://drive.google.com/uc?id=1ziUZxYxyyjN5UCr1JfpLQQv3OEAKmAHX&confirm=t&uuid=d4bfff0c-31a6-42f7-95a2-9df050892b53
To: /home/vlai-vqa-nle/minhtq/vqa-nle/notebooks/file.zip
100%|██████████| 253M/253M [00:22<00:00, 11.1MB/s] 


Đang giải nén tệp...
Tệp đã được tải xuống và giải nén thành công vào thư mục: '/home/vlai-vqa-nle/minhtq/vqa-nle/data/raw'


In [7]:
import json
from collections import defaultdict

# Paths to the train / val caption annotation JSON files
train_captions_path, val_captions_path = (
    "/home/vlai-vqa-nle/minhtq/vqa-nle/data/raw/annotations/captions_train2014.json",
    "/home/vlai-vqa-nle/minhtq/vqa-nle/data/raw/annotations/captions_val2014.json",
)

def load_and_map_captions(json_path):
    """
    Read a captions JSON file and index its contents by image id.

    Args:
        json_path: Path to a UTF-8 JSON file whose top level has an
            'annotations' list (items with 'image_id', 'caption', 'id')
            and an 'images' list (items with 'id').

    Returns:
        A tuple ``(captions_by_image, info_by_image)``:
          * captions_by_image -- plain dict mapping each annotation's
            'image_id' to a list of ``{'caption': ..., 'caption_id': ...}``
            dicts, in the order the annotations appear in the file.
          * info_by_image -- dict mapping each image's 'id' to its full
            record from the 'images' list.
    """
    with open(json_path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)

    # Group caption entries under the image they describe
    captions_by_image = {}
    for record in payload['annotations']:
        bucket = captions_by_image.setdefault(record['image_id'], [])
        bucket.append({
            'caption': record['caption'],
            'caption_id': record['id'],
        })

    # Index the image records by their id
    info_by_image = {image['id']: image for image in payload['images']}

    return captions_by_image, info_by_image

# Build the caption and image-info lookups for the train split
print("Loading train captions...")
train_captions_map, train_images_info = load_and_map_captions(train_captions_path)

# Build the same lookups for the val split
print("Loading val captions...")
val_captions_map, val_images_info = load_and_map_captions(val_captions_path)

print(f"\nTrain - Số lượng ảnh có captions: {len(train_captions_map)}")
print(f"Val - Số lượng ảnh có captions: {len(val_captions_map)}")

# Usage example: list every caption stored for one specific image
example_image_id = 318556
example_captions = train_captions_map.get(example_image_id)
if example_captions is not None:
    print(f"\nẢnh {example_image_id} có {len(example_captions)} captions:")
    for position, entry in enumerate(example_captions, start=1):
        print(f"  {position}. {entry['caption']}")

    # Metadata record of the same image
    img_info = train_images_info[example_image_id]
    print("\nThông tin ảnh:")
    print(f"  File name: {img_info['file_name']}")
    print(f"  Size: {img_info['width']}x{img_info['height']}")
    print(f"  Full path: /mnt/VLAI_data/COCO_Images/train2014/{img_info['file_name']}")

Loading train captions...
Loading val captions...

Train - Số lượng ảnh có captions: 82783
Val - Số lượng ảnh có captions: 40504

Ảnh 318556 có 5 captions:
  1. A very clean and well decorated empty bathroom
  2. A blue and white bathroom with butterfly themed wall tiles.
  3. A bathroom with a border of butterflies and blue paint on the walls above it.
  4. An angled view of a beautifully decorated bathroom.
  5. A clock that blends in with the wall hangs in a bathroom. 

Thông tin ảnh:
  File name: COCO_train2014_000000318556.jpg
  Size: 480x640
  Full path: /mnt/VLAI_data/COCO_Images/train2014/COCO_train2014_000000318556.jpg


In [8]:
import os

# Destination directory for the processed files (created if missing)
output_dir = "/home/vlai-vqa-nle/minhtq/vqa-nle/data/processed/coco"
os.makedirs(output_dir, exist_ok=True)

# One output file per split
train_output = os.path.join(output_dir, "coco_train2014_with_captions.json")
val_output = os.path.join(output_dir, "coco_val2014_with_captions.json")

# Write each split as {'captions_map': ..., 'images_info': ...}.
# Note: JSON object keys are always strings, so the integer image-id keys of
# both lookups are written as strings and must be converted back on load.
for split_name, split_path, captions_map, images_info in (
    ("train", train_output, train_captions_map, train_images_info),
    ("val", val_output, val_captions_map, val_images_info),
):
    with open(split_path, 'w', encoding='utf-8') as out_file:
        json.dump(
            {'captions_map': captions_map, 'images_info': images_info},
            out_file,
            ensure_ascii=False,
            indent=2,
        )
    print(f"Saved {split_name} to {split_path}")

Saved train to /home/vlai-vqa-nle/minhtq/vqa-nle/data/processed/coco/coco_train2014_with_captions.json
Saved val to /home/vlai-vqa-nle/minhtq/vqa-nle/data/processed/coco/coco_val2014_with_captions.json
