In [None]:
import os
import csv
import random
from PIL import Image

# === Configuration ===
input_dir = "raw_data"  # Directory holding the source images
output_path = "input_data"  # Root directory for everything written below
output_image_dir = os.path.join(output_path, "X_test")  # Shuffled puzzle images
output_original_dir = os.path.join(output_path, "Y_test_images")  # Resized originals
csv_output_path = os.path.join(output_path, "Y_test.csv")  # CSV output file

# Puzzle grid dimensions (rows x columns of pieces).
ROWS, COLS = 3, 5
MAX_IMAGES = 100
# Target size images are resized to; 600 x 360 splits evenly into a 3 x 5 grid
# of 120 x 120 pieces.
RESIZE_WIDTH, RESIZE_HEIGHT = 600, 360

# Create both output directories up front; existing directories are left as-is.
for _out_dir in (output_image_dir, output_original_dir):
    os.makedirs(_out_dir, exist_ok=True)

def slice_image(image_path, rows, cols):
    """Resize an image and cut it into a rows x cols grid of equal tiles.

    The image is loaded as RGB and resized to RESIZE_WIDTH x RESIZE_HEIGHT
    before it is cut.

    Args:
        image_path: Path of the image file to load.
        rows: Number of tile rows.
        cols: Number of tile columns.

    Returns:
        A ``(pieces, img)`` tuple: ``pieces`` lists the tiles in row-major
        order (left to right, then top to bottom) and ``img`` is the resized
        image they were cut from.
    """
    img = Image.open(image_path).convert("RGB").resize((RESIZE_WIDTH, RESIZE_HEIGHT))

    # Floor division: any leftover pixels on the right/bottom edge belong to
    # no tile.
    tile_w = img.width // cols
    tile_h = img.height // rows

    pieces = [
        img.crop((col * tile_w, row * tile_h, (col + 1) * tile_w, (row + 1) * tile_h))
        for row in range(rows)
        for col in range(cols)
    ]
    return pieces, img

def create_image_from_pieces(pieces, rows, cols, output_filename):
    """Assemble tiles into a single image, in list order, and save it as JPEG.

    Args:
        pieces: Tiles to place, in row-major order; the size of the first
            tile is used as the cell size for the whole grid.
        rows: Number of grid rows in the output image.
        cols: Number of grid columns in the output image.
        output_filename: Destination path of the JPEG file.
    """
    tile_w, tile_h = pieces[0].size
    canvas = Image.new("RGB", (cols * tile_w, rows * tile_h))
    for position, tile in enumerate(pieces):
        # Position i in the list lands at grid cell (i // cols, i % cols).
        grid_row, grid_col = divmod(position, cols)
        canvas.paste(tile, (grid_col * tile_w, grid_row * tile_h))
    canvas.save(output_filename, "JPEG", quality=95)

def process_single_image(image_path, writer):
    """Produce one puzzle sample from a source image.

    Saves the resized original, saves a version whose tiles are shuffled, and
    writes one CSV row with the shuffled file name followed by the permutation.

    Args:
        image_path: Path of the source image.
        writer: CSV writer (anything with ``writerow``) receiving the row.

    NOTE: the permutation is re-drawn until it differs from the identity, so
    a grid with fewer than two pieces would loop forever.
    """
    stem, _ = os.path.splitext(os.path.basename(image_path))
    base_name = stem + ".jpg"

    # 1. Resize and slice the image; keep the resized original as reference.
    pieces, resized_img = slice_image(image_path, ROWS, COLS)
    resized_img.save(os.path.join(output_original_dir, base_name), "JPEG", quality=95)

    # 2. Draw a random permutation, rejecting the identity (unshuffled) order.
    original_indices = list(range(len(pieces)))
    shuffled_indices = list(original_indices)
    random.shuffle(shuffled_indices)
    while shuffled_indices == original_indices:
        random.shuffle(shuffled_indices)

    # 3. Lay the tiles out in permuted order to build the shuffled puzzle image.
    shuffled_image_name = os.path.splitext(base_name)[0] + "_shuffled.jpg"
    create_image_from_pieces(
        [pieces[i] for i in shuffled_indices],
        ROWS,
        COLS,
        os.path.join(output_image_dir, shuffled_image_name),
    )

    # 4. CSV row: value k is the original index of the tile shown at position k.
    writer.writerow([shuffled_image_name, *shuffled_indices])

def generate_dataset():
    """Build the shuffled-puzzle dataset from the images in ``input_dir``.

    Collects the .png/.jpg/.jpeg files in ``input_dir``, randomly keeps at
    most MAX_IMAGES of them, processes each with ``process_single_image`` and
    writes one CSV row per image to ``csv_output_path``. Progress and a final
    summary are printed. Returns early, without writing the CSV, when no image
    is found.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist (from os.listdir).
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # selection and processing order reproducible (e.g. with a seeded RNG).
    image_files = sorted(
        f for f in os.listdir(input_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    if not image_files:
        # Report the directory that was actually scanned.
        print(f"No images found in {input_dir} directory!")
        return

    if len(image_files) > MAX_IMAGES:
        image_files = random.sample(image_files, MAX_IMAGES)

    total = len(image_files)
    print(f"Selected {total} images randomly. Processing...")

    # Explicit UTF-8 so file names with non-ASCII characters are written the
    # same way on every platform instead of depending on the locale encoding.
    with open(csv_output_path, mode="w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        # One column per grid position, in row-major order.
        header = ["image_filename"] + [
            f"piece_at_{r}_{c}" for r in range(ROWS) for c in range(COLS)
        ]
        writer.writerow(header)

        for idx, filename in enumerate(image_files, start=1):
            image_path = os.path.join(input_dir, filename)
            print(f"[{idx}/{total}] Processing {filename}...")
            process_single_image(image_path, writer)

    print("\nDataset generated successfully!")
    print(f"- Original images: {output_original_dir}")
    print(f"- Shuffled images: {output_image_dir}")
    print(f"- CSV mapping: {csv_output_path}")

# Entry point: generate the dataset when this file is run directly rather than
# imported as a module.
if __name__ == "__main__":
    generate_dataset()


Selected 100 images randomly. Processing...
[1/100] Processing Salvador_Dali_67.jpg...
[2/100] Processing Edgar_Degas_345.jpg...
[3/100] Processing Vasiliy_Kandinskiy_15.jpg...
[4/100] Processing Piet_Mondrian_52.jpg...
[5/100] Processing Raphael_7.jpg...
[6/100] Processing Caravaggio_54.jpg...
[7/100] Processing Giotto_di_Bondone_68.jpg...
[8/100] Processing Amedeo_Modigliani_37.jpg...
[9/100] Processing Alfred_Sisley_187.jpg...
[10/100] Processing Francisco_Goya_232.jpg...
[11/100] Processing Sandro_Botticelli_130.jpg...
[12/100] Processing Rene_Magritte_59.jpg...
[13/100] Processing Paul_Gauguin_284.jpg...
[14/100] Processing Pierre-Auguste_Renoir_215.jpg...
[15/100] Processing Salvador_Dali_100.jpg...
[16/100] Processing Francisco_Goya_163.jpg...
[17/100] Processing Edgar_Degas_108.jpg...
[18/100] Processing Sandro_Botticelli_28.jpg...
[19/100] Processing Edgar_Degas_340.jpg...
[20/100] Processing Paul_Klee_135.jpg...
[21/100] Processing Edgar_Degas_462.jpg...
[22/100] Processing P