In [37]:
# ============================================================
# Cell 0 — CONFIG, FOLDER GUARD, LOGGING
# ------------------------------------------------------------
# - Đặt tham số bài toán và solver.
# - Đảm bảo cấu trúc thư mục theo yêu cầu (tạo nếu thiếu).
# - Khởi tạo logger "chắc ăn" (không bị Jupyter bỏ qua).
# ============================================================

import os, sys, glob, time, math, json, logging, zipfile, heapq
from collections import defaultdict
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm

def ensure_dir(path: str) -> bool:
    """Tạo thư mục nếu chưa có; trả False nếu tạo thất bại."""
    try:
        os.makedirs(path, exist_ok=True)
        return True
    except Exception as e:
        print(f"[ERROR] Cannot create dir {path}: {e}")
        return False

def require_dir(path: str, create: bool = False) -> str:
    """
    Đảm bảo thư mục tồn tại; nếu create=True thì cố gắng tạo.
    Ném lỗi nếu không thể đảm bảo tồn tại.
    """
    if os.path.isdir(path):
        return path
    if create and ensure_dir(path):
        return path
    raise FileNotFoundError(f"Required directory not found: {path}")

# Phát hiện BASE_DIR (nếu notebook nằm trong code/)
CWD = os.getcwd()
BASE_DIR = os.path.abspath(os.path.join(CWD, "..")) if os.path.basename(CWD) == "code" else os.path.abspath(CWD)

# Cấu trúc thư mục theo yêu cầu
CODE_DIR      = os.path.join(BASE_DIR, "code")
IMAGES_DIR    = os.path.join(BASE_DIR, "images")
ORIGIN_DIR    = require_dir(os.path.join(IMAGES_DIR, "origin"), create=True)
RESIZE_DIR    = require_dir(os.path.join(IMAGES_DIR, "resize"), create=True)
PROB_TASK1    = require_dir(os.path.join(IMAGES_DIR, "problem", "task1"), create=True)
PROB_TASK2    = require_dir(os.path.join(IMAGES_DIR, "problem", "task2"), create=True)
SLOVED_TASK1  = require_dir(os.path.join(IMAGES_DIR, "sloved", "task1"), create=True)  # giữ nguyên chính tả "sloved"
SLOVED_TASK2  = require_dir(os.path.join(IMAGES_DIR, "sloved", "task2"), create=True)
OUTPUT_DIR    = require_dir(os.path.join(BASE_DIR, "output"), create=True)

# Tham số bài toán (cố định theo đề)
GRID_R, GRID_C = 3, 5
H, W = 360, 600
TILE_H, TILE_W = H // GRID_R, W // GRID_C   # 120 x 120

# Tham số cost biên (đã tối ưu cho ảnh phong cảnh, nền phẳng)
BAND  = 6      # dải biên rộng hơn giúp ổn định ở vùng phẳng
ALPHA = 1.0    # trọng số MSE màu (Lab; dải đã chuẩn hoá)
BETA  = 0.35   # trọng số MSE gradient
GAMMA = 0.80   # trọng số (1 - SSIM) cho cấu trúc biên
PRIOR_W = 0.15 # trọng số "location prior" (hàng trên/giữa/dưới)

# Reproducibility
np.random.seed(42)

# Đường dẫn output
OUTPUT_CSV  = os.path.join(OUTPUT_DIR, "output.csv")
RUN_LOG     = os.path.join(OUTPUT_DIR, "run.log")
HINTS_FILE  = os.path.join(OUTPUT_DIR, "task1_hints.csv")  # optional: image_filename, top_left_piece_index
SUBMISSION  = os.path.join(OUTPUT_DIR, "submission.zip")

# Reset mọi handler cũ rồi bật logger (tránh việc basicConfig bị bỏ qua trong Jupyter)
for h in list(logging.root.handlers):
    logging.root.removeHandler(h)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    handlers=[
        logging.FileHandler(RUN_LOG, mode="w", encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger("VOAI2025_Astar")
logger.info(f"BASE_DIR = {BASE_DIR}")
logger.info("Folders are ready. Logging initialized.")


2025-10-27 10:16:21,411 | INFO | BASE_DIR = /home/dammanhdungvn/learn_ml/project
2025-10-27 10:16:21,424 | INFO | Folders are ready. Logging initialized.


In [38]:
# ============================================================
# Cell 1 — IO HELPERS
# ------------------------------------------------------------
# - list_images: liệt kê ảnh an toàn (kèm log nếu lỗi).
# - load_image_rgb: đọc ảnh RGB; assert đúng kích thước 600x360.
# - load_json_sidecar: tìm file .json (perm gốc) cùng basename
#   ở vài vị trí quen thuộc để đánh giá nội bộ/tune tham số.
# ============================================================

def list_images(folder, exts=(".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff")):
    try:
        files = [p for p in glob.glob(os.path.join(folder, "*")) if p.lower().endswith(exts)]
        files.sort()
        return files
    except Exception as e:
        logger.error(f"Cannot list images in {folder}: {e}")
        return []

def load_image_rgb(path):
    bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    if bgr is None:
        raise FileNotFoundError(f"Cannot read image: {path}")
    h, w = bgr.shape[:2]
    if (h, w) != (H, W):
        raise AssertionError(f"Image must be {H}x{W}, got {(h,w)} @ {os.path.basename(path)}")
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

def load_json_sidecar(path):
    """
    Tìm file .json chứa perm gốc (ví dụ: {"perm_task1": [...], "perm_task2": [...]})
    để chấm PPA nội bộ. Ưu tiên cùng thư mục với ảnh, sau đó images/* và BASE_DIR.
    """
    base = os.path.splitext(os.path.basename(path))[0]
    candidates = [
        os.path.join(os.path.dirname(path), base + ".json"),
        os.path.join(IMAGES_DIR, "origin", base + ".json"),
        os.path.join(IMAGES_DIR, "problem", "task1", base + ".json"),
        os.path.join(IMAGES_DIR, "problem", "task2", base + ".json"),
        os.path.join(BASE_DIR, base + ".json"),
    ]
    for c in candidates:
        if os.path.isfile(c):
            try:
                with open(c, "r", encoding="utf-8") as f:
                    return json.load(f)
            except Exception as e:
                logger.warning(f"Found JSON but failed to read {c}: {e}")
                return None
    return None


In [39]:
# ============================================================
# Cell 2 — OPTIONAL: RESIZE ORIGIN → RESIZE (600x360)
# ------------------------------------------------------------
# - Dùng khi bạn muốn tự sinh bộ "problem" từ ảnh gốc chuẩn
#   hoặc chỉ muốn xem ảnh chuẩn hoá nhanh trong 600x360.
# ============================================================

def resize_to_hw(bgr, target_h=H, target_w=W):
    return cv2.resize(bgr, (target_w, target_h), interpolation=cv2.INTER_AREA)

origin_files = list_images(ORIGIN_DIR)
resized_cnt = 0
for p in origin_files:
    try:
        bgr = cv2.imread(p, cv2.IMREAD_COLOR)
        if bgr is None:
            continue
        if bgr.shape[:2] != (H, W):
            bgr = resize_to_hw(bgr, H, W); resized_cnt += 1
        outp = os.path.join(RESIZE_DIR, os.path.basename(p))
        cv2.imwrite(outp, bgr)
    except Exception as e:
        logger.error(f"Resize failed @ {p}: {e}")

logger.info(f"Resized {resized_cnt}/{len(origin_files)} images to {RESIZE_DIR}.")


2025-10-27 10:16:22,145 | INFO | Resized 6/6 images to /home/dammanhdungvn/learn_ml/project/images/resize.


In [40]:
# ============================================================
# Cell 3 — TILING & ASSEMBLE
# ------------------------------------------------------------
# - Cắt ảnh xáo trộn thành 15 mảnh 120x120 (theo row-major).
# - Lắp ảnh theo hoán vị solver trả về.
# ============================================================

def tiles_from_shuffled(rgb):
    tiles = []
    for r in range(GRID_R):
        for c in range(GRID_C):
            y0, y1 = r*TILE_H, (r+1)*TILE_H
            x0, x1 = c*TILE_W, (c+1)*TILE_W
            tiles.append(rgb[y0:y1, x0:x1].copy())
    return tiles  # list len=15

def assemble_by_perm(tiles, perm):
    canvas = np.zeros((H, W, 3), dtype=np.uint8)
    k = 0
    for r in range(GRID_R):
        for c in range(GRID_C):
            y0, y1 = r*TILE_H, (r+1)*TILE_H
            x0, x1 = c*TILE_W, (c+1)*TILE_W
            canvas[y0:y1, x0:x1] = tiles[perm[k]]
            k += 1
    return canvas


In [41]:
# ============================================================
# Cell 4 — EDGE COST v2
# ------------------------------------------------------------
# - So khớp dải biên R/L/U/D giữa hai mảnh bằng 3 tín hiệu:
#   (1) MSE màu (Lab) sau chuẩn hoá dải; (2) MSE gradient;
#   (3) 1 - SSIM trên kênh L (cấu trúc).
# - Tiền tính (Lab, ∇x, ∇y) cho từng mảnh để tăng tốc.
# ============================================================

from skimage.color import rgb2lab
from skimage.metrics import structural_similarity as ssim

DIRS = ['L','R','U','D']
OPP  = {'L':'R','R':'L','U':'D','D':'U'}

def _lab01(tile_u8):
    return rgb2lab(tile_u8.astype(np.float32)/255.0)

def _band(img, side, band=BAND):
    if side == 'L': return img[:, :band, :]
    if side == 'R': return img[:, -band:, :]
    if side == 'U': return img[:band, :, :]
    if side == 'D': return img[-band:, :, :]
    raise ValueError(side)

def _grad_x(img):
    gx = img[:, 1:, :] - img[:, :-1, :]
    gx = np.pad(gx, ((0,0),(0,1),(0,0)), mode='edge')
    return gx

def _grad_y(img):
    gy = img[1:, :, :] - img[:-1, :, :]
    gy = np.pad(gy, ((0,1),(0,0),(0,0)), mode='edge')
    return gy

def _normalize_band(arr):
    mu = arr.mean(axis=(0,1), keepdims=True)
    sd = arr.std(axis=(0,1), keepdims=True) + 1e-6
    return (arr - mu)/sd

def precompute_feats(tiles):
    feats = []
    for t in tiles:
        lab = _lab01(t)
        gx  = _grad_x(lab)
        gy  = _grad_y(lab)
        feats.append((lab, gx, gy))
    return feats

def edge_cost_from_feats_v2(fA, fB, direction, band=BAND, alpha=ALPHA, beta=BETA, gamma=GAMMA):
    labA, gxA, gyA = fA
    labB, gxB, gyB = fB

    if direction == 'R':
        a_c, b_c = _band(labA, 'R', band), _band(labB, 'L', band)
        a_g, b_g = gxA[:, -band:, :],     gxB[:, :band, :]
    elif direction == 'L':
        a_c, b_c = _band(labA, 'L', band), _band(labB, 'R', band)
        a_g, b_g = gxA[:, :band, :],      gxB[:, -band:, :]
    elif direction == 'D':
        a_c, b_c = _band(labA, 'D', band), _band(labB, 'U', band)
        a_g, b_g = gyA[-band:, :, :],     gyB[:band, :, :]
    elif direction == 'U':
        a_c, b_c = _band(labA, 'U', band), _band(labB, 'D', band)
        a_g, b_g = gyA[:band, :, :],      gyB[-band:, :, :]
    else:
        raise ValueError(direction)

    # 1) MSE màu & 2) MSE gradient (sau chuẩn hoá dải)
    a_c_n, b_c_n = _normalize_band(a_c), _normalize_band(b_c)
    a_g_n, b_g_n = _normalize_band(a_g), _normalize_band(b_g)
    mse_c = np.mean((a_c_n - b_c_n)**2)
    mse_g = np.mean((a_g_n - b_g_n)**2)

    # 3) SSIM trên kênh L → cost = (1 - ssim)
    a_l = a_c[..., 0]; b_l = b_c[..., 0]
    a_l_n = (a_l - a_l.mean())/(a_l.std()+1e-6)
    b_l_n = (b_l - b_l.mean())/(b_l.std()+1e-6)
    try:
        ssim_val = ssim(a_l_n, b_l_n, data_range=(a_l_n.max()-a_l_n.min()))
    except Exception:
        ssim_val = 0.0
    cost_ssim = 1.0 - float(ssim_val)

    return alpha*mse_c + beta*mse_g + gamma*cost_ssim


In [42]:
# ============================================================
# Cell 5 — PRECOMPUTE COSTS & ORDERS
# ------------------------------------------------------------
# - costs[d][i, j]: chi phí ghép i→j theo hướng d.
# - orders[d][i]: danh sách j sắp theo chi phí tăng dần cho (i,d).
# ============================================================

def precompute_costs_and_orders_v2(feats):
    n = len(feats)
    costs = {d: np.full((n, n), np.inf, dtype=np.float32) for d in DIRS}
    for i in range(n):
        for j in range(n):
            if i == j: 
                continue
            for d in DIRS:
                costs[d][i, j] = edge_cost_from_feats_v2(feats[i], feats[j], d)
    orders = {d: [] for d in DIRS}
    for d in DIRS:
        for i in range(n):
            idx = np.argsort(costs[d][i, :]).tolist()
            orders[d].append([k for k in idx if k != i])
    return costs, orders


In [43]:
# ============================================================
# Cell 6 — LOCATION PRIOR
# ------------------------------------------------------------
# - Ước lượng hàng (row) cho mỗi mảnh từ trung bình Lab:
#   hàng 0 (trên): bầu trời xanh → kênh b nhỏ, a≈0;
#   hàng 2 (dưới): thảm cỏ vàng/xanh → b lớn, a hơi âm.
# - Trả ma trận prior[piece, position] (cost mềm).
# ============================================================

from skimage.color import rgb2lab

def compute_location_prior(tiles):
    n = len(tiles)
    pri = np.zeros((n, GRID_R*GRID_C), dtype=np.float32)
    labs = [rgb2lab(t.astype(np.float32)/255.0) for t in tiles]
    means = [(L[...,0].mean(), L[...,1].mean(), L[...,2].mean()) for L in labs]  # (L,a,b)

    targets_b = [-5.0,  5.0, 20.0]  # row 0→1→2
    targets_a = [ 0.0,  0.0, -5.0]

    for p, (L, a, b) in enumerate(means):
        for idx in range(GRID_R*GRID_C):
            r = idx // GRID_C
            tb, ta = targets_b[r], targets_a[r]
            # Khoảng cách chuẩn hoá (mềm) → cost
            pri[p, idx] = ((b - tb)/20.0)**2 + 0.5*((a - ta)/15.0)**2
    return pri


In [44]:
# ============================================================
# Cell 7 — HEURISTIC (ADMISSIBLE) + A* WITH PRIOR
# ------------------------------------------------------------
# - Heuristic h = LB cạnh mở (R/D) + LB prior cho những vị trí còn trống.
# - A* tối ưu tổng: (cost biên kề) + PRIOR_W * prior vị trí.
# - Cắt tỉa bằng best_g[(idx, used_mask)] để tránh mở rộng trạng thái tệ.
# ============================================================

def right_index(idx):
    r, c = divmod(idx, GRID_C)
    return idx + 1 if c + 1 < GRID_C else None

def down_index(idx):
    r, c = divmod(idx, GRID_C)
    return idx + GRID_C if r + 1 < GRID_R else None

def min_unused_for(i, d, costs, orders, used_mask):
    for j in orders[d][i]:
        if not ((used_mask >> j) & 1):
            return float(costs[d][i, j])
    return 0.0

def heuristic_with_prior(placement, used_mask, costs, orders, prior):
    # LB cho các cạnh mở (R/D)
    t = len(placement); h = 0.0
    for i in range(t):
        p = placement[i]
        jR = right_index(i)
        if jR is not None and jR >= t:
            h += min_unused_for(p, 'R', costs, orders, used_mask)
        jD = down_index(i)
        if jD is not None and jD >= t:
            h += min_unused_for(p, 'D', costs, orders, used_mask)
    # LB cho prior của các ô chưa đặt
    remaining_positions = list(range(t, GRID_R*GRID_C))
    unused = [j for j in range(prior.shape[0]) if not ((used_mask >> j) & 1)]
    for pos in remaining_positions:
        if not unused: break
        minp = min(prior[j, pos] for j in unused)
        h += PRIOR_W * minp
    return h

def solve_astar_with_prior(tiles, costs, orders, prior, initial_piece=None, log_prefix=""):
    n = len(tiles)
    total_cells = n
    frontier = []
    best_g = {}  # (idx, used_mask) -> best g đã thấy

    def push_state(placement, used_mask, g):
        h = heuristic_with_prior(placement, used_mask, costs, orders, prior)
        heapq.heappush(frontier, (g+h, g, placement, used_mask))

    # Seed: Task1 có hint → cố định (0,0); Task2 → thử tất cả
    if initial_piece is not None:
        used = (1 << initial_piece)
        placement = [initial_piece]
        g0 = PRIOR_W * prior[initial_piece, 0]     # đừng quên cộng prior vào g0
        push_state(placement, used, g0)
        best_g[(1, used)] = g0
    else:
        for p in range(n):
            used = (1 << p)
            placement = [p]
            g0 = PRIOR_W * prior[p, 0]
            push_state(placement, used, g0)
            best_g[(1, used)] = g0

    expansions = 0
    while frontier:
        f, g, placement, used = heapq.heappop(frontier)
        idx = len(placement)

        if idx == total_cells:
            logger.info(f"{log_prefix}A* done | expansions={expansions} | best_cost={g:.6f}")
            return placement, g

        if best_g.get((idx, used), float('inf')) < g - 1e-12:
            continue

        r, c = divmod(idx, GRID_C)
        left_idx = idx - 1 if c - 1 >= 0 else None
        up_idx   = idx - GRID_C if r - 1 >= 0 else None

        # Ứng viên còn trống, ưu tiên theo cost cục bộ + prior vị trí
        candidates = [p for p in range(n) if not ((used >> p) & 1)]
        def local_increment(p):
            inc = 0.0
            if left_idx is not None: inc += float(costs['R'][placement[left_idx], p])
            if up_idx   is not None: inc += float(costs['D'][placement[up_idx], p])
            inc += PRIOR_W * prior[p, idx]
            return inc
        candidates.sort(key=local_increment)

        for p in candidates:
            g_add = 0.0
            if left_idx is not None: g_add += float(costs['R'][placement[left_idx], p])
            if up_idx   is not None: g_add += float(costs['D'][placement[up_idx], p])
            g_add += PRIOR_W * prior[p, idx]

            used_new = used | (1 << p)
            placement_new = placement + [p]
            g_new = g + g_add
            key = (idx + 1, used_new)

            if g_new + 1e-12 < best_g.get(key, float('inf')):
                best_g[key] = g_new
                push_state(placement_new, used_new, g_new)

        expansions += 1
        if expansions % 5000 == 0:
            logger.info(f"{log_prefix}expansions={expansions}, frontier={len(frontier)}")

    raise RuntimeError(f"{log_prefix}No solution found.")


In [45]:
# ============================================================
# Cell 8 — SOLVE ONE IMAGE & TASK RUNNERS
# ------------------------------------------------------------
# - solve_one_image: tách mảnh → precompute → prior → A* → assemble.
# - run_task_dir: duyệt thư mục, lưu ảnh đã khôi phục vào images/sloved/*.
# ============================================================

def load_hints_csv(path):
    if not os.path.isfile(path):
        logger.warning(f"Hint file not found: {path}")
        return {}
    df = pd.read_csv(path)
    mp = {}
    for _, row in df.iterrows():
        mp[str(row["image_filename"]).strip()] = int(row["top_left_piece_index"])
    logger.info(f"Loaded {len(mp)} task1 hints.")
    return mp

def solve_one_image(path, initial_piece=None):
    rgb = load_image_rgb(path)
    tiles = tiles_from_shuffled(rgb)
    feats = precompute_feats(tiles)
    costs, orders = precompute_costs_and_orders_v2(feats)
    prior = compute_location_prior(tiles)
    perm, best_cost = solve_astar_with_prior(tiles, costs, orders, prior, initial_piece=initial_piece, log_prefix=f"{os.path.basename(path)} | ")
    vis = assemble_by_perm(tiles, perm)
    return perm, best_cost, vis

def run_task_dir(prob_dir, out_dir, use_hint=False, hints_file=None):
    os.makedirs(out_dir, exist_ok=True)
    files = list_images(prob_dir)
    hints = load_hints_csv(hints_file) if use_hint else {}
    rows = []

    for path in tqdm(files, desc=f"Solving {os.path.basename(prob_dir)}"):
        fname = os.path.basename(path)
        try:
            hint = hints.get(fname, None) if use_hint else None
            perm, cost, vis = solve_one_image(path, initial_piece=hint)
            cv2.imwrite(os.path.join(out_dir, fname), cv2.cvtColor(vis, cv2.COLOR_RGB2BGR))
            rows.append([fname] + perm)
            logger.info(f"[{fname}] cost={cost:.6f}")
        except AssertionError as e:
            logger.error(f"{fname}: {e}")
        except FileNotFoundError as e:
            logger.error(f"{fname}: {e}")
        except Exception as e:
            logger.exception(f"{fname}: unexpected error")
    return rows


In [46]:
# ============================================================
# Cell 9 — RUN PIPELINE (TASK1 → TASK2) & WRITE CSV
# ------------------------------------------------------------
# - Kết quả có format chấm:
#   image_filename, piece_at_0_0, ..., piece_at_2_4
# ============================================================

rows_t1 = run_task_dir(PROB_TASK1, SLOVED_TASK1, use_hint=True, hints_file=HINTS_FILE)
rows_t2 = run_task_dir(PROB_TASK2, SLOVED_TASK2, use_hint=False, hints_file=None)

cols = ["image_filename"] + [f"piece_at_{r}_{c}" for r in range(GRID_R) for c in range(GRID_C)]
df_out = pd.DataFrame(rows_t1 + rows_t2, columns=cols)
df_out.sort_values("image_filename", inplace=True)
df_out.to_csv(OUTPUT_CSV, index=False, encoding="utf-8")
logger.info(f"Wrote {OUTPUT_CSV} with {len(df_out)} rows.")
df_out.head()




Solving task1:   0%|                                                                                | 0/6 [00:00<?, ?it/s]

2025-10-27 10:16:23,608 | INFO | origin1.png | expansions=5000, frontier=9474
2025-10-27 10:16:24,533 | INFO | origin1.png | expansions=10000, frontier=13132
2025-10-27 10:16:24,821 | INFO | origin1.png | expansions=15000, frontier=15220
2025-10-27 10:16:25,088 | INFO | origin1.png | expansions=20000, frontier=13863
2025-10-27 10:16:25,339 | INFO | origin1.png | expansions=25000, frontier=9893
2025-10-27 10:16:25,630 | INFO | origin1.png | expansions=30000, frontier=4623
2025-10-27 10:16:25,651 | INFO | origin1.png | A* done | expansions=31079 | best_cost=58.488384
2025-10-27 10:16:25,892 | INFO | [origin1.png] cost=58.488384


Solving task1:  17%|████████████                                                            | 1/6 [00:03<00:17,  3.52s/it]

2025-10-27 10:16:26,901 | INFO | origin2.png | expansions=5000, frontier=10125
2025-10-27 10:16:27,252 | INFO | origin2.png | expansions=10000, frontier=13955
2025-10-27 10:16:27,719 | INFO | origin2.png | expansions=15000, frontier=15496
2025-10-27 10:16:28,084 | INFO | origin2.png | expansions=20000, frontier=14248
2025-10-27 10:16:28,346 | INFO | origin2.png | expansions=25000, frontier=10560
2025-10-27 10:16:28,502 | INFO | origin2.png | A* done | expansions=28689 | best_cost=53.261456
2025-10-27 10:16:28,523 | INFO | [origin2.png] cost=53.261456


Solving task1:  33%|████████████████████████                                                | 2/6 [00:06<00:11,  3.00s/it]

2025-10-27 10:16:29,767 | INFO | origin3.png | expansions=5000, frontier=10748
2025-10-27 10:16:30,303 | INFO | origin3.png | expansions=10000, frontier=14449
2025-10-27 10:16:30,691 | INFO | origin3.png | expansions=15000, frontier=17047
2025-10-27 10:16:31,423 | INFO | origin3.png | expansions=20000, frontier=16177
2025-10-27 10:16:31,677 | INFO | origin3.png | expansions=25000, frontier=12270
2025-10-27 10:16:31,889 | INFO | origin3.png | expansions=30000, frontier=6264
2025-10-27 10:16:31,891 | INFO | origin3.png | A* done | expansions=30001 | best_cost=48.727581
2025-10-27 10:16:31,908 | INFO | [origin3.png] cost=48.727581


Solving task1:  50%|████████████████████████████████████                                    | 3/6 [00:09<00:09,  3.17s/it]

2025-10-27 10:16:32,871 | INFO | origin4.png | expansions=5000, frontier=8515
2025-10-27 10:16:33,285 | INFO | origin4.png | expansions=10000, frontier=13175
2025-10-27 10:16:33,747 | INFO | origin4.png | expansions=15000, frontier=15456
2025-10-27 10:16:34,015 | INFO | origin4.png | expansions=20000, frontier=14538
2025-10-27 10:16:34,221 | INFO | origin4.png | expansions=25000, frontier=10593
2025-10-27 10:16:34,392 | INFO | origin4.png | expansions=30000, frontier=4856
2025-10-27 10:16:34,446 | INFO | origin4.png | A* done | expansions=32055 | best_cost=60.189018
2025-10-27 10:16:34,466 | INFO | [origin4.png] cost=60.189018


Solving task1:  67%|████████████████████████████████████████████████                        | 4/6 [00:12<00:05,  2.93s/it]

2025-10-27 10:16:35,404 | INFO | origin5.png | expansions=5000, frontier=10008
2025-10-27 10:16:35,798 | INFO | origin5.png | expansions=10000, frontier=13615
2025-10-27 10:16:36,141 | INFO | origin5.png | expansions=15000, frontier=16289
2025-10-27 10:16:36,427 | INFO | origin5.png | expansions=20000, frontier=15517
2025-10-27 10:16:36,668 | INFO | origin5.png | expansions=25000, frontier=11504
2025-10-27 10:16:36,843 | INFO | origin5.png | expansions=30000, frontier=5598
2025-10-27 10:16:36,875 | INFO | origin5.png | A* done | expansions=31452 | best_cost=57.410530
2025-10-27 10:16:36,896 | INFO | [origin5.png] cost=57.410530


Solving task1:  83%|████████████████████████████████████████████████████████████            | 5/6 [00:14<00:02,  2.75s/it]

2025-10-27 10:16:37,912 | INFO | origin6.png | expansions=5000, frontier=9818
2025-10-27 10:16:38,294 | INFO | origin6.png | expansions=10000, frontier=13613
2025-10-27 10:16:38,582 | INFO | origin6.png | expansions=15000, frontier=15940
2025-10-27 10:16:38,863 | INFO | origin6.png | expansions=20000, frontier=15446
2025-10-27 10:16:39,060 | INFO | origin6.png | expansions=25000, frontier=11532
2025-10-27 10:16:39,199 | INFO | origin6.png | expansions=30000, frontier=5330
2025-10-27 10:16:39,253 | INFO | origin6.png | A* done | expansions=32234 | best_cost=66.933647
2025-10-27 10:16:39,274 | INFO | [origin6.png] cost=66.933647


Solving task1: 100%|████████████████████████████████████████████████████████████████████████| 6/6 [00:16<00:00,  2.82s/it]
Solving task2:   0%|                                                                                | 0/6 [00:00<?, ?it/s]

2025-10-27 10:16:40,176 | INFO | origin1.png | expansions=5000, frontier=9474
2025-10-27 10:16:40,567 | INFO | origin1.png | expansions=10000, frontier=13132
2025-10-27 10:16:40,919 | INFO | origin1.png | expansions=15000, frontier=15220
2025-10-27 10:16:41,186 | INFO | origin1.png | expansions=20000, frontier=13863
2025-10-27 10:16:41,390 | INFO | origin1.png | expansions=25000, frontier=9893
2025-10-27 10:16:41,521 | INFO | origin1.png | expansions=30000, frontier=4623
2025-10-27 10:16:41,551 | INFO | origin1.png | A* done | expansions=31079 | best_cost=58.488384
2025-10-27 10:16:41,576 | INFO | [origin1.png] cost=58.488384


Solving task2:  17%|████████████                                                            | 1/6 [00:02<00:11,  2.30s/it]

2025-10-27 10:16:42,522 | INFO | origin2.png | expansions=5000, frontier=10125
2025-10-27 10:16:42,864 | INFO | origin2.png | expansions=10000, frontier=13955
2025-10-27 10:16:43,237 | INFO | origin2.png | expansions=15000, frontier=15496
2025-10-27 10:16:43,502 | INFO | origin2.png | expansions=20000, frontier=14248
2025-10-27 10:16:43,717 | INFO | origin2.png | expansions=25000, frontier=10560
2025-10-27 10:16:43,815 | INFO | origin2.png | A* done | expansions=28689 | best_cost=53.261456
2025-10-27 10:16:43,836 | INFO | [origin2.png] cost=53.261456


Solving task2:  33%|████████████████████████                                                | 2/6 [00:04<00:09,  2.28s/it]

2025-10-27 10:16:44,761 | INFO | origin3.png | expansions=5000, frontier=10748
2025-10-27 10:16:45,169 | INFO | origin3.png | expansions=10000, frontier=14449
2025-10-27 10:16:45,509 | INFO | origin3.png | expansions=15000, frontier=17047
2025-10-27 10:16:45,792 | INFO | origin3.png | expansions=20000, frontier=16177
2025-10-27 10:16:46,001 | INFO | origin3.png | expansions=25000, frontier=12270
2025-10-27 10:16:46,140 | INFO | origin3.png | expansions=30000, frontier=6264
2025-10-27 10:16:46,141 | INFO | origin3.png | A* done | expansions=30001 | best_cost=48.727581
2025-10-27 10:16:46,198 | INFO | [origin3.png] cost=48.727581


Solving task2:  50%|████████████████████████████████████                                    | 3/6 [00:06<00:06,  2.32s/it]

2025-10-27 10:16:47,250 | INFO | origin4.png | expansions=5000, frontier=8515
2025-10-27 10:16:47,639 | INFO | origin4.png | expansions=10000, frontier=13175
2025-10-27 10:16:47,957 | INFO | origin4.png | expansions=15000, frontier=15456
2025-10-27 10:16:48,211 | INFO | origin4.png | expansions=20000, frontier=14538
2025-10-27 10:16:48,408 | INFO | origin4.png | expansions=25000, frontier=10593
2025-10-27 10:16:48,549 | INFO | origin4.png | expansions=30000, frontier=4856
2025-10-27 10:16:48,598 | INFO | origin4.png | A* done | expansions=32055 | best_cost=60.189018
2025-10-27 10:16:48,618 | INFO | [origin4.png] cost=60.189018


Solving task2:  67%|████████████████████████████████████████████████                        | 4/6 [00:09<00:04,  2.36s/it]

2025-10-27 10:16:49,565 | INFO | origin5.png | expansions=5000, frontier=10008
2025-10-27 10:16:49,959 | INFO | origin5.png | expansions=10000, frontier=13615
2025-10-27 10:16:50,300 | INFO | origin5.png | expansions=15000, frontier=16289
2025-10-27 10:16:50,565 | INFO | origin5.png | expansions=20000, frontier=15517
2025-10-27 10:16:50,777 | INFO | origin5.png | expansions=25000, frontier=11504
2025-10-27 10:16:50,924 | INFO | origin5.png | expansions=30000, frontier=5598
2025-10-27 10:16:50,965 | INFO | origin5.png | A* done | expansions=31452 | best_cost=57.410530
2025-10-27 10:16:50,981 | INFO | [origin5.png] cost=57.410530


Solving task2:  83%|████████████████████████████████████████████████████████████            | 5/6 [00:11<00:02,  2.36s/it]

2025-10-27 10:16:51,913 | INFO | origin6.png | expansions=5000, frontier=9818
2025-10-27 10:16:52,357 | INFO | origin6.png | expansions=10000, frontier=13613
2025-10-27 10:16:52,718 | INFO | origin6.png | expansions=15000, frontier=15940
2025-10-27 10:16:53,016 | INFO | origin6.png | expansions=20000, frontier=15446
2025-10-27 10:16:53,217 | INFO | origin6.png | expansions=25000, frontier=11532
2025-10-27 10:16:53,399 | INFO | origin6.png | expansions=30000, frontier=5330
2025-10-27 10:16:53,442 | INFO | origin6.png | A* done | expansions=32234 | best_cost=66.933647
2025-10-27 10:16:53,459 | INFO | [origin6.png] cost=66.933647


Solving task2: 100%|████████████████████████████████████████████████████████████████████████| 6/6 [00:14<00:00,  2.36s/it]

2025-10-27 10:16:53,478 | INFO | Wrote /home/dammanhdungvn/learn_ml/project/output/output.csv with 12 rows.





Unnamed: 0,image_filename,piece_at_0_0,piece_at_0_1,piece_at_0_2,piece_at_0_3,piece_at_0_4,piece_at_1_0,piece_at_1_1,piece_at_1_2,piece_at_1_3,piece_at_1_4,piece_at_2_0,piece_at_2_1,piece_at_2_2,piece_at_2_3,piece_at_2_4
0,origin1.png,0,4,12,8,13,10,2,14,9,6,3,1,5,7,11
6,origin1.png,11,1,7,14,3,10,2,5,4,0,12,8,13,9,6
1,origin2.png,0,13,8,2,3,9,6,1,11,7,5,12,14,4,10
7,origin2.png,8,14,10,3,11,7,13,9,4,2,6,12,5,1,0
2,origin3.png,0,14,10,11,7,12,13,2,5,6,4,8,1,9,3


In [47]:
# ============================================================
# Cell 10 — OPTIONAL: EVALUATE PPA VIA JSON SIDECAR
# ------------------------------------------------------------
# - Nếu bạn có *.json chứa perm_task1/perm_task2 (gốc),
#   hàm dưới sẽ tính PPA trung bình cho mỗi task.
# ============================================================

def ppa_score(pred_perm, gt_perm) -> float:
    pred = np.array(pred_perm, dtype=int)
    gt   = np.array(gt_perm, dtype=int)
    return 100.0 * (pred == gt).mean()

def eval_folder_with_json(prob_dir, task_id):
    files = list_images(prob_dir)
    scores = []
    for p in files:
        fname = os.path.basename(p)
        js = load_json_sidecar(p)
        if js is None:
            continue
        gt = js.get("perm_task1" if task_id==1 else "perm_task2")
        if gt is None:
            continue
        row = df_out[df_out.image_filename == fname]
        if len(row) == 0:
            continue
        pred = row.iloc[0, 1:].astype(int).tolist()
        scores.append(ppa_score(pred, gt))
    if scores:
        logger.info(f"[Eval JSON] {os.path.basename(prob_dir)} mean PPA={np.mean(scores):.2f}% on {len(scores)} imgs")
    else:
        logger.info(f"[Eval JSON] No matched JSON found in {prob_dir}")

# Gọi nếu muốn đánh giá nội bộ (không bắt buộc cho nộp bài)
eval_folder_with_json(PROB_TASK1, task_id=1)
eval_folder_with_json(PROB_TASK2, task_id=2)


2025-10-27 10:16:53,498 | INFO | [Eval JSON] No matched JSON found in /home/dammanhdungvn/learn_ml/project/images/problem/task1
2025-10-27 10:16:53,500 | INFO | [Eval JSON] No matched JSON found in /home/dammanhdungvn/learn_ml/project/images/problem/task2


In [48]:
# ============================================================
# Cell 11 — OPTIONAL: QUICK DEBUG SINGLE IMAGE
# ------------------------------------------------------------
# - Đặt đường dẫn ảnh cần test (ảnh xáo trộn).
# - In perm & cost; lưu ảnh assemble về output/debug_assembled.png.
# ============================================================

TEST_IMG = None  # ví dụ: os.path.join(PROB_TASK2, "your_image.png")

if TEST_IMG:
    try:
        perm, cost, vis = solve_one_image(TEST_IMG, initial_piece=None)
        dbg_path = os.path.join(OUTPUT_DIR, "debug_assembled.png")
        cv2.imwrite(dbg_path, cv2.cvtColor(vis, cv2.COLOR_RGB2BGR))
        logger.info(f"[DEBUG] {os.path.basename(TEST_IMG)} -> cost={cost:.6f}")
        logger.info(f"[DEBUG] perm={perm}")
        logger.info(f"[DEBUG] saved={dbg_path}")
    except Exception as e:
        logger.exception(f"[DEBUG] Fail on {TEST_IMG}: {e}")


In [49]:
# ============================================================
# Cell 12 — OPTIONAL: PACKAGE SUBMISSION
# ------------------------------------------------------------
# - Nén output.csv + notebook (.ipynb) vào output/submission.zip
#   (đặt đúng tên file notebook của bạn).
# ============================================================

NOTEBOOK_FILENAME = "solver_astar_prior.ipynb"   # ← sửa đúng tên notebook

with zipfile.ZipFile(SUBMISSION, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
    if os.path.isfile(OUTPUT_CSV):
        zf.write(OUTPUT_CSV, arcname="output.csv")
    nb_local = os.path.join(os.getcwd(), NOTEBOOK_FILENAME)
    nb_alt   = os.path.join(CODE_DIR, NOTEBOOK_FILENAME)
    nb_path  = nb_local if os.path.isfile(nb_local) else nb_alt
    if os.path.isfile(nb_path):
        zf.write(nb_path, arcname=NOTEBOOK_FILENAME)

logger.info(f"Created {SUBMISSION}.")


2025-10-27 10:16:53,572 | INFO | Created /home/dammanhdungvn/learn_ml/project/output/submission.zip.
