In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# NOTE: the download cell that follows calls `kagglehub` without importing it,
# so it only works after this cell has been executed in the same kernel.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
#
# Downloads the competition data, two datasets and one model via kagglehub.
# NOTE(review): none of the `*_path` variables assigned here are referenced by
# the later cells in this notebook; those cells use hard-coded
# "/kaggle/input/..." paths instead. Confirm that those paths exist in the
# environment where this notebook runs.

the_blind_flight_synapse_drive_ps_1_path = kagglehub.competition_download('the-blind-flight-synapse-drive-ps-1')
championsproo_subfill4_path = kagglehub.dataset_download('championsproo/subfill4')
championsproo_blindflightclassmaping_path = kagglehub.dataset_download('championsproo/blindflightclassmaping')
championsproo_blindflightv13_pytorch_default_1_path = kagglehub.model_download('championsproo/blindflightv13/PyTorch/default/1')

print('Data source import complete.')


In [None]:
import cv2
import numpy as np
import os
from pathlib import Path
from tqdm import tqdm
import random

# ==========================================
# 1. CONFIGURATION
# ==========================================
# Directory searched recursively for source "*.png" asset tiles.
ASSETS_ROOT = Path("/kaggle/input/the-blind-flight-synapse-drive-ps-1/SynapseDrive_Dataset/assets")
# Output root; relative to the current working directory. It is deleted and
# recreated on every run of generate_final_dataset().
OUTPUT_DIR = Path("./generated_dataset_final_v13")
# Every asset is resized to IMG_SIZE x IMG_SIZE pixels before augmentation.
IMG_SIZE = 128
# Augmented samples written per asset (tripled for the "Desert_Start" class).
BASE_SAMPLES = 2000

# ==========================================
# 2. MAPPING
# ==========================================
# Maps a substring of the (lower-cased) asset file name to its class label.
# Labels follow "<Biome>_<CellType>". Entries are checked in insertion order
# and the first matching key wins, so the order below is significant.
ASSET_MAP = {
    # Desert biome (t1)
    "t1_sand": "Desert_Road",
    "t1_cacti": "Desert_Cacti",
    "t1_rocks": "Desert_Rocks",
    "t1_quicksand": "Desert_Hazard",
    "t1_rover": "Desert_Start",
    "t1_goal": "Desert_End",
    # Forest biome (t0)
    "t0_dirt": "Forest_Road",
    "t0_tree": "Forest_Tree",
    "t0_puddle": "Forest_Hazard",
    "t0_startship": "Forest_Start",
    "t0_goal": "Forest_End",
    # Lab biome (t2)
    "t2_floor": "Lab_Road",
    "t2_wall": "Lab_Wall",
    "t2_plasma": "Lab_Plasma",
    "t2_glue": "Lab_Hazard",
    "t2_drone": "Lab_Start",
    "t2_goal": "Lab_End",
}

# ==========================================
# 3. AUGMENTATION ENGINES
# ==========================================
def apply_soft_blur(img):
    """Return `img` blurred with a Gaussian kernel of random size 3x3 or 5x5."""
    side = random.choice([3, 5])
    kernel_size = (side, side)
    # Sigma 0 is passed through to cv2.GaussianBlur unchanged.
    return cv2.GaussianBlur(img, kernel_size, 0)

def apply_diagonal_cut(img):
    """Black out a triangular region at one randomly chosen corner of `img`.

    The triangle's two legs run `cut_size` pixels (random, 30..60) along the
    image edges from the chosen corner.

    Args:
        img: image array; `img.shape[:2]` is read as (height, width).

    Returns:
        The image with the corner filled with (0, 0, 0). A C-contiguous input
        is modified in place and returned; otherwise a contiguous copy is
        filled and returned.
    """
    if not img.flags['C_CONTIGUOUS']:
        img = np.ascontiguousarray(img)
    h, w = img.shape[:2]
    corner = random.choice(['tl', 'tr', 'bl', 'br'])
    cut_size = random.randint(30, 60)

    # Vertex lists as (x, y), keyed by corner name.
    triangles = {
        'tl': [[0, 0], [cut_size, 0], [0, cut_size]],
        'tr': [[w, 0], [w - cut_size, 0], [w, cut_size]],
        'bl': [[0, h], [cut_size, h], [0, h - cut_size]],
        'br': [[w, h], [w - cut_size, h], [w, h - cut_size]],
    }
    # cv2.fillPoly requires 32-bit integer vertices. NumPy's default integer
    # dtype is 64-bit on most platforms, so the dtype is set explicitly.
    pts = np.array(triangles[corner], dtype=np.int32)
    cv2.fillPoly(img, [pts], (0, 0, 0))
    return img

def apply_tint(img):
    """Blend `img` with a randomly chosen solid colour overlay.

    The image is returned untouched when the random draw exceeds 0.8.
    Otherwise a uniform overlay (channel order b, g, r) is mixed in at 25%
    weight. The "Dark" mode uses an all-zero overlay.
    """
    if random.random() > 0.8:
        return img

    mode = random.choice(["Cyan", "Purple", "Yellow", "Blue", "Green", "Red", "Dark"])
    intensity = random.randint(30, 70)
    two_channel = intensity + 40   # level used when two channels are lit
    one_channel = intensity + 60   # level used when a single channel is lit

    # (b, g, r) overlay colour for each mode.
    overlay_colour = {
        "Cyan": (two_channel, two_channel, 0),
        "Purple": (two_channel, 0, two_channel),
        "Yellow": (0, two_channel, two_channel),
        "Blue": (one_channel, 0, 0),
        "Green": (0, one_channel, 0),
        "Red": (0, 0, one_channel),
        "Dark": (0, 0, 0),
    }[mode]

    overlay = np.zeros_like(img)
    overlay[:] = overlay_colour
    return cv2.addWeighted(img, 0.75, overlay, 0.25, 0)

def augment_v13_boosted(img, label, force_mirror=False):
    """Produce one randomly augmented variant of `img`.

    Steps, in order: random 90-degree rotation, horizontal flip (always when
    `force_mirror` is set, otherwise with probability 0.5), optional corner
    cut and blur, then a colour tint. Labels containing "Start" or "End"
    skip the corner cut and the blur.

    Args:
        img: source image array; it is copied, not modified.
        label: class label string, inspected only for "Start" / "End".
        force_mirror: when true, the horizontal flip is always applied.

    Returns:
        The augmented image.
    """
    # 1. Rotate by a random multiple of 90 degrees. np.rot90 returns a view,
    #    so the result is made contiguous before it goes to OpenCV.
    quarter_turns = random.choice([0, 1, 2, 3])
    result = np.ascontiguousarray(np.rot90(img.copy(), quarter_turns))

    # 2. Horizontal mirror. No random draw is made when the flip is forced.
    if force_mirror or random.random() > 0.5:
        result = cv2.flip(result, 1)

    # 3 + 4. Corner cut and soft blur are skipped for Start/End tiles.
    protected = ("Start" in label) or ("End" in label)
    if not protected:
        if random.random() > 0.8:
            result = apply_diagonal_cut(result)
        if random.random() > 0.7:
            result = apply_soft_blur(result)

    # 5. Every tile goes through the tint step.
    return apply_tint(result)

# ==========================================
# 4. GENERATOR
# ==========================================
def _resolve_class_label(filename):
    """Return the class label for a lower-cased asset file stem, or None if unrecognised."""
    # Primary lookup: first ASSET_MAP key that occurs in the file name.
    for key, label in ASSET_MAP.items():
        if key in filename:
            return label

    # Fallbacks for names that lack the "<biome>_<asset>" key.
    if "rover" in filename:
        return "Desert_Start"
    if "startship" in filename:
        return "Forest_Start"
    if "drone" in filename:
        return "Lab_Start"
    if "goal" in filename:
        if "t0" in filename:
            return "Forest_End"
        if "t1" in filename:
            return "Desert_End"
        if "t2" in filename:
            return "Lab_End"
        return "Desert_End"
    return None


def generate_final_dataset():
    """Regenerate the augmented training set under OUTPUT_DIR.

    OUTPUT_DIR is deleted if it exists and then recreated. Every "*.png" found
    under ASSETS_ROOT that resolves to a class label is resized to
    IMG_SIZE x IMG_SIZE and augmented BASE_SAMPLES times (three times as many
    for "Desert_Start"). Results are written to
    OUTPUT_DIR/<label>/<label>_<n>.png.

    File indices continue across assets that share a class label. Previously
    each asset restarted at index 0, so a second asset of the same class
    overwrote the first asset's files and the printed total over-counted.
    As a consequence, a class fed by several assets now holds samples from
    all of them.
    """
    if OUTPUT_DIR.exists():
        import shutil
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(parents=True)

    # Sorted so the processing order does not depend on the file system.
    all_assets = sorted(ASSETS_ROOT.rglob("*.png"))
    total_images = 0
    next_index = {}  # class label -> next unused file index

    print("Generating V13 (Rover Boost + Mirroring)...")

    for asset_path in all_assets:
        filename = asset_path.name.lower().replace(".png", "")
        class_label = _resolve_class_label(filename)
        if class_label is None:
            continue

        class_dir = OUTPUT_DIR / class_label
        class_dir.mkdir(exist_ok=True)

        original = cv2.imread(str(asset_path))
        if original is None:
            continue
        original = cv2.resize(original, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)

        # --- OVERSAMPLING LOGIC ---
        # The Desert rover class gets 3x the samples.
        current_target = BASE_SAMPLES
        if class_label == "Desert_Start":
            current_target = BASE_SAMPLES * 3
            print(f"   >>> Boosting {class_label} to {current_target} samples")

        first_index = next_index.get(class_label, 0)
        for i in range(current_target):
            # Even-numbered samples are always mirrored; odd ones flip at random.
            force_flip = (i % 2 == 0)
            img = augment_v13_boosted(original, class_label, force_mirror=force_flip)

            out_path = class_dir / f"{class_label}_{first_index + i}.png"
            # Only count files that were actually written.
            if cv2.imwrite(str(out_path), img):
                total_images += 1
        next_index[class_label] = first_index + current_target

    print(f"‚úÖ V13 Dataset Ready ({total_images} images).")

# Runs the generator when this cell (or script) is executed as the main module.
if __name__ == "__main__":
    generate_final_dataset()

In [None]:
import matplotlib.pyplot as plt
import cv2
import random
from pathlib import Path
import math

# ==========================================
# CONFIGURATION
# ==========================================
# Same relative location the generator cell writes to
# (OUTPUT_DIR = "./generated_dataset_final_v13"). The previous absolute
# "/kaggle/working/..." path only matched when the working directory was
# /kaggle/working.
DATASET_DIR = Path("./generated_dataset_final_v13")
SAMPLES_TO_SHOW = 20  # Total images to display

def visualize_dataset():
    """Show a random grid of generated images with their class label and shape.

    Reads DATASET_DIR/<class>/*.png, picks up to SAMPLES_TO_SHOW images at
    random and plots them five per row. Prints a message and returns early
    when the directory is missing or contains no images.
    """
    if not DATASET_DIR.exists():
        print(f"‚ùå Error: Directory {DATASET_DIR} not found. Run generator first.")
        return

    # 1. One sub-directory per class; collect (path, class) pairs.
    classes = [entry.name for entry in DATASET_DIR.iterdir() if entry.is_dir()]
    print(f"Found classes: {classes}")

    all_images = [
        (png_path, cls)
        for cls in classes
        for png_path in (DATASET_DIR / cls).glob("*.png")
    ]
    if len(all_images) == 0:
        print("‚ùå No images found.")
        return

    # 2. Random selection, capped at the number of available images.
    n_pick = min(len(all_images), SAMPLES_TO_SHOW)
    samples = random.sample(all_images, n_pick)

    # 3. Grid layout: five columns, as many rows as needed.
    cols = 5
    rows = math.ceil(len(samples) / cols)
    fig = plt.figure(figsize=(15, 3 * rows))

    for position, (img_path, label) in enumerate(samples, start=1):
        # Files are read back as stored; OpenCV loads BGR, matplotlib wants RGB.
        rgb = cv2.cvtColor(cv2.imread(str(img_path)), cv2.COLOR_BGR2RGB)

        ax = fig.add_subplot(rows, cols, position)
        ax.imshow(rgb)
        ax.set_title(f"{label}\n{rgb.shape}", fontsize=9)
        ax.axis('off')

    fig.tight_layout()
    plt.show()

# Runs the visualizer when this cell (or script) is executed as the main module.
if __name__ == "__main__":
    visualize_dataset()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import json
from pathlib import Path
from tqdm import tqdm

# ==========================================
# 1. CONFIGURATION
# ==========================================
# Same relative location the generator cell writes to
# (OUTPUT_DIR = "./generated_dataset_final_v13"). The previous absolute
# "/kaggle/working/..." path only matched when the working directory was
# /kaggle/working.
DATA_DIR = Path("./generated_dataset_final_v13")
MODEL_SAVE_PATH = "terrain_classifier_resnet_v12_1.pth"
MAPPING_SAVE_PATH = "class_mapping.json"
BATCH_SIZE = 128  # Large batch size to feed both GPUs
EPOCHS = 15
# Passed to AdamW as its initial learning rate.
LEARNING_RATE = 0.0001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==========================================
# 2. DATA LOADING (128x128)
# ==========================================
# Normalization matches ImageNet stats (ResNet standard)
# Images are converted to tensors and normalised with the ImageNet channel
# statistics that the pretrained ResNet weights expect.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

full_dataset = datasets.ImageFolder(root=DATA_DIR, transform=transform)
train_size = int(0.9 * len(full_dataset))
val_size = len(full_dataset) - train_size
# A fixed-seed generator makes the 90/10 split identical on every run, so
# validation accuracy is comparable between runs.
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# num_workers=4 loads batches in background worker processes.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)

# ==========================================
# 3. DUAL-GPU MODEL SETUP
# ==========================================
print("üöÄ Building ResNet-18 (V10 Battle Mode)...")
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(full_dataset.classes))

# --- DUAL GPU ACTIVATION ---
if torch.cuda.device_count() > 1:
    print(f"üî• Twin-Turbo Active: Using {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)
else:
    print("‚ö†Ô∏è Single GPU Mode.")

model = model.to(device)

# Optimization
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-3)
scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, steps_per_epoch=len(train_loader), epochs=EPOCHS)

# ==========================================
# 4. TRAINING LOOP
# ==========================================
print(f"Starting Training on {len(full_dataset.classes)} classes...")

for epoch in range(EPOCHS):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}", leave=False)
    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # The scheduler is configured per batch, so it steps here rather than
        # once per epoch.
        scheduler.step()

        running_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    # Mean batch loss for the epoch. It was accumulated before but never shown.
    avg_train_loss = running_loss / max(len(train_loader), 1)

    # ---- Validation pass ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty validation split (very small datasets).
    val_acc = 100 * correct / total if total else float("nan")
    print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f} | Val Acc = {val_acc:.2f}%")

# ==========================================
# 5. SAVE (Unwrap DataParallel)
# ==========================================
# DataParallel keeps the real network under `.module`. Saving that inner
# state dict means the weights load into a plain (unwrapped) ResNet later.
torch.save(
    (model.module if isinstance(model, nn.DataParallel) else model).state_dict(),
    MODEL_SAVE_PATH,
)

# Invert ImageFolder's class-name -> index mapping so predictions can be
# decoded. JSON turns the integer keys into strings on disk.
idx_to_class = {index: name for name, index in full_dataset.class_to_idx.items()}
with open(MAPPING_SAVE_PATH, 'w') as f:
    json.dump(idx_to_class, f)

print(f"‚úÖ Saved Model to {MODEL_SAVE_PATH}")

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms, models
import cv2
import numpy as np
import matplotlib.pyplot as plt
import heapq
import json
from pathlib import Path
from sklearn.cluster import DBSCAN
import random
from tqdm.auto import tqdm

# ==========================================
# 1. CONFIGURATION
# ==========================================
# Artifacts written by the training cell (relative to the working directory).
MODEL_PATH = "terrain_classifier_resnet_v12_1.pth"
MAPPING_PATH = "class_mapping.json"
TEST_IMG_DIR = Path("/kaggle/input/the-blind-flight-synapse-drive-ps-1/SynapseDrive_Dataset/test/images")
CELL_SIZE = 64    # side length, in pixels, of each warped grid cell
MAX_ROWS = 20     # grid tensor capacity (rows)
MAX_COLS = 20     # grid tensor capacity (columns)
NUM_SAMPLES = 40  # number of test images to process

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Per-biome cost of stepping onto a cell of each type. The path search treats
# any cost of 100 or more as impassable.
COST_TABLES = {
    "Desert": {
        "Road": 1.2, "Start": 1.2, "End": 2.2, "Hazard": 3.7,
        "Cacti": 999.0, "Rocks": 999.0, "Obstacle": 999.0, "Unknown": 8.0,
    },
    "Forest": {
        "Road": 1.5, "Start": 1.5, "End": 2.5, "Hazard": 2.8,
        "Tree": 999.0, "Obstacle": 999.0, "Unknown": 8.0,
    },
    "Lab": {
        "Road": 1.0, "Start": 1.0, "End": 2.0, "Hazard": 3.0,
        "Wall": 999.0, "Plasma": 999.0, "Obstacle": 999.0, "Unknown": 8.0,
    },
}

# ==========================================
# 2. VISION HELPERS (Grid Extraction)
# ==========================================
def preprocess_for_grid(img):
    """Turn a BGR image into an inverted binary mask used for grid-line detection.

    Pipeline: grayscale -> 7-pixel median blur -> inverted Gaussian adaptive
    threshold (block size 19, constant 5).
    """
    smoothed = cv2.medianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 7)
    return cv2.adaptiveThreshold(
        smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 19, 5
    )

def get_intersections(img):
    """Find candidate grid-line crossing points in a BGR image.

    Long horizontal and vertical strokes are isolated with morphological
    opening, AND-ed together and dilated. The centroid of every resulting blob
    with area above 10 is kept, and centroids within 20 px of each other are
    merged by DBSCAN.

    Returns:
        An array with one merged point per row, or an empty array when no
        blob qualifies.
    """
    binary = preprocess_for_grid(img)
    stroke_len = 25

    horizontal = cv2.morphologyEx(
        binary, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (stroke_len, 1))
    )
    vertical = cv2.morphologyEx(
        binary, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (1, stroke_len))
    )
    crossings = cv2.dilate(cv2.bitwise_and(horizontal, vertical), np.ones((5,5)))

    n_components, _, stats, centroids = cv2.connectedComponentsWithStats(crossings)
    candidates = []
    for idx in range(1, n_components):  # component 0 is skipped
        if stats[idx, cv2.CC_STAT_AREA] > 10:
            candidates.append(centroids[idx])
    if not candidates:
        return np.array([])

    # min_samples=1: every point belongs to some cluster, so nothing is dropped.
    cluster_ids = DBSCAN(eps=20, min_samples=1).fit(candidates).labels_
    candidates = np.array(candidates)
    merged = [np.mean(candidates[cluster_ids == cid], axis=0) for cid in set(cluster_ids)]
    return np.array(merged)

def sort_points_robust(points):
    """Group points into grid rows and order them.

    Points are clustered on their second coordinate (eps=25, at least 3
    points per cluster); points labelled as noise are dropped. Rows are
    returned sorted by mean second coordinate, and each row's points are
    sorted by first coordinate.

    Returns:
        A list of arrays, one per row.
    """
    row_ids = DBSCAN(eps=25, min_samples=3).fit(points[:, 1].reshape(-1, 1)).labels_

    grouped = {}
    for point, row_id in zip(points, row_ids):
        if row_id != -1:  # -1 marks DBSCAN noise
            grouped.setdefault(row_id, []).append(point)

    def mean_second_coord(row_id):
        return np.mean([p[1] for p in grouped[row_id]])

    ordered_rows = []
    for row_id in sorted(grouped.keys(), key=mean_second_coord):
        ordered_rows.append(np.array(sorted(grouped[row_id], key=lambda p: p[0])))
    return ordered_rows

def get_global_column_grid(grid_rows):
    """Return the sorted centres of the column clusters shared by all rows.

    The first coordinate of every point in every row is clustered with DBSCAN
    (eps=15, min_samples=1); each cluster's mean becomes one column centre.
    Returns an empty array when there are no points.
    """
    xs = [pt[0] for row in grid_rows for pt in row]
    if len(xs) == 0:
        return np.array([])

    xs = np.array(xs).reshape(-1, 1)
    labels = DBSCAN(eps=15, min_samples=1).fit(xs).labels_

    centres = []
    for cluster_id in set(labels):
        if cluster_id == -1:
            continue
        centres.append(np.mean(xs[labels == cluster_id]))
    centres.sort()
    return np.array(centres)

def extract_grid_data(image_path):
    """Slice a map image into perspective-corrected grid cells.

    Args:
        image_path: path of the image to read with OpenCV.

    Returns:
        (grid_tensor, mask_tensor, status):
        * grid_tensor: uint8 array (MAX_ROWS, MAX_COLS, CELL_SIZE, CELL_SIZE, 3)
          holding the warped cell images.
        * mask_tensor: uint8 array (MAX_ROWS, MAX_COLS), 1 where a cell was
          extracted.
        * status: "Active: Gridded" on success, a "Fallback: ..." reason when
          too few points/rows/columns were found, or "Error: <message>" when
          an exception occurred. The tensors may be partially filled on error.
        Returns (None, None, "Load Error") when the image cannot be read.
    """
    img = cv2.imread(str(image_path))
    if img is None: return None, None, "Load Error"

    grid_tensor = np.zeros((MAX_ROWS, MAX_COLS, CELL_SIZE, CELL_SIZE, 3), dtype=np.uint8)
    mask_tensor = np.zeros((MAX_ROWS, MAX_COLS), dtype=np.uint8)

    try:
        points = get_intersections(img)
        if len(points) < 40: return grid_tensor, mask_tensor, "Fallback: Low Pts"
        grid_rows = sort_points_robust(points)
        if len(grid_rows) < 4: return grid_tensor, mask_tensor, "Fallback: Few Rows"
        col_centers = get_global_column_grid(grid_rows)
        if len(col_centers) < 2: return grid_tensor, mask_tensor, "Fallback: No Cols"

        # Target corners of every warped cell (top-left, top-right,
        # bottom-right, bottom-left).
        dst = np.array([[0,0], [CELL_SIZE,0], [CELL_SIZE,CELL_SIZE], [0,CELL_SIZE]], dtype="float32")

        for r in range(min(len(grid_rows) - 1, MAX_ROWS)):
            row_top = grid_rows[r]
            row_btm = grid_rows[r+1]
            for pt_top in row_top:
                # Top-right corner: the nearest point to the right in the same
                # row, rejected when more than 100 px away.
                neighbors_tr = [p for p in row_top if p[0] > pt_top[0]]
                if not neighbors_tr: continue
                pt_tr = min(neighbors_tr, key=lambda p: p[0])
                if abs(pt_tr[0] - pt_top[0]) > 100: continue

                # Bottom corners: the points in the next row closest in x to
                # the two top corners (within 70 px).
                candidates_bl = [p for p in row_btm if abs(p[0] - pt_top[0]) < 70]
                if not candidates_bl: continue
                pt_bl = min(candidates_bl, key=lambda p: abs(p[0] - pt_top[0]))
                candidates_br = [p for p in row_btm if abs(p[0] - pt_tr[0]) < 70]
                if not candidates_br: continue
                pt_br = min(candidates_br, key=lambda p: abs(p[0] - pt_tr[0]))

                src = np.array([pt_top, pt_tr, pt_br, pt_bl], dtype="float32")
                M = cv2.getPerspectiveTransform(src, dst)
                warped = cv2.warpPerspective(img, M, (CELL_SIZE, CELL_SIZE))

                # Column index = nearest global column centre to the cell's
                # top-left corner.
                c_idx = np.argmin(np.abs(col_centers - pt_top[0]))
                if c_idx < MAX_COLS:
                    grid_tensor[r, c_idx] = warped
                    mask_tensor[r, c_idx] = 1
        return grid_tensor, mask_tensor, "Active: Gridded"
    except Exception as exc:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit
        # still propagate, and the reason is reported instead of hidden.
        return grid_tensor, mask_tensor, f"Error: {exc}"

# ==========================================
# 3. MODEL LOADING (Standard ResNet Mode)
# ==========================================
# Load the index -> class-name mapping. JSON object keys are always strings,
# so they are converted back to int to match the model's predicted indices.
with open(MAPPING_PATH, 'r') as f:
    idx_to_class = json.load(f)
    idx_to_class = {int(k): v for k, v in idx_to_class.items()}

# Construct Standard ResNet-18 (Matches your saved file)
# No pretrained weights are requested here; the weights come from MODEL_PATH.
model = models.resnet18(pretrained=False)

# The stock ResNet stem (conv1 / maxpool) is deliberately left unmodified.
# The two commented-out lines below swap in a different stem; they are kept
# for reference only.
# model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
# model.maxpool = nn.Identity()

# Classifier head sized to the number of classes in the mapping file.
model.fc = nn.Linear(model.fc.in_features, len(idx_to_class))
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model = model.to(device)
model.eval()

# Grid cells are warped to CELL_SIZE (64) pixels and resized to 128x128 here,
# the resolution the generator cell writes its training images at
# (IMG_SIZE = 128). Normalisation uses the same constants as the training
# transform.
transform_pipe = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ==========================================
# 4. SOLVER PROCESSOR
# ==========================================
def _find_path(terrain_map, costs, start_pos, end_pos, n_rows, n_cols):
    """A* search over the 4-connected cell grid, limited to n_rows x n_cols.

    Returns the path as a list of (row, col) from end_pos back to start_pos,
    or an empty list when end_pos cannot be reached.
    """
    pq = [(0, start_pos)]
    cost_so_far = {start_pos: 0}
    came_from = {}
    reached = False

    while pq:
        _, curr = heapq.heappop(pq)
        if curr == end_pos:
            reached = True
            break

        r, c = curr
        for nr, nc in ((r-1, c), (r+1, c), (r, c-1), (r, c+1)):
            if not (0 <= nr < n_rows and 0 <= nc < n_cols):
                continue
            label = terrain_map[nr, nc]
            # "Biome_Type" -> "Type"; labels without "_" (e.g. "Unknown") are used as-is.
            cell_type = label.split("_")[1] if "_" in label else label
            step_cost = costs.get(cell_type, 999.0)
            if step_cost >= 100:  # impassable
                continue

            new_cost = cost_so_far[curr] + step_cost
            if (nr, nc) not in cost_so_far or new_cost < cost_so_far[(nr, nc)]:
                cost_so_far[(nr, nc)] = new_cost
                # Priority = cost so far + Manhattan distance to the goal.
                priority = new_cost + abs(nr - end_pos[0]) + abs(nc - end_pos[1])
                heapq.heappush(pq, (priority, (nr, nc)))
                came_from[(nr, nc)] = curr

    if not reached:
        return []

    path = []
    curr = end_pos
    while curr != start_pos:
        path.append(curr)
        curr = came_from[curr]
    path.append(start_pos)
    return path


def process_single_image(image_path):
    """Classify every grid cell of one map image, solve it, and draw the result.

    Steps: slice the image with extract_grid_data, classify all extracted
    cells in one batch, pick the dominant biome by weighted vote, run A* from
    the "Start" cell to the "End" cell with that biome's cost table, and draw
    cell markers plus the path on a 400x400 copy of the image.

    The search and the overlay are limited to the extent of the detected
    grid. Previously both used the full MAX_ROWS x MAX_COLS padded tensor, so
    a path could run through "Unknown" cells beyond the real map, and markers
    were misplaced for grids smaller than the tensor. The overlay spacing
    assumes the detected grid spans the whole image.

    Returns:
        (image, status) where status is "<biome>: Solved" or
        "<biome>: No Path"; (None, "Slicer Fail") or (None, "Empty Grid")
        when no cells could be extracted.
    """
    grid_tensor, mask_tensor, status = extract_grid_data(image_path)
    if "Active" not in status: return None, "Slicer Fail"

    rows, cols = mask_tensor.shape
    batch_tensors = []
    coords = []

    for r in range(rows):
        for c in range(cols):
            if mask_tensor[r, c] == 1:
                # BGR -> RGB
                rgb_cell = cv2.cvtColor(grid_tensor[r, c], cv2.COLOR_BGR2RGB)
                batch_tensors.append(transform_pipe(rgb_cell))
                coords.append((r, c))

    if not batch_tensors: return None, "Empty Grid"

    # Extent of the detected grid: cells are indexed from 0, so the largest
    # populated row / column index bounds the map.
    n_rows = max(r for r, _ in coords) + 1
    n_cols = max(c for _, c in coords) + 1

    batch_stack = torch.stack(batch_tensors).to(device)
    with torch.no_grad():
        outputs = model(batch_stack)
        _, preds = torch.max(outputs, 1)

    terrain_map = np.full((rows, cols), "Unknown", dtype=object)
    counts = {"Desert": 0, "Forest": 0, "Lab": 0}

    for i, (r, c) in enumerate(coords):
        class_name = idx_to_class[preds[i].item()]
        terrain_map[r, c] = class_name

        # Weighted biome vote: Start/End cells count 5, obstacle types 3, others 1.
        biome = class_name.split("_")[0] if "_" in class_name else "Lab"
        weight = 3 if any(x in class_name for x in ["Cacti", "Tree", "Plasma", "Wall", "Rocks"]) else 1
        if "Start" in class_name or "End" in class_name: weight = 5
        counts[biome] = counts.get(biome, 0) + weight

    dominant_biome = max(counts, key=counts.get) if counts else "Lab"
    costs = COST_TABLES.get(dominant_biome, COST_TABLES["Lab"])

    # When several cells are labelled Start (or End), the last one in
    # row-major order is used.
    start_pos, end_pos = None, None
    for r in range(n_rows):
        for c in range(n_cols):
            if "Start" in terrain_map[r, c]: start_pos = (r, c)
            if "End" in terrain_map[r, c]: end_pos = (r, c)

    path = []
    if start_pos and end_pos:
        path = _find_path(terrain_map, costs, start_pos, end_pos, n_rows, n_cols)
    status_msg = "Solved" if path else "No Path"

    # Visualization
    img_vis = cv2.imread(str(image_path))
    img_vis = cv2.resize(img_vis, (400, 400))
    cell_h, cell_w = 400 // n_rows, 400 // n_cols

    for r in range(n_rows):
        for c in range(n_cols):
            if mask_tensor[r, c] == 1:
                lbl = terrain_map[r, c]
                cx, cy = int((c + 0.5) * cell_w), int((r + 0.5) * cell_h)
                # Later checks override earlier ones.
                color = (100, 100, 100)
                if "Start" in lbl: color = (0, 255, 255)
                if "End" in lbl: color = (0, 0, 255)
                if any(x in lbl for x in ["Obstacle", "Wall", "Tree", "Cacti", "Rocks", "Plasma"]): color = (0, 0, 0)
                if "Hazard" in lbl: color = (0, 165, 255)
                cv2.circle(img_vis, (cx, cy), 3, color, -1)

    if path:
        for i in range(len(path) - 1):
            p1 = (int((path[i][1]+0.5)*cell_w), int((path[i][0]+0.5)*cell_h))
            p2 = (int((path[i+1][1]+0.5)*cell_w), int((path[i+1][0]+0.5)*cell_h))
            cv2.line(img_vis, p1, p2, (0, 255, 0), 2)

    return img_vis, f"{dominant_biome}: {status_msg}"

# ==========================================
# 5. EXECUTION
# ==========================================
# Process a random sample of test images and show the solved maps, five per row.
test_files = list(TEST_IMG_DIR.glob("*.png"))
if test_files:
    samples = random.sample(test_files, min(len(test_files), NUM_SAMPLES))
    print(f"Processing {len(samples)} images...")
    results = []
    for img_path in tqdm(samples):
        res_img, status = process_single_image(img_path)
        if res_img is not None: results.append((res_img, status))

    if results:
        rows = (len(results) + 4) // 5
        # squeeze=False always yields a 2-D axes array, even for a single row.
        fig, axes = plt.subplots(rows, 5, figsize=(20, 4 * rows), squeeze=False)
        axes = axes.flatten()
        for ax, (img, status) in zip(axes, results):
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            title_color = 'green' if "Solved" in status else 'red'
            ax.set_title(status, color=title_color, fontsize=10, fontweight='bold')
            ax.axis('off')
        # Hide the unused axes in the last row.
        for ax in axes[len(results):]:
            ax.axis('off')
        plt.tight_layout()
        plt.show()
    else:
        # Every image failed slicing. plt.subplots(0, 5) would raise here.
        print("No images could be processed.")
else:
    print("No images found.")

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.cluster import DBSCAN
import random

# ==========================================
# 1. CONFIGURATION
# ==========================================
# NOTE: TEST_IMG_DIR, CELL_SIZE, MAX_ROWS and MAX_COLS repeat the values set
# in the solver cell; running this cell rebinds those module-level names.
TEST_IMG_DIR = Path("/kaggle/input/the-blind-flight-synapse-drive-ps-1/SynapseDrive_Dataset/test/images")
CELL_SIZE = 64
MAX_ROWS = 20
MAX_COLS = 20
SAMPLES_TO_CHECK = 10  # How many maps to inspect

# ==========================================
# 2. THE EXACT VISION LOGIC (Copy-Paste from your pipeline)
# ==========================================
def preprocess_for_grid(img):
    """Turn a BGR image into an inverted binary mask used for grid-line detection.

    Pipeline: grayscale -> 7-pixel median blur -> inverted Gaussian adaptive
    threshold (block size 19, constant 5).
    """
    smoothed = cv2.medianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 7)
    return cv2.adaptiveThreshold(
        smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 19, 5
    )

def get_intersections(img):
    """Find candidate grid-line crossing points in a BGR image.

    Long horizontal and vertical strokes are isolated with morphological
    opening, AND-ed together and dilated. The centroid of every resulting blob
    with area above 10 is kept, and centroids within 20 px of each other are
    merged by DBSCAN.

    Returns:
        An array with one merged point per row, or an empty array when no
        blob qualifies.
    """
    binary = preprocess_for_grid(img)
    stroke_len = 25

    horizontal = cv2.morphologyEx(
        binary, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (stroke_len, 1))
    )
    vertical = cv2.morphologyEx(
        binary, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (1, stroke_len))
    )
    crossings = cv2.dilate(cv2.bitwise_and(horizontal, vertical), np.ones((5,5)))

    n_components, _, stats, centroids = cv2.connectedComponentsWithStats(crossings)
    candidates = []
    for idx in range(1, n_components):  # component 0 is skipped
        if stats[idx, cv2.CC_STAT_AREA] > 10:
            candidates.append(centroids[idx])
    if not candidates:
        return np.array([])

    # min_samples=1: every point belongs to some cluster, so nothing is dropped.
    cluster_ids = DBSCAN(eps=20, min_samples=1).fit(candidates).labels_
    candidates = np.array(candidates)
    merged = [np.mean(candidates[cluster_ids == cid], axis=0) for cid in set(cluster_ids)]
    return np.array(merged)

def sort_points_robust(points):
    """Group points into grid rows and order them.

    Points are clustered on their second coordinate (eps=25, at least 3
    points per cluster); points labelled as noise are dropped. Rows are
    returned sorted by mean second coordinate, and each row's points are
    sorted by first coordinate.

    Returns:
        A list of arrays, one per row.
    """
    row_ids = DBSCAN(eps=25, min_samples=3).fit(points[:, 1].reshape(-1, 1)).labels_

    grouped = {}
    for point, row_id in zip(points, row_ids):
        if row_id != -1:  # -1 marks DBSCAN noise
            grouped.setdefault(row_id, []).append(point)

    def mean_second_coord(row_id):
        return np.mean([p[1] for p in grouped[row_id]])

    ordered_rows = []
    for row_id in sorted(grouped.keys(), key=mean_second_coord):
        ordered_rows.append(np.array(sorted(grouped[row_id], key=lambda p: p[0])))
    return ordered_rows

def get_global_column_grid(grid_rows):
    """Return the sorted centres of the column clusters shared by all rows.

    The first coordinate of every point in every row is clustered with DBSCAN
    (eps=15, min_samples=1); each cluster's mean becomes one column centre.
    Returns an empty array when there are no points.
    """
    xs = [pt[0] for row in grid_rows for pt in row]
    if len(xs) == 0:
        return np.array([])

    xs = np.array(xs).reshape(-1, 1)
    labels = DBSCAN(eps=15, min_samples=1).fit(xs).labels_

    centres = []
    for cluster_id in set(labels):
        if cluster_id == -1:
            continue
        centres.append(np.mean(xs[labels == cluster_id]))
    centres.sort()
    return np.array(centres)

def extract_grid_data(img):
    """Slice an already-loaded BGR image into warped grid cells, keeping the quads.

    NOTE: this definition takes an image array and returns four values. It
    replaces the earlier notebook definition of the same name, which takes a
    path and returns three values.

    Returns:
        (grid_tensor, mask_tensor, debug_points, status). On success the
        status is "Active: Gridded" and debug_points holds the source quad
        (4x2 float32 corners) of every stored cell. On any fallback or error
        the result is (None, None, [], reason).
    """
    grid_tensor = np.zeros((MAX_ROWS, MAX_COLS, CELL_SIZE, CELL_SIZE, 3), dtype=np.uint8)
    mask_tensor = np.zeros((MAX_ROWS, MAX_COLS), dtype=np.uint8)
    debug_points = []  # source quads, used later to draw the grid overlay

    try:
        points = get_intersections(img)
        if len(points) < 40:
            return None, None, [], "Fallback: Low Pts"
        grid_rows = sort_points_robust(points)
        if len(grid_rows) < 4:
            return None, None, [], "Fallback: Few Rows"
        col_centers = get_global_column_grid(grid_rows)
        if len(col_centers) < 2:
            return None, None, [], "Fallback: No Cols"

        def closest_in_row(row, x):
            """Point of `row` nearest to `x` in its first coordinate (within 70), or None."""
            nearby = [p for p in row if abs(p[0] - x) < 70]
            if not nearby:
                return None
            return min(nearby, key=lambda p: abs(p[0] - x))

        # Consecutive row pairs give the top and bottom edges of a cell row.
        row_pairs = list(zip(grid_rows, grid_rows[1:]))[:MAX_ROWS]
        for r, (upper, lower) in enumerate(row_pairs):
            for top_left in upper:
                # Nearest point to the right in the same row, at most 100 away.
                to_the_right = [p for p in upper if p[0] > top_left[0]]
                if not to_the_right:
                    continue
                top_right = min(to_the_right, key=lambda p: p[0])
                if abs(top_right[0] - top_left[0]) > 100:
                    continue

                bottom_left = closest_in_row(lower, top_left[0])
                if bottom_left is None:
                    continue
                bottom_right = closest_in_row(lower, top_right[0])
                if bottom_right is None:
                    continue

                # Perspective warp of the quad onto a CELL_SIZE square.
                src = np.array([top_left, top_right, bottom_right, bottom_left], dtype="float32")
                dst = np.array([[0,0], [CELL_SIZE,0], [CELL_SIZE,CELL_SIZE], [0,CELL_SIZE]], dtype="float32")
                warp = cv2.getPerspectiveTransform(src, dst)
                cell = cv2.warpPerspective(img, warp, (CELL_SIZE, CELL_SIZE))

                # Column = nearest global column centre to the top-left corner.
                c_idx = np.argmin(np.abs(col_centers - top_left[0]))
                if c_idx < MAX_COLS:
                    grid_tensor[r, c_idx] = cell
                    mask_tensor[r, c_idx] = 1
                    debug_points.append(src)

        return grid_tensor, mask_tensor, debug_points, "Active: Gridded"
    except Exception as e:
        return None, None, [], f"Error: {e}"

# ==========================================
# 3. VISUALIZATION LOOP
# ==========================================
# Inspect the slicer on a random sample of test images: draw the detected
# cell quads on the map, then show the first extracted cells.
test_files = list(TEST_IMG_DIR.glob("*.png"))
# random.sample raises ValueError when asked for more items than exist, so the
# request is capped at the number of available files.
samples = random.sample(test_files, min(len(test_files), SAMPLES_TO_CHECK))
if not samples:
    print("No images found.")

for img_path in samples:
    print(f"\nüîç Inspecting: {img_path.name}")
    original = cv2.imread(str(img_path))
    if original is None:
        # cv2.imread returns None for unreadable files; skip instead of crashing.
        print(f"Failed: could not read {img_path}")
        continue
    original_vis = original.copy()

    # 1. Run Slicer
    grid, mask, debug_quads, status = extract_grid_data(original)

    if "Active" not in status:
        print(f"‚ùå Failed: {status}")
        plt.imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))
        plt.title(f"FAILED: {status}")
        plt.show()
        continue

    # 2. Draw Grid on Original: the four edges of every quad
    #    (top, right, bottom, left).
    for quad in debug_quads:
        pts = quad.astype(int)
        for k in range(4):
            cv2.line(original_vis, tuple(pts[k]), tuple(pts[(k + 1) % 4]), (0, 255, 255), 2)

    # 3. Plotting
    rows, cols = mask.shape
    active_cells = []

    # Collect all sliced images in row-major order
    for r in range(rows):
        for c in range(cols):
            if mask[r, c] == 1:
                active_cells.append(grid[r, c])

    # Figure 1: The "Blueprint" (Where we cut)
    plt.figure(figsize=(10, 6))
    plt.imshow(cv2.cvtColor(original_vis, cv2.COLOR_BGR2RGB))
    plt.title(f"Slicer Grid Overlay | Found {len(active_cells)} cells")
    plt.axis('off')
    plt.show()

    # Figure 2: The "Biopsy" (What the model sees)
    # Display first 20 extracted cells
    num_show = min(len(active_cells), 20)
    if num_show > 0:
        fig, axes = plt.subplots(2, 10, figsize=(15, 4))
        axes = axes.flatten()

        for i in range(len(axes)):
            if i < num_show:
                cell_rgb = cv2.cvtColor(active_cells[i], cv2.COLOR_BGR2RGB)
                axes[i].imshow(cell_rgb)
                axes[i].set_title(f"Cell {i}")
            axes[i].axis('off')
        plt.suptitle("Extracted Input Tensors (64x64)", fontsize=16)
        plt.tight_layout()
        plt.show()
    else:
        print("‚ö†Ô∏è No cells extracted despite success status.")

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms, models
import cv2
import numpy as np
import heapq
import json
import pandas as pd
from pathlib import Path
from sklearn.cluster import DBSCAN
from tqdm.auto import tqdm
import re

# ==========================================
# 1. CONFIGURATION
# ==========================================
# Trained weights and class mapping are read from attached Kaggle inputs
# rather than from files produced earlier in this notebook.
MODEL_PATH = "/kaggle/input/blindflightv13/pytorch/default/1/terrain_classifier_resnet_v12_1.pth"
MAPPING_PATH = "/kaggle/input/blindflightclassmaping/class_mapping (1).json"
TEST_IMG_DIR = Path("/kaggle/input/the-blind-flight-synapse-drive-ps-1/SynapseDrive_Dataset/test/images")
OUTPUT_CSV = "submission.csv"

CELL_SIZE = 64   # side length, in pixels, of each warped grid cell
MAX_ROWS = 20    # grid tensor capacity (rows)
MAX_COLS = 20    # grid tensor capacity (columns)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Per-biome cost of stepping onto a cell of each type.
COST_TABLES = {
    "Desert": {
        "Road": 1.2, "Start": 1.2, "End": 2.2, "Hazard": 3.7,
        "Cacti": 999.0, "Rocks": 999.0, "Obstacle": 999.0, "Unknown": 8.0,
    },
    "Forest": {
        "Road": 1.5, "Start": 1.5, "End": 2.5, "Hazard": 2.8,
        "Tree": 999.0, "Obstacle": 999.0, "Unknown": 8.0,
    },
    "Lab": {
        "Road": 1.0, "Start": 1.0, "End": 2.0, "Hazard": 3.0,
        "Wall": 999.0, "Plasma": 999.0, "Obstacle": 999.0, "Unknown": 8.0,
    },
}

# ==========================================
# 2. VISION & GRID HELPERS
# ==========================================
def preprocess_for_grid(img):
    """Return a binary mask of a BGR image for grid-line detection.

    The image is converted to grayscale, median-blurred (7x7) to suppress
    texture, then adaptively thresholded with an inverted output, so pixels
    darker than their local neighbourhood come out as 255.
    """
    smoothed = cv2.medianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 7)
    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        19,  # neighbourhood block size
        5,   # constant subtracted from the local weighted mean
    )

def get_intersections(img):
    """Locate grid-line crossings in a BGR image.

    Long horizontal and vertical strokes are isolated with morphological
    opening; their overlap marks candidate crossings. Each sufficiently
    large connected blob contributes its centroid, and centroids closer
    than 20 px are merged into a single point.

    Returns:
        An (N, 2) array of (x, y) points, or an empty array when no
        candidate is found.
    """
    binary = preprocess_for_grid(img)
    line_len = 25

    def _keep_strokes(kernel_shape):
        # Opening with a 1-pixel-thick bar keeps only strokes at least
        # `line_len` long in that direction.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_shape)
        return cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    horizontal = _keep_strokes((line_len, 1))
    vertical = _keep_strokes((1, line_len))
    crossings = cv2.dilate(cv2.bitwise_and(horizontal, vertical), np.ones((5,5)))

    n_components, _, comp_stats, comp_centroids = cv2.connectedComponentsWithStats(crossings)
    candidates = []
    for comp in range(1, n_components):  # component 0 is skipped
        if comp_stats[comp, cv2.CC_STAT_AREA] > 10:
            candidates.append(comp_centroids[comp])
    if not candidates:
        return np.array([])

    labels = DBSCAN(eps=20, min_samples=1).fit(candidates).labels_
    candidates = np.array(candidates)
    merged = [candidates[labels == lab].mean(axis=0) for lab in set(labels)]
    return np.array(merged)

def sort_points_robust(points):
    """Group intersection points into rows ordered top-to-bottom.

    Points are clustered on their y coordinate (DBSCAN, eps=25,
    min_samples=3); points labelled as noise are discarded.

    Args:
        points: (N, 2) array of (x, y) coordinates.

    Returns:
        A list of arrays, one per row, ordered by mean y; within each row
        the points are ordered by x.
    """
    row_labels = DBSCAN(eps=25, min_samples=3).fit(points[:, 1].reshape(-1, 1)).labels_

    members = {}
    for label, pt in zip(row_labels, points):
        if label != -1:
            members.setdefault(label, []).append(pt)

    def _mean_y(label):
        return np.mean([pt[1] for pt in members[label]])

    ordered_rows = []
    for label in sorted(members, key=_mean_y):
        ordered_rows.append(np.array(sorted(members[label], key=lambda pt: pt[0])))
    return ordered_rows

def get_global_column_grid(grid_rows):
    """Estimate the x position of every grid column across all rows.

    The x coordinates of all points are pooled and clustered (DBSCAN,
    eps=15, min_samples=1); each cluster mean is one column centre.

    Returns:
        Ascending 1-D array of column centres, or an empty array when
        `grid_rows` contains no points.
    """
    xs = []
    for row in grid_rows:
        for pt in row:
            xs.append(pt[0])
    if not xs:
        return np.array([])

    x_column = np.array(xs).reshape(-1, 1)
    labels = DBSCAN(eps=15, min_samples=1).fit(x_column).labels_

    centers = []
    for lab in set(labels):
        if lab == -1:
            continue
        centers.append(np.mean(x_column[labels == lab]))
    centers.sort()
    return np.array(centers)

def extract_grid_data(image_path):
    """Slice a grid image into rectified cell patches on a fixed-size layout.

    Args:
        image_path: Path (or anything `str()`-able to a path) of the image.

    Returns:
        A tuple `(grid_tensor, mask_tensor, status)`:

        * `grid_tensor` - uint8 array of shape
          (MAX_ROWS, MAX_COLS, CELL_SIZE, CELL_SIZE, 3) holding BGR patches.
        * `mask_tensor` - uint8 array of shape (MAX_ROWS, MAX_COLS); 1 where
          a patch was written, 0 elsewhere.
        * `status` - "Active: Gridded" on success, a "Fallback: ..." string
          when too few points/rows/columns were found, "Error" when slicing
          raised, or "Load Error" when the image could not be read (in that
          case both tensors are None).

        On "Error" the tensors may be partially filled.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        return None, None, "Load Error"

    grid_tensor = np.zeros((MAX_ROWS, MAX_COLS, CELL_SIZE, CELL_SIZE, 3), dtype=np.uint8)
    mask_tensor = np.zeros((MAX_ROWS, MAX_COLS), dtype=np.uint8)
    # Every cell is warped onto the same square, so build the target once.
    dst = np.array([[0, 0], [CELL_SIZE, 0], [CELL_SIZE, CELL_SIZE], [0, CELL_SIZE]], dtype="float32")

    try:
        points = get_intersections(img)
        if len(points) < 40:
            return grid_tensor, mask_tensor, "Fallback: Low Pts"
        grid_rows = sort_points_robust(points)
        if len(grid_rows) < 4:
            return grid_tensor, mask_tensor, "Fallback: Few Rows"
        col_centers = get_global_column_grid(grid_rows)
        if len(col_centers) < 2:
            return grid_tensor, mask_tensor, "Fallback: No Cols"

        # A cell is bounded by two consecutive rows of intersections.
        for r in range(min(len(grid_rows) - 1, MAX_ROWS)):
            row_top = grid_rows[r]
            row_btm = grid_rows[r + 1]
            for pt_top in row_top:
                # Top-right corner: nearest point to the right on the same row.
                neighbors_tr = [p for p in row_top if p[0] > pt_top[0]]
                if not neighbors_tr:
                    continue
                pt_tr = min(neighbors_tr, key=lambda p: p[0])
                # A gap wider than 100 px means an intersection is missing.
                if abs(pt_tr[0] - pt_top[0]) > 100:
                    continue

                # Bottom corners: closest points on the next row within 70 px in x.
                candidates_bl = [p for p in row_btm if abs(p[0] - pt_top[0]) < 70]
                if not candidates_bl:
                    continue
                pt_bl = min(candidates_bl, key=lambda p: abs(p[0] - pt_top[0]))
                candidates_br = [p for p in row_btm if abs(p[0] - pt_tr[0]) < 70]
                if not candidates_br:
                    continue
                pt_br = min(candidates_br, key=lambda p: abs(p[0] - pt_tr[0]))

                src = np.array([pt_top, pt_tr, pt_br, pt_bl], dtype="float32")
                M = cv2.getPerspectiveTransform(src, dst)
                warped = cv2.warpPerspective(img, M, (CELL_SIZE, CELL_SIZE))

                # Column index = global column centre nearest to the cell's left edge.
                c_idx = np.argmin(np.abs(col_centers - pt_top[0]))
                if c_idx < MAX_COLS:
                    grid_tensor[r, c_idx] = warped
                    mask_tensor[r, c_idx] = 1
        return grid_tensor, mask_tensor, "Active: Gridded"
    except Exception:
        # Best-effort: one bad image must not abort the whole batch, but
        # KeyboardInterrupt/SystemExit are deliberately NOT swallowed so the
        # run can still be interrupted.
        return grid_tensor, mask_tensor, "Error"

# ==========================================
# 3. MODEL LOADING (FIXED FOR STANDARD RESNET)
# ==========================================
# Load the class-index -> class-name mapping used to decode predictions.
with open(MAPPING_PATH, 'r') as f:
    idx_to_class = json.load(f)
    # JSON object keys are always strings; convert them back to int so the
    # mapping can be indexed with the integer predicted by the model.
    idx_to_class = {int(k): v for k, v in idx_to_class.items()}

from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present

print("üöÄ Loading V13 Model (Standard Architecture)...")

# Standard torchvision ResNet-18 (7x7 conv1 + max-pool stem). The saved
# checkpoint has a 7x7 conv1, so the stock definition must be used unchanged.
model = models.resnet18(pretrained=False)

# Replace the classification head with one output per class in the mapping.
model.fc = nn.Linear(model.fc.in_features, len(idx_to_class))

try:
    state_dict = torch.load(MODEL_PATH, map_location=device)
    # A checkpoint saved from an nn.DataParallel wrapper prefixes every key
    # with "module."; strip it so the weights load into the bare model.
    # This is a no-op when the prefix is absent.
    consume_prefix_in_state_dict_if_present(state_dict, "module.")
    model.load_state_dict(state_dict)
except RuntimeError as e:
    print("\n‚ùå LOAD ERROR! Model mismatch.")
    print(f"Error details: {e}\n")
    raise

# Spread inference batches over all visible GPUs when more than one exists.
if torch.cuda.device_count() > 1:
    print(f"üî• Twin-Turbo Active: {torch.cuda.device_count()} GPUs for Inference")
    model = nn.DataParallel(model)

model = model.to(device)
model.eval()

# Per-cell preprocessing applied in solve_image: the uint8 RGB patch is
# resized to 128x128, converted to a float tensor and normalised per channel.
# The stock ResNet stem then downsamples the 128x128 input as usual.
# NOTE(review): presumably this mirrors the training-time preprocessing
# (IMG_SIZE = 128 in the dataset-generation cell) - confirm.
transform_pipe = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ==========================================
# 4. PATH FORMATTER (LOWERCASE LDRU)
# ==========================================
def path_coords_to_string(path_list):
    """Encode a sequence of (row, col) cells as lowercase direction letters.

    Each consecutive pair contributes one letter: "d"/"u" when the row
    increases/decreases, otherwise "r"/"l" when the column
    increases/decreases. A pair with no displacement contributes nothing.

    Returns:
        The direction string, or None when fewer than two cells are given.
    """
    if not path_list or len(path_list) < 2:
        return None

    def _letter(src, dst):
        (r_from, c_from), (r_to, c_to) = src, dst
        # The row change takes precedence over the column change.
        if r_to != r_from:
            return "d" if r_to > r_from else "u"
        if c_to != c_from:
            return "r" if c_to > c_from else "l"
        return ""

    return "".join(_letter(a, b) for a, b in zip(path_list, path_list[1:]))

def clean_image_id(filename):
    """Turn an image file name into its numeric id, e.g. '0001.png' -> 1.

    The name is lower-cased and any ".png"/".jpg" substring removed.

    Returns:
        The remaining text as an int when it parses as one, otherwise the
        remaining text itself (str).
    """
    stem = str(filename).lower().replace(".png", "").replace(".jpg", "")
    try:
        return int(stem)
    except ValueError:
        # Non-numeric name: fall back to the bare stem. Only ValueError is
        # caught so interrupts and genuine bugs are not swallowed.
        return stem

# ==========================================
# 5. SOLVER
# ==========================================
def _classify_cells(grid_tensor, mask_tensor):
    """Classify every extracted cell patch in a single batch.

    Returns:
        `(terrain_map, labels)`: an object array the shape of `mask_tensor`
        holding a class name per classified cell and "Unknown" elsewhere,
        plus the class names in row-major cell order. `(None, [])` when the
        mask marks no cell.
    """
    rows, cols = mask_tensor.shape
    coords = [(r, c) for r in range(rows) for c in range(cols) if mask_tensor[r, c] == 1]
    if not coords:
        return None, []

    # Patches are stored in OpenCV's BGR order; convert to RGB before the transform.
    batch_stack = torch.stack([
        transform_pipe(cv2.cvtColor(grid_tensor[r, c], cv2.COLOR_BGR2RGB))
        for r, c in coords
    ]).to(device)
    with torch.no_grad():
        _, preds = torch.max(model(batch_stack), 1)

    terrain_map = np.full((rows, cols), "Unknown", dtype=object)
    labels = []
    for (r, c), pred in zip(coords, preds):
        class_name = idx_to_class[pred.item()]
        terrain_map[r, c] = class_name
        labels.append(class_name)
    return terrain_map, labels


def _select_cost_table(labels):
    """Pick the cost table of the biome that wins a weighted vote.

    Each class name votes for the biome before its underscore ("Lab" when
    there is none): Start/End cells weigh 5, biome-specific obstacles 3,
    everything else 1.
    """
    counts = {"Desert": 0, "Forest": 0, "Lab": 0}
    for class_name in labels:
        biome = class_name.split("_")[0] if "_" in class_name else "Lab"
        if "Start" in class_name or "End" in class_name:
            weight = 5
        elif any(tag in class_name for tag in ("Cacti", "Tree", "Plasma", "Wall", "Rocks")):
            weight = 3
        else:
            weight = 1
        counts[biome] = counts.get(biome, 0) + weight

    dominant_biome = max(counts, key=counts.get)
    return COST_TABLES.get(dominant_biome, COST_TABLES["Lab"])


def _find_endpoints(terrain_map):
    """Return `(start_pos, end_pos)`; the last match in row-major order wins."""
    start_pos, end_pos = None, None
    rows, cols = terrain_map.shape
    for r in range(rows):
        for c in range(cols):
            if "Start" in terrain_map[r, c]:
                start_pos = (r, c)
            if "End" in terrain_map[r, c]:
                end_pos = (r, c)
    return start_pos, end_pos


def _astar(terrain_map, costs, start_pos, end_pos):
    """4-connected A* with a Manhattan-distance heuristic.

    The cost of a move is the table entry for the cell being entered; cells
    whose cost is 100 or more (or whose type is missing from the table) are
    never entered.

    Returns:
        The list of (row, col) cells from start to end inclusive, or None
        when the end cannot be reached.
    """
    rows, cols = terrain_map.shape
    pq = [(0, start_pos)]
    cost_so_far = {start_pos: 0}
    came_from = {}

    while pq:
        _, curr = heapq.heappop(pq)
        if curr == end_pos:
            # Walk the parent links back to the start, then flip.
            path = []
            while curr != start_pos:
                path.append(curr)
                curr = came_from[curr]
            path.append(start_pos)
            path.reverse()
            return path

        r, c = curr
        for nr, nc in [(r-1, c), (r+1, c), (r, c-1), (r, c+1)]:
            if not (0 <= nr < rows and 0 <= nc < cols):
                continue
            label = terrain_map[nr, nc]
            cell_type = label.split("_")[1] if "_" in label else label
            step_cost = costs.get(cell_type, 999.0)
            if step_cost >= 100:
                continue  # impassable

            new_cost = cost_so_far[curr] + step_cost
            if (nr, nc) not in cost_so_far or new_cost < cost_so_far[(nr, nc)]:
                cost_so_far[(nr, nc)] = new_cost
                priority = new_cost + abs(nr - end_pos[0]) + abs(nc - end_pos[1])
                heapq.heappush(pq, (priority, (nr, nc)))
                came_from[(nr, nc)] = curr

    return None


def solve_image(image_path):
    """Solve one grid image and return its path as a direction string.

    Pipeline: slice the image into cells, classify them, choose the cost
    table of the dominant biome, locate the Start and End cells and run A*.

    Returns:
        A string of "u"/"d"/"l"/"r" moves from Start to End, or None when
        the grid could not be extracted, no cell was classified, Start or
        End is missing, or no route exists.
    """
    # 1. Slice
    grid_tensor, mask_tensor, status = extract_grid_data(image_path)
    if "Active" not in status:
        return None

    # 2-4. Batch, predict, map
    terrain_map, labels = _classify_cells(grid_tensor, mask_tensor)
    if terrain_map is None:
        return None
    costs = _select_cost_table(labels)

    # 5. A*
    start_pos, end_pos = _find_endpoints(terrain_map)
    if not start_pos or not end_pos:
        return None

    path = _astar(terrain_map, costs, start_pos, end_pos)
    if path is None:
        return None
    return path_coords_to_string(path)

# ==========================================
# 6. RUN
# ==========================================
# Collect the test images in lexicographic file-name order (PNG files only).
test_files = sorted(list(TEST_IMG_DIR.glob("*.png")))
# One record per image; built as a list and turned into a DataFrame once.
data = []

print(f"Generating Submission for {len(test_files)} images...")

for img_path in tqdm(test_files):
    # solve_image returns None when the image could not be solved; the row
    # is still emitted so every test image appears in the submission.
    path_str = solve_image(img_path)
    data.append({
        "image_id": clean_image_id(img_path.name),
        "path": path_str
    })

df = pd.DataFrame(data)
# Order rows by id rather than by file name.
df = df.sort_values(by="image_id")
df.to_csv(OUTPUT_CSV, index=False)
print(f"‚úÖ Submission saved to {OUTPUT_CSV}")
print(df.head())

In [None]:
import pandas as pd
import re
import os

# ==========================================
# CONFIGURATION
# ==========================================
# MAIN_FILE is the primary submission; rows whose path is empty are filled
# from BACKUP_FILE, and the merged result is written to OUTPUT_FILE.
# The backup's first two columns are taken as (image id, path); its paths may
# be direction letters in any case or "(r,c)->(r,c)" coordinate chains.
MAIN_FILE   = "submission.csv"      # The V13 file you just generated
BACKUP_FILE = "/kaggle/input/subfill4/submission_filled(4).csv"          # Your backup file (any format)
OUTPUT_FILE = "final_submission.csv"

# ==========================================
# 1. HELPER FUNCTIONS
# ==========================================
def clean_id(val):
    """Normalise an image identifier to an int.

    Accepts values such as '0001.png', '1.png', '0001', 1 or 1.0 and
    returns 1. The float form matters because pandas reads an integer
    column that contains a missing value as floats.

    Returns:
        The id as an int, or None when the value is not an integer.
    """
    s = str(val).lower().strip()
    s = s.replace(".png", "").replace(".jpg", "")
    try:
        return int(s)
    except ValueError:
        pass

    # Not a plain integer literal: accept integral floats such as "1.0".
    try:
        as_float = float(s)
    except ValueError:
        return None
    return int(as_float) if as_float.is_integer() else None

def clean_path(val):
    """Normalise a path value to a lowercase 'ldru' direction string.

    Two input formats are understood:

    * a coordinate chain such as "(0,0)->(0,1)" or "(0, 0)->(0, 1)"
      (row, col pairs; whitespace inside the tuples is tolerated), which is
      converted move by move;
    * a direction string such as "RRDD" or "rrdd", which is lower-cased.

    Returns:
        The direction string, or None when the value is missing (NaN/None,
        blank, or the text "nan") or the coordinate chain yields no move.
    """
    if pd.isna(val):
        return None

    text = str(val).strip()
    if text == "" or text.lower() == "nan":
        return None

    # CASE 1: Coordinate String like "(0,0)->(0,1)"
    if "->" in text and "(" in text:
        matches = re.findall(r"\(\s*(\d+)\s*,\s*(\d+)\s*\)", text)
        path = [(int(r), int(c)) for r, c in matches]
        dirs = []
        for (r1, c1), (r2, c2) in zip(path, path[1:]):
            if r2 > r1: dirs.append("d")
            elif r2 < r1: dirs.append("u")
            elif c2 > c1: dirs.append("r")
            elif c2 < c1: dirs.append("l")
        # An empty result carries no path; report it as missing so callers
        # counting nulls see it.
        return "".join(dirs) or None

    # CASE 2: Direction String like "RRDD" or "rrdd"
    return text.lower()

# ==========================================
# 2. EXECUTION
# ==========================================
print("üîÑ Loading files...")

# --- LOAD MAIN ---
if not os.path.exists(MAIN_FILE):
    print(f"‚ùå Error: {MAIN_FILE} not found. Generate it first!")
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down, whereas an exception stops this cell and keeps the session.
    raise FileNotFoundError(f"{MAIN_FILE} not found")

# Standardize columns: accept the capitalised "Image"/"Path" headers too.
# Labels that are absent are simply ignored by rename().
df_main = pd.read_csv(MAIN_FILE).rename(columns={"Image": "image_id", "Path": "path"})

# --- LOAD BACKUP ---
if os.path.exists(BACKUP_FILE):
    df_backup = pd.read_csv(BACKUP_FILE)
    # The backup may use any header names: treat the 1st column as the image
    # id and the 2nd as the path (assumes the standard two-column layout).
    cols = df_backup.columns
    df_backup = df_backup.rename(columns={cols[0]: "image_id", cols[1]: "path"})
    print(f"‚úÖ Backup loaded ({len(df_backup)} rows)")
else:
    # No backup available: continue with an empty frame so the merge below
    # still runs and simply fills nothing.
    print("‚ö†Ô∏è  Backup file not found. Creating empty backup.")
    df_backup = pd.DataFrame(columns=["image_id", "path"])

# ==========================================
# 3. STANDARDIZATION
# ==========================================
print("üîß Standardizing IDs and Paths...")

# Add two normalised columns to both frames: 'clean_id' (integer id derived
# from 'image_id') and 'clean_path' ('ldru' string derived from 'path').
# The ids are processed for both frames first, then the paths.
for target_col, source_col, cleaner in (
    ('clean_id', 'image_id', clean_id),
    ('clean_path', 'path', clean_path),
):
    for frame in (df_main, df_backup):
        frame[target_col] = frame[source_col].apply(cleaner)

# ==========================================
# 4. MERGE & FILL
# ==========================================
print("ü©π Merging and patching holes...")

# Keep one backup row per id. In a left merge, duplicate ids on the right
# would duplicate rows of the main frame, and missing ids would be matched
# against missing ids on the left; both would corrupt the submission.
backup_lookup = (
    df_backup[['clean_id', 'clean_path']]
    .dropna(subset=['clean_id'])
    .drop_duplicates(subset='clean_id', keep='first')
)

# Merge on the clean integer id. Every main row is kept; the backup path
# arrives in the 'clean_path_backup' column.
df_final = df_main.merge(backup_lookup,
                         on='clean_id',
                         how='left',
                         suffixes=('', '_backup'))

# Logic: Use Main Path. If None, use Backup Path.
def fill_strategy(row):
    """Choose the path for one merged row: main if present, else backup.

    Args:
        row: Mapping with 'clean_path' (main) and 'clean_path_backup'.

    Returns:
        The main path when it is a non-empty string, otherwise the backup
        value unchanged (which may itself be missing).
    """
    main_p = row['clean_path']
    # A missing value may be None or NaN (a float); only a non-empty string
    # counts as a usable path, so len() is never called on a float.
    if isinstance(main_p, str) and main_p:
        return main_p
    return row['clean_path_backup']

# Resolve the path of every row (main first, backup as fallback).
df_final['final_path'] = df_final.apply(fill_strategy, axis=1)

# ==========================================
# 5. OUTPUT FORMATTING
# ==========================================
# Keep only the two submission columns, give them their required names and
# order the rows by id.
output_df = (
    df_final[['clean_id', 'final_path']]
    .rename(columns={'clean_id': 'image_id', 'final_path': 'path'})
    .sort_values(by="image_id")
)

# Report how many rows still have no path after patching.
missing = output_df['path'].isna().sum()
if missing == 0:
    print("‚úÖ All paths filled successfully.")
else:
    print(f"‚ö†Ô∏è  Warning: {missing} paths are still empty (missing in both main and backup).")

# Save
output_df.to_csv(OUTPUT_FILE, index=False)
print(f"üöÄ Saved Final Submission to: {OUTPUT_FILE}")
print(output_df.head())