# Image Resize

In [None]:
import os
import cv2

input_folder = "raw_dataset"
output_folder = "dataset_resized"
target_size = (640, 640)  # Width, Height

os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    if filename.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path)
        if img is not None:
            resized = cv2.resize(img, target_size)
            cv2.imwrite(os.path.join(output_folder, filename), resized)
            print(f"Saved: {filename}")

# Image Segmentation

In [None]:
import os
import cv2
import json
import torch
import numpy as np
from tqdm import tqdm
from datetime import datetime
from segment_anything import sam_model_registry, SamPredictor
from pycocotools import mask as mask_utils
import urllib.request

In [None]:
print(torch.cuda.is_available())

In [None]:
# # Define checkpoint path and download url
# checkpoint_path = os.path.join("checkpoints", "sam_vit_h_4b8939.pth")
# checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"

# # Make directory if it doesn't exist
# os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# # Download if the file is missing
# if not os.path.exists(checkpoint_path):
#     print(f"Downloading SAM checkpoint from {checkpoint_url}...")
#     urllib.request.urlretrieve(checkpoint_url, checkpoint_path)
#     print("Download complete.")
# else:
#     print("Checkpoint already exists at:", checkpoint_path)

In [None]:
image_folder = r"dataset_resized"
model_type = "vit_h"
output_root = "segmentation_outputs"
os.makedirs(output_root, exist_ok=True)

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
checkpoint_path = r"C:\Users\poten\Downloads\sam_vit_h_4b8939.pth"
sam = sam_model_registry[model_type](checkpoint=checkpoint_path)

sam.to(device)

predictor = SamPredictor(sam)

In [None]:
# Track processed images
processed_images = {
    os.path.splitext(f)[0].replace("_segmented", "")
    for f in os.listdir(output_root)
    if f.endswith((".png", ".jpg", ".jpeg"))
}
# print(sorted(processed_images))

In [None]:
def binary_mask_to_rle(mask):
    rle = mask_utils.encode(np.asfortranarray(mask.astype(np.uint8)))
    rle["counts"] = rle["counts"].decode("utf-8")  # bytes to str for JSON
    return rle


annotation_id = 1
image_id = 1

In [None]:
# i = 0
for fname in tqdm(os.listdir(image_folder)):
    if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
        continue

    base_name = os.path.splitext(fname)[0]
    if base_name in processed_images:
        continue

    print(fname)

    # i += 1
    # if i < 100:
    #     continue

    while True:
        image_path = os.path.join(image_folder, fname)
        image_bgr = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        h, w = image_rgb.shape[:2]

        predictor.set_image(image_rgb)

        click_points = []
        click_labels = []
        removal_points = []
        removal_labels = []

        def click_event(event, x, y, flags, param):
            if event == cv2.EVENT_LBUTTONDOWN:
                click_points.append([x, y])
                click_labels.append(1)
                cv2.circle(image_bgr, (x, y), 5, (0, 255, 0), -1)
                cv2.imshow("Click points, press 's' to segment or 'k' to skip", image_bgr)
            if event == cv2.EVENT_RBUTTONDOWN:
                removal_points.append([x, y])
                removal_labels.append(0)
                cv2.circle(image_bgr, (x, y), 5, (0, 0, 255), -1)
                cv2.imshow(
                    "Click points, press 's' to segment or 'k' to skip", image_bgr
                )

        print(f"\nProcessing image: {fname}")
        cv2.imshow("Click points, press 's' to segment or 'k' to skip", image_bgr)
        cv2.setMouseCallback("Click points, press 's' to segment or 'k' to skip", click_event)

        key = None
        # Wait for 's' to segment or 'k' to skip or 'ESC' to exit
        while True:
            key = cv2.waitKey(1)
            if key == ord("s") and click_points:
                break
            if key == ord("k"):
                print("⏭️ Skipping this image...")
                break
            elif key == 27:
                cv2.destroyAllWindows()
                exit()

        if key == ord("k"):
            break  # Move to next image

        input_points = np.array(click_points + removal_points)
        input_labels = np.array(click_labels + removal_labels)
        masks, scores, logits = predictor.predict(
            point_coords=input_points,
            point_labels=input_labels,
            multimask_output=False,
        )
        mask = masks[0]

        result_overlay = image_bgr.copy()
        result_overlay[mask] = [0, 255, 0]

        # Show result and ask user to retry or accept
        cv2.imshow("Result - press 'r' to retry or any key to accept", result_overlay)
        key = cv2.waitKey(0)
        cv2.destroyAllWindows()

        if key == ord("r"):
            print("🔁 Retrying segmentation for this image...")
            continue  # Re-do the same image

        # Save segmentation result
        overlay_path = os.path.join(output_root, f"{base_name}_segmented.png")
        cv2.imwrite(overlay_path, result_overlay)

        coco_data = {
            "info": {
                "description": "Manual SAM Segmentation",
                "date_created": datetime.now().isoformat(),
            },
            "images": [{"id": image_id, "file_name": fname, "width": w, "height": h}],
            "annotations": [
                {
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": 1,
                    "segmentation": binary_mask_to_rle(mask),
                    "area": int(mask.sum()),
                    "bbox": list(cv2.boundingRect(mask.astype(np.uint8))),
                    "iscrowd": 0,
                }
            ],
            "categories": [{"id": 1, "name": "object"}],
        }

        json_path = os.path.join(output_root, f"{base_name}.json")
        with open(json_path, "w") as f:
            json.dump(coco_data, f)

        # ✅ Save processed image info
        processed_images.add(fname)

        annotation_id += 1
        image_id += 1
        break  # Move to next image


print("\n✅ All segmentations complete. JSON and segmented images saved.")

In [1]:
import os
import shutil
import glob

# Define paths
original_folder = "Dataset_resized"  # Replace with path to original images
segmented_folder = "segmentation_outputs"  # Replace with path to segmented images

# Get all segmented image filenames
segmented_files = glob.glob(os.path.join(segmented_folder, "*_segmented.*"))

# Extract base names (without _segmented and extension)
segmented_basenames = {
    os.path.basename(f).rsplit("_segmented", 1)[0] for f in segmented_files
}

# Copy matching originals
for base_name in segmented_basenames:
    # Find the original image (assuming any extension)
    matching_originals = glob.glob(os.path.join(original_folder, base_name + ".*"))

    if matching_originals:
        # Use the first match
        original_file = matching_originals[0]
        dest_file = os.path.join(segmented_folder, os.path.basename(original_file))

        shutil.copy2(original_file, dest_file)
        print(f"Copied: {original_file} -> {dest_file}")
    else:
        print(f"No original found for: {base_name}")

# Remove all _segmented images
for f in segmented_files:
    os.remove(f)
    print(f"Removed: {f}")

Copied: Dataset_resized\IMG-20250519-WA0093.jpg -> segmentation_outputs\IMG-20250519-WA0093.jpg
Copied: Dataset_resized\IMG-20250519-WA0009.jpg -> segmentation_outputs\IMG-20250519-WA0009.jpg
Copied: Dataset_resized\20250518_124325.jpg -> segmentation_outputs\20250518_124325.jpg
Copied: Dataset_resized\20250518_123119.jpg -> segmentation_outputs\20250518_123119.jpg
Copied: Dataset_resized\20250517_194910.jpg -> segmentation_outputs\20250517_194910.jpg
Copied: Dataset_resized\94_jpg.rf.f7bc4443523972a5361c655171fa55e0.jpg -> segmentation_outputs\94_jpg.rf.f7bc4443523972a5361c655171fa55e0.jpg
Copied: Dataset_resized\IMG-20250519-WA0133.jpg -> segmentation_outputs\IMG-20250519-WA0133.jpg
Copied: Dataset_resized\WhatsApp Image 2025-05-11 at 17.55.02_a09c1ac6.jpg -> segmentation_outputs\WhatsApp Image 2025-05-11 at 17.55.02_a09c1ac6.jpg
Copied: Dataset_resized\20250518_122936.jpg -> segmentation_outputs\20250518_122936.jpg
Copied: Dataset_resized\20250518_123114.jpg -> segmentation_outputs\