In [1]:
import os
# Remember the directory the kernel started in; later cells build paths from it.
HOME = os.getcwd()
print(f"HOME: {HOME}")

HOME: /


In [2]:
# Clone the SAM 2 repository into the current directory, switch into the
# checkout under HOME, and install it in editable mode (-e) with quiet output (-q).
!git clone https://github.com/facebookresearch/segment-anything-2.git
%cd {HOME}/segment-anything-2
!pip install -e . -q

Cloning into 'segment-anything-2'...
remote: Enumerating objects: 974, done.[K
remote: Counting objects: 100% (38/38), done.[K
remote: Compressing objects: 100% (30/30), done.[K
remote: Total 974 (delta 17), reused 20 (delta 7), pack-reused 936 (from 1)[K
Receiving objects: 100% (974/974), 128.94 MiB | 17.53 MiB/s, done.
Resolving deltas: 100% (334/334), done.
/segment-anything-2
[0m

In [3]:
# Extra packages, installed quietly (-q). `supervision` is imported as `sv` in the
# import cell; jupyter_bbox_widget is not imported in the visible cells.
!pip install -q supervision jupyter_bbox_widget

[0m

In [4]:
# Download the four SAM 2 Hiera checkpoints (tiny, small, base_plus, large) into
# {HOME}/checkpoints. wget runs with -q, so it prints nothing on success or failure.
# NOTE(review): only sam2_hiera_large.pt is referenced by the visible cells.
!mkdir -p {HOME}/checkpoints
!wget -q https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_tiny.pt -P {HOME}/checkpoints
!wget -q https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_small.pt -P {HOME}/checkpoints
!wget -q https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_base_plus.pt -P {HOME}/checkpoints
!wget -q https://dl.fbaipublicfiles.com/segment_anything_2/072824/sam2_hiera_large.pt -P {HOME}/checkpoints

In [5]:
import cv2
import torch
import base64

import numpy as np
import supervision as sv

from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator

In [6]:
# Mixed-precision and TF32 setup only applies to CUDA devices. Guarding it keeps
# this cell runnable on CPU-only machines, matching the CPU fallback used when
# DEVICE is chosen in the next cell.
if torch.cuda.is_available():
    # Enter bfloat16 autocast for the rest of the session; the context is
    # entered manually and intentionally never exited.
    torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

    # Enable TF32 matmul/cuDNN kernels when device 0 reports compute
    # capability major version 8 or higher.
    if torch.cuda.get_device_properties(0).major >= 8:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

In [7]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Config name and the large checkpoint file under {HOME}/checkpoints.
CONFIG = "sam2_hiera_l.yaml"
CHECKPOINT = f"{HOME}/checkpoints/sam2_hiera_large.pt"

# Build the model on DEVICE with post-processing explicitly switched off.
sam2_model = build_sam2(
    CONFIG,
    CHECKPOINT,
    device=DEVICE,
    apply_postprocessing=False,
)

In [8]:
# Automatic mask generator wrapping the loaded model. Only the model is passed,
# so every other generator option keeps the library default.
# The processing functions further down read this module-level name directly.
mask_generator = SAM2AutomaticMaskGenerator(sam2_model)

In [17]:
%cd home

/home


In [18]:
# Train split: source images, white-background cut-outs, and raw masks.
# Paths are relative to the current working directory.
IMAGE_DIR = "./datasets/images/train"
MASK_OUTPUT_DIR = "./datasets/new_masks/train"
OUTPUT_DIR = "./datasets/seg_images/train"

# Create both output folders up front; existing folders are left untouched.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(MASK_OUTPUT_DIR, exist_ok=True)

def process_images_in_directory(image_dir, mask_output_dir, output_dir):
    """Segment every readable image in ``image_dir`` and save the per-mask outputs.

    Each directory entry is loaded with ``cv2.imread``; entries it cannot
    decode are reported on stdout and skipped. Readable images are converted
    from BGR to RGB for the module-level ``mask_generator``, and the resulting
    masks are handed to ``save_and_remove_background`` together with the
    original BGR pixels.

    Args:
        image_dir: Directory whose entries are processed.
        mask_output_dir: Directory passed on for the mask files.
        output_dir: Directory passed on for the cut-out images.
    """
    for filename in os.listdir(image_dir):
        source_path = os.path.join(image_dir, filename)
        bgr = cv2.imread(source_path)

        if bgr is not None:
            # The generator receives RGB; the BGR original is what gets saved.
            masks = mask_generator.generate(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
            save_and_remove_background(bgr, masks, filename, mask_output_dir, output_dir)
        else:
            print(f"Unable to read image {source_path}, skipping.")

def save_and_remove_background(image, masks, image_name, mask_output_dir, output_dir):
    """Write each mask and two white-filled cut-outs of ``image`` as PNG files.

    For every entry ``i`` of ``masks`` three files are written, all named after
    the stem of ``image_name``:

    * ``<stem>_mask_<i>.png`` in ``mask_output_dir``: the mask as 0 / 255.
    * ``<stem>_receipt_only_mask_<i>.png`` in ``output_dir``: ``image`` inside
      the mask, white everywhere else.
    * ``<stem>_receipt_only_mask_inv_<i>.png`` in ``output_dir``: ``image``
      outside the mask, white inside it.

    A write that ``cv2.imwrite`` reports as failed is printed and skipped
    instead of passing silently.

    Args:
        image: Image array; in this notebook the BGR array from ``cv2.imread``.
        masks: Iterable of dicts whose ``'segmentation'`` entry is an array
            (presumably a boolean H x W mask matching ``image`` -- confirm
            against the mask generator).
        image_name: File name of the source image; only its stem is used.
        mask_output_dir: Directory that receives the mask PNGs.
        output_dir: Directory that receives the cut-out PNGs.
    """
    def _write(path, pixels):
        # cv2.imwrite can signal failure by returning False rather than raising.
        if not cv2.imwrite(path, pixels):
            print(f"Unable to write image {path}, skipping.")

    # Both values are identical for every mask, so compute them once.
    stem = os.path.splitext(image_name)[0]
    white = np.full(image.shape, 255, dtype=np.uint8)

    for i, mask in enumerate(masks):
        mask_image = mask['segmentation'].astype(np.uint8) * 255
        mask_inv = cv2.bitwise_not(mask_image)

        _write(os.path.join(mask_output_dir, f"{stem}_mask_{i}.png"), mask_image)

        # Image pixels where the mask is set, white where it is not.
        inside = cv2.add(
            cv2.bitwise_and(image, image, mask=mask_image),
            cv2.bitwise_and(white, white, mask=mask_inv),
        )
        _write(os.path.join(output_dir, f"{stem}_receipt_only_mask_{i}.png"), inside)

        # The complement: image pixels outside the mask, white inside it.
        outside = cv2.add(
            cv2.bitwise_and(image, image, mask=mask_inv),
            cv2.bitwise_and(white, white, mask=mask_image),
        )
        _write(os.path.join(output_dir, f"{stem}_receipt_only_mask_inv_{i}.png"), outside)

# Run the configured split end to end, then report where the cut-outs were written.
process_images_in_directory(
    image_dir=IMAGE_DIR,
    mask_output_dir=MASK_OUTPUT_DIR,
    output_dir=OUTPUT_DIR,
)
print(f"Receipt-only images saved in {OUTPUT_DIR}")

Receipt-only images saved in ./datasets/seg_images/train


In [None]:
# NOTE(review): these are the same three train-split paths as the earlier train
# cell; running this cell repeats that run and rewrites the same files.
IMAGE_DIR = "./datasets/images/train"
MASK_OUTPUT_DIR = "./datasets/new_masks/train"
OUTPUT_DIR = "./datasets/seg_images/train"

# Create both output folders up front; existing folders are left untouched.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(MASK_OUTPUT_DIR, exist_ok=True)

def process_images_in_directory(image_dir, mask_output_dir, output_dir):
    """Segment every readable image in ``image_dir`` and save the per-mask outputs.

    Each directory entry is loaded with ``cv2.imread``; entries it cannot
    decode are reported on stdout and skipped. Readable images are converted
    from BGR to RGB for the module-level ``mask_generator``, and the resulting
    masks are handed to ``save_and_remove_background`` together with the
    original BGR pixels.

    Args:
        image_dir: Directory whose entries are processed.
        mask_output_dir: Directory passed on for the mask files.
        output_dir: Directory passed on for the cut-out images.
    """
    for filename in os.listdir(image_dir):
        source_path = os.path.join(image_dir, filename)
        bgr = cv2.imread(source_path)

        if bgr is not None:
            # The generator receives RGB; the BGR original is what gets saved.
            masks = mask_generator.generate(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
            save_and_remove_background(bgr, masks, filename, mask_output_dir, output_dir)
        else:
            print(f"Unable to read image {source_path}, skipping.")

def save_and_remove_background(image, masks, image_name, mask_output_dir, output_dir):
    """Write each mask and two white-filled cut-outs of ``image`` as PNG files.

    For every entry ``i`` of ``masks`` three files are written, all named after
    the stem of ``image_name``:

    * ``<stem>_mask_<i>.png`` in ``mask_output_dir``: the mask as 0 / 255.
    * ``<stem>_receipt_only_mask_<i>.png`` in ``output_dir``: ``image`` inside
      the mask, white everywhere else.
    * ``<stem>_receipt_only_mask_inv_<i>.png`` in ``output_dir``: ``image``
      outside the mask, white inside it.

    A write that ``cv2.imwrite`` reports as failed is printed and skipped
    instead of passing silently.

    Args:
        image: Image array; in this notebook the BGR array from ``cv2.imread``.
        masks: Iterable of dicts whose ``'segmentation'`` entry is an array
            (presumably a boolean H x W mask matching ``image`` -- confirm
            against the mask generator).
        image_name: File name of the source image; only its stem is used.
        mask_output_dir: Directory that receives the mask PNGs.
        output_dir: Directory that receives the cut-out PNGs.
    """
    def _write(path, pixels):
        # cv2.imwrite can signal failure by returning False rather than raising.
        if not cv2.imwrite(path, pixels):
            print(f"Unable to write image {path}, skipping.")

    # Both values are identical for every mask, so compute them once.
    stem = os.path.splitext(image_name)[0]
    white = np.full(image.shape, 255, dtype=np.uint8)

    for i, mask in enumerate(masks):
        mask_image = mask['segmentation'].astype(np.uint8) * 255
        mask_inv = cv2.bitwise_not(mask_image)

        _write(os.path.join(mask_output_dir, f"{stem}_mask_{i}.png"), mask_image)

        # Image pixels where the mask is set, white where it is not.
        inside = cv2.add(
            cv2.bitwise_and(image, image, mask=mask_image),
            cv2.bitwise_and(white, white, mask=mask_inv),
        )
        _write(os.path.join(output_dir, f"{stem}_receipt_only_mask_{i}.png"), inside)

        # The complement: image pixels outside the mask, white inside it.
        outside = cv2.add(
            cv2.bitwise_and(image, image, mask=mask_inv),
            cv2.bitwise_and(white, white, mask=mask_image),
        )
        _write(os.path.join(output_dir, f"{stem}_receipt_only_mask_inv_{i}.png"), outside)

# Run the configured split end to end, then report where the cut-outs were written.
process_images_in_directory(
    image_dir=IMAGE_DIR,
    mask_output_dir=MASK_OUTPUT_DIR,
    output_dir=OUTPUT_DIR,
)
print(f"Receipt-only images saved in {OUTPUT_DIR}")

In [19]:
# Validation split: source images, white-background cut-outs, and raw masks.
# Paths are relative to the current working directory.
IMAGE_DIR = "./datasets/images/val"
MASK_OUTPUT_DIR = "./datasets/new_masks/val"
OUTPUT_DIR = "./datasets/seg_images/val"

# Create both output folders up front; existing folders are left untouched.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(MASK_OUTPUT_DIR, exist_ok=True)

def process_images_in_directory(image_dir, mask_output_dir, output_dir):
    """Segment every readable image in ``image_dir`` and save the per-mask outputs.

    Each directory entry is loaded with ``cv2.imread``; entries it cannot
    decode are reported on stdout and skipped. Readable images are converted
    from BGR to RGB for the module-level ``mask_generator``, and the resulting
    masks are handed to ``save_and_remove_background`` together with the
    original BGR pixels.

    Args:
        image_dir: Directory whose entries are processed.
        mask_output_dir: Directory passed on for the mask files.
        output_dir: Directory passed on for the cut-out images.
    """
    for filename in os.listdir(image_dir):
        source_path = os.path.join(image_dir, filename)
        bgr = cv2.imread(source_path)

        if bgr is not None:
            # The generator receives RGB; the BGR original is what gets saved.
            masks = mask_generator.generate(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
            save_and_remove_background(bgr, masks, filename, mask_output_dir, output_dir)
        else:
            print(f"Unable to read image {source_path}, skipping.")

def save_and_remove_background(image, masks, image_name, mask_output_dir, output_dir):
    """Write each mask and two white-filled cut-outs of ``image`` as PNG files.

    For every entry ``i`` of ``masks`` three files are written, all named after
    the stem of ``image_name``:

    * ``<stem>_mask_<i>.png`` in ``mask_output_dir``: the mask as 0 / 255.
    * ``<stem>_receipt_only_mask_<i>.png`` in ``output_dir``: ``image`` inside
      the mask, white everywhere else.
    * ``<stem>_receipt_only_mask_inv_<i>.png`` in ``output_dir``: ``image``
      outside the mask, white inside it.

    A write that ``cv2.imwrite`` reports as failed is printed and skipped
    instead of passing silently.

    Args:
        image: Image array; in this notebook the BGR array from ``cv2.imread``.
        masks: Iterable of dicts whose ``'segmentation'`` entry is an array
            (presumably a boolean H x W mask matching ``image`` -- confirm
            against the mask generator).
        image_name: File name of the source image; only its stem is used.
        mask_output_dir: Directory that receives the mask PNGs.
        output_dir: Directory that receives the cut-out PNGs.
    """
    def _write(path, pixels):
        # cv2.imwrite can signal failure by returning False rather than raising.
        if not cv2.imwrite(path, pixels):
            print(f"Unable to write image {path}, skipping.")

    # Both values are identical for every mask, so compute them once.
    stem = os.path.splitext(image_name)[0]
    white = np.full(image.shape, 255, dtype=np.uint8)

    for i, mask in enumerate(masks):
        mask_image = mask['segmentation'].astype(np.uint8) * 255
        mask_inv = cv2.bitwise_not(mask_image)

        _write(os.path.join(mask_output_dir, f"{stem}_mask_{i}.png"), mask_image)

        # Image pixels where the mask is set, white where it is not.
        inside = cv2.add(
            cv2.bitwise_and(image, image, mask=mask_image),
            cv2.bitwise_and(white, white, mask=mask_inv),
        )
        _write(os.path.join(output_dir, f"{stem}_receipt_only_mask_{i}.png"), inside)

        # The complement: image pixels outside the mask, white inside it.
        outside = cv2.add(
            cv2.bitwise_and(image, image, mask=mask_inv),
            cv2.bitwise_and(white, white, mask=mask_image),
        )
        _write(os.path.join(output_dir, f"{stem}_receipt_only_mask_inv_{i}.png"), outside)

# Run the configured split end to end, then report where the cut-outs were written.
process_images_in_directory(
    image_dir=IMAGE_DIR,
    mask_output_dir=MASK_OUTPUT_DIR,
    output_dir=OUTPUT_DIR,
)
print(f"Receipt-only images saved in {OUTPUT_DIR}")

Receipt-only images saved in ./datasets/seg_images/val


In [20]:
# Test split: source images, white-background cut-outs, and raw masks.
# Paths are relative to the current working directory.
IMAGE_DIR = "./datasets/images/test"
MASK_OUTPUT_DIR = "./datasets/new_masks/test"
OUTPUT_DIR = "./datasets/seg_images/test"

# Create both output folders up front; existing folders are left untouched.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(MASK_OUTPUT_DIR, exist_ok=True)

def process_images_in_directory(image_dir, mask_output_dir, output_dir):
    """Segment every readable image in ``image_dir`` and save the per-mask outputs.

    Each directory entry is loaded with ``cv2.imread``; entries it cannot
    decode are reported on stdout and skipped. Readable images are converted
    from BGR to RGB for the module-level ``mask_generator``, and the resulting
    masks are handed to ``save_and_remove_background`` together with the
    original BGR pixels.

    Args:
        image_dir: Directory whose entries are processed.
        mask_output_dir: Directory passed on for the mask files.
        output_dir: Directory passed on for the cut-out images.
    """
    for filename in os.listdir(image_dir):
        source_path = os.path.join(image_dir, filename)
        bgr = cv2.imread(source_path)

        if bgr is not None:
            # The generator receives RGB; the BGR original is what gets saved.
            masks = mask_generator.generate(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
            save_and_remove_background(bgr, masks, filename, mask_output_dir, output_dir)
        else:
            print(f"Unable to read image {source_path}, skipping.")

def save_and_remove_background(image, masks, image_name, mask_output_dir, output_dir):
    """Write each mask and two white-filled cut-outs of ``image`` as PNG files.

    For every entry ``i`` of ``masks`` three files are written, all named after
    the stem of ``image_name``:

    * ``<stem>_mask_<i>.png`` in ``mask_output_dir``: the mask as 0 / 255.
    * ``<stem>_receipt_only_mask_<i>.png`` in ``output_dir``: ``image`` inside
      the mask, white everywhere else.
    * ``<stem>_receipt_only_mask_inv_<i>.png`` in ``output_dir``: ``image``
      outside the mask, white inside it.

    A write that ``cv2.imwrite`` reports as failed is printed and skipped
    instead of passing silently.

    Args:
        image: Image array; in this notebook the BGR array from ``cv2.imread``.
        masks: Iterable of dicts whose ``'segmentation'`` entry is an array
            (presumably a boolean H x W mask matching ``image`` -- confirm
            against the mask generator).
        image_name: File name of the source image; only its stem is used.
        mask_output_dir: Directory that receives the mask PNGs.
        output_dir: Directory that receives the cut-out PNGs.
    """
    def _write(path, pixels):
        # cv2.imwrite can signal failure by returning False rather than raising.
        if not cv2.imwrite(path, pixels):
            print(f"Unable to write image {path}, skipping.")

    # Both values are identical for every mask, so compute them once.
    stem = os.path.splitext(image_name)[0]
    white = np.full(image.shape, 255, dtype=np.uint8)

    for i, mask in enumerate(masks):
        mask_image = mask['segmentation'].astype(np.uint8) * 255
        mask_inv = cv2.bitwise_not(mask_image)

        _write(os.path.join(mask_output_dir, f"{stem}_mask_{i}.png"), mask_image)

        # Image pixels where the mask is set, white where it is not.
        inside = cv2.add(
            cv2.bitwise_and(image, image, mask=mask_image),
            cv2.bitwise_and(white, white, mask=mask_inv),
        )
        _write(os.path.join(output_dir, f"{stem}_receipt_only_mask_{i}.png"), inside)

        # The complement: image pixels outside the mask, white inside it.
        outside = cv2.add(
            cv2.bitwise_and(image, image, mask=mask_inv),
            cv2.bitwise_and(white, white, mask=mask_image),
        )
        _write(os.path.join(output_dir, f"{stem}_receipt_only_mask_inv_{i}.png"), outside)

# Run the configured split end to end, then report where the cut-outs were written.
process_images_in_directory(
    image_dir=IMAGE_DIR,
    mask_output_dir=MASK_OUTPUT_DIR,
    output_dir=OUTPUT_DIR,
)
print(f"Receipt-only images saved in {OUTPUT_DIR}")

Receipt-only images saved in ./datasets/seg_images/test
