<a href="https://colab.research.google.com/github/abhishek7467/Data_Science_notebooks/blob/main/my_SAM_HQ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
# Report the runtime before any model is loaded.
print("PyTorch version:", torch.__version__)
print("CUDA is available:", torch.cuda.is_available())

# One-time setup, run manually when needed:
# !git clone https://github.com/SysCV/sam-hq.git
# !pip install timm
# Enter the sam-hq checkout only if we are not already inside it. The path is
# relative, so an unconditional chdir fails as soon as this cell is re-run.
if os.path.basename(os.getcwd()) != 'sam-hq':
    os.chdir('sam-hq')
# !export PYTHONPATH=$(pwd)
# Imported after the chdir above — presumably so the checkout's own
# `segment_anything` package (the SAM-HQ variant) is the one that is picked up.
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor


In [None]:
# !mkdir pretrained_checkpoint
# !wget https://huggingface.co/lkeab/hq-sam/resolve/main/sam_hq_vit_l.pth
# !mv sam_hq_vit_l.pth pretrained_checkpoint


In [None]:
def show_anns(anns):
    """Paint every mask in ``anns`` onto the current matplotlib axes.

    Each entry must provide an ``'area'`` value (used for ordering) and a
    ``'segmentation'`` array (used as an index mask into the overlay).
    Masks are painted from largest to smallest area so that small regions
    end up on top, each in a random RGB colour with alpha 0.35.
    Returns ``None``; an empty ``anns`` is a no-op.
    """
    if len(anns) < 1:
        return

    by_area_desc = list(anns)
    by_area_desc.sort(key=lambda entry: entry['area'], reverse=True)

    axes = plt.gca()
    axes.set_autoscale_on(False)

    # RGBA canvas sized like the first mask; alpha 0 keeps unmasked pixels clear.
    first_shape = by_area_desc[0]['segmentation'].shape
    overlay = np.ones((first_shape[0], first_shape[1], 4))
    overlay[..., 3] = 0
    for entry in by_area_desc:
        region = entry['segmentation']
        rgb = np.random.random(3)
        overlay[region] = np.concatenate([rgb, [0.35]])
    axes.imshow(overlay)


In [None]:
# Single test image used by the following cells.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
img_path =r"C:\Users\spx016\Downloads\Images_With_output_RMBG\image_20241211_193112.jpeg"


In [None]:
# Load the test image with OpenCV and preview it.
image = cv2.imread(img_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface as an obscure cvtColor assertion.
if image is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
# OpenCV decodes to BGR; matplotlib expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(20,20))
plt.imshow(image)
plt.axis('off')
plt.show()


In [None]:
# Load the SAM-HQ ViT-H checkpoint and wrap it in a prompt-based predictor.
# Raw string: the Windows path contains backslashes that are not valid escape
# sequences (\I, \s, \p); the resulting value is unchanged, the warning is gone.
sam_checkpoint = r"E:\Image_Segmentation\sam-hq\pretrained_checkpoint\sam_hq_vit_h.pth"
model_type = "vit_h"
# Fall back to CPU instead of crashing on machines without a CUDA device.
device = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
sam.eval()
predictor = SamPredictor(sam)


In [None]:
# Detect objects in the test image with DETR (ResNet-50 backbone).
# Force 3-channel RGB so the array handed to the segmenter later is always
# H x W x 3, whatever the file's native mode is.
image = Image.open(img_path).convert("RGB")
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
inputs = processor(images=image, return_tensors="pt")
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(**inputs)
# PIL's size is (width, height); post-processing expects (height, width).
target_sizes = torch.tensor([image.size[::-1]])
# Keep detections with confidence above 0.9, with boxes scaled to the image size.
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]


In [None]:
# Use the detector's own class-id -> name mapping. The previous hand-written
# {1: "motorcycle"} mislabelled detections: in the COCO label set shipped with
# facebook/detr-resnet-50, id 1 is "person" (motorcycle is id 4).
id2label = model.config.id2label
# Independent RGB copies of the PIL image:
img  = np.array(image)       # canvas for drawing all detections
img_real = np.array(image)   # untouched pixels for segmentation / extraction
img_2 = np.array(image)      # canvas for the single best detection


In [None]:
# Register the untouched image with the SAM-HQ predictor; the predict() call
# further down works on whatever image was set last.
# NOTE(review): img_real comes from PIL, so it is in RGB order.
predictor.set_image(img_real)


In [None]:
# Draw every detection (box + "label (score)") onto `img` and save the result.
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    print("box ", type(box), box , "score" ,score.item())
    box = box.detach().cpu().numpy()
    box = [int(round(i)) for i in box]
    x1, y1, x2, y2 = box
    label_name = id2label.get(label.item(), "Unknown")
    confidence = score.item()
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    text = f"{label_name} ({confidence})"
    # Keep the caption inside the image when the box touches the top edge.
    text_y = max(y1 - 10, 10)
    cv2.putText(img, text, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Raw string: the backslashes in this Windows path are not valid escapes.
file_path = r"E:\Image_Segmentation\sam-hq\myImage.png"
# `img` is RGB (it comes from PIL) while cv2.imwrite expects BGR; convert so
# the saved file does not have red and blue swapped.
img_to_save = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) if img.ndim == 3 and img.shape[2] == 3 else img
print(cv2.imwrite(file_path, img_to_save))
# Show the absolute path as the cell output (the former `{...}` built a set).
os.path.abspath(file_path)


In [None]:
# Preview the annotated copy with all detections drawn on it.
plt.imshow(img)


In [None]:
# Inspect the confidence scores of the kept detections as a NumPy array.
results["scores"].detach().cpu().numpy()


In [None]:
# Index of the highest-confidence detection (np.argmax raises on an empty array).
print(np.argmax(results["scores"].detach().cpu().numpy()))


In [None]:
# Pick the single highest-confidence detection and draw it on a fresh copy.
# The detector's own mapping is used: the former {1: "motorcycle"} mislabels
# COCO id 1, which is "person" for facebook/detr-resnet-50.
id2label = model.config.id2label
img_2 = np.array(image)
det_scores = results["scores"].detach().cpu().numpy()
if det_scores.size == 0:
    # np.argmax on an empty array raises an unhelpful ValueError; be explicit.
    raise ValueError("No detection above the confidence threshold; nothing to segment.")
best_idx = int(np.argmax(det_scores))
box1 = results["boxes"][best_idx].detach().cpu().numpy()
box1 = [int(round(i)) for i in box1]
x1, y1, x2, y2 = box1
# Read label/score of the *selected* detection. Previously these came from the
# `label`/`score` variables left over from the last iteration of an earlier loop.
label_name = id2label.get(results["labels"][best_idx].item(), "Unknown")
confidence = float(det_scores[best_idx])
cv2.rectangle(img_2, (x1, y1), (x2, y2), (0, 255, 0), 2)
text = f"{label_name} ({confidence})"
text_y = max(y1 - 10, 10)
cv2.putText(img_2, text, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)


In [None]:
# Prompts for the segmenter: the centre of the chosen box as a single point
# carrying label 1, plus the box itself.
input_point = np.array([[0.5 * (x1 + x2), 0.5 * (y1 + y2)]])
input_label = np.array([1])
input_box = np.array(box1)


In [None]:
# Segment the object described by the point + box prompts.
# multimask_output=False requests a single mask rather than several candidates.
# NOTE(review): hq_token_only=True presumably selects SAM-HQ's high-quality
# output token only — confirm against the sam-hq README.
# NOTE: `scores` and `logits` here are the segmenter's outputs and overwrite
# any earlier variables with those names.
masks, scores, logits = predictor.predict(
        point_coords=input_point,
        point_labels=input_label,
        box = input_box,
        multimask_output=False,
        hq_token_only=True,
    )


In [None]:
# Convert the predicted mask to an 8-bit image (0 = background, 255 = object).
# assumes `masks` is a boolean/0-1 array of shape (1, H, W) — TODO confirm
mask_uint8 = masks.astype(np.uint8) * 255


In [None]:
# Build an (H, W, 3) preview with the mask in the red channel.
# Stacking is done with NumPy on the squeezed 2-D mask: passing the raw
# (1, H, W) arrays to cv2.merge makes OpenCV guess whether the last axis is a
# channel axis (it does so when W <= 512), which yields a wrongly shaped result
# for narrow images.
mask_2d = np.squeeze(mask_uint8)
mask_color = np.dstack([mask_2d, np.zeros_like(mask_2d), np.zeros_like(mask_2d)])
plt.imshow(mask_color)


In [None]:

# Step 1: Extract the red channel from `mask_color` to create a binary mask
binary_mask = mask_color[..., 0]  # Use only the red channel as `mask_color` is created with red

# Ensure the mask is binary (values: 0 or 255)
binary_mask = (binary_mask > 0).astype(np.uint8) * 255

# Step 2: Resize the mask to match the image size, if needed
if binary_mask.shape[:2] != img_real.shape[:2]:
    # cv2.resize takes (width, height); nearest-neighbour keeps the mask binary.
    binary_mask = cv2.resize(binary_mask, (img_real.shape[1], img_real.shape[0]), interpolation=cv2.INTER_NEAREST)

# Step 3: Extract the object using bitwise operation (pixels outside the mask become 0)
object_extracted = cv2.bitwise_and(img_real, img_real, mask=binary_mask)

# Step 4: Create a transparent background (optional, if you want PNG with transparency)
object_with_alpha = np.zeros((img_real.shape[0], img_real.shape[1], 4), dtype=np.uint8)
object_with_alpha[..., :3] = object_extracted  # Copy RGB channels
object_with_alpha[..., 3] = binary_mask        # Add alpha channel based on the mask

# Step 5: Save. `img_real` comes from PIL, so the channels are RGB(A), while
# cv2.imwrite expects BGR(A); convert so red and blue are not swapped on disk.
cv2.imwrite("C:\\Users\\spx016\\Downloads\\image (2) (1).png", cv2.cvtColor(object_with_alpha, cv2.COLOR_RGBA2BGRA))

# Step 6: Display the result. The array is already RGB, so it is shown as-is
# (the former BGR2RGB conversion swapped the colours in the preview).
plt.figure(figsize=(8, 8))
plt.imshow(object_extracted)
plt.axis("off")
plt.title("Extracted Object")
plt.show()



In [None]:
# if len(mask_uint8.shape) == 3:
#     mask_uint8 = mask_uint8[..., 0]
# if mask_uint8.shape[:2] != img_real.shape[:2]:
#     mask = cv2.resize(mask_uint8, (img_real.shape[1], img_real.shape[0]), interpolation=cv2.INTER_NEAREST)
# else:
#     mask = mask_uint8
# transparent_object = np.zeros((img_real.shape[0], img_real.shape[1], 4), dtype=np.uint8)
# transparent_object[..., :3] = img_real
# transparent_object[..., 3] = mask
# cv2.imwrite("C:\\Users\\spx016\\Downloads\\image (2) (1).png", transparent_object)
# plt.figure(figsize=(8, 8))
# plt.imshow(cv2.cvtColor(transparent_object[..., :3], cv2.COLOR_BGR2RGB))
# plt.axis("off")
# plt.title("Extracted Object with Transparency")
# plt.show()


In [None]:

# Batch version: for every image in `image_dir`, detect objects with DETR, cut
# out the highest-confidence detection with SAM-HQ and save it as a transparent
# PNG next to a copy of the input. Images that cannot be segmented are copied
# with a "__non_SEGMENTATION__" suffix instead.
image_dir = "C:\\Users\\spx016\\Downloads\\Images_With_output_RMBG\\"
output_image_dir = "C:\\Users\\spx016\\Downloads\\Image_With_Output_SAM_resnet50\\"
# cv2.imwrite fails silently when the target directory is missing.
os.makedirs(output_image_dir, exist_ok=True)

# Load the detector once; reloading it for every file is pure overhead.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

for filename in os.listdir(image_dir):
    if filename.endswith(('.jpg', '.jpeg', '.bmp', '.gif', '.webp')):
        # Full path to the image file
        filepath = os.path.join(image_dir, filename)
        # Needed by both the success and the fallback branch, so split up front
        # (the fallback used to read these names before they were assigned).
        base_name, ext = os.path.splitext(filename)

        image = Image.open(filepath)
        # 3-channel RGB view for the detector and the segmenter.
        rgb_image = image.convert("RGB")
        inputs = processor(images=rgb_image, return_tensors="pt")
        with torch.no_grad():  # inference only
            outputs = model(**inputs)
        # PIL size is (width, height); post-processing expects (height, width).
        target_sizes = torch.tensor([rgb_image.size[::-1]])
        results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
        img_real = np.array(rgb_image)
        predictor.set_image(img_real)

        try:
            det_scores = results["scores"].detach().cpu().numpy()
            if det_scores.size == 0:
                raise ValueError("no detection above the confidence threshold")
            best_idx = int(np.argmax(det_scores))
            box1 = results["boxes"][best_idx].detach().cpu().numpy()
            box1 = [int(round(i)) for i in box1]
            # Use THIS image's box for the point prompt; previously x1..y2 were
            # stale values left over from an earlier cell.
            x1, y1, x2, y2 = box1
            input_box = np.array(box1)
            input_label = np.array([1])
            input_point = np.array([[(x1 + x2) / 2, (y1 + y2) / 2]])

            masks, scores, logits = predictor.predict(
                point_coords=input_point,
                point_labels=input_label,
                box=input_box,
                multimask_output=False,
                hq_token_only=True,
            )

            # Single (H, W) mask as 0/255 uint8.
            binary_mask = masks[0].astype(np.uint8) * 255

            # Resize the mask to match the image size, if needed
            if binary_mask.shape[:2] != img_real.shape[:2]:
                binary_mask = cv2.resize(binary_mask, (img_real.shape[1], img_real.shape[0]), interpolation=cv2.INTER_NEAREST)

            # Keep only the masked pixels, then attach the mask as alpha channel.
            object_extracted = cv2.bitwise_and(img_real, img_real, mask=binary_mask)
            object_with_alpha = np.zeros((img_real.shape[0], img_real.shape[1], 4), dtype=np.uint8)
            object_with_alpha[..., :3] = object_extracted
            object_with_alpha[..., 3] = binary_mask

            # Create the output file name with the original name and "__removedObj" appended
            output_filename = f"{base_name}__removedObj.png"
            output_path = os.path.join(output_image_dir, output_filename)

            # The array is RGBA (PIL order) but cv2.imwrite expects BGRA.
            if not cv2.imwrite(output_path, cv2.cvtColor(object_with_alpha, cv2.COLOR_RGBA2BGRA)):
                raise IOError(f"cv2.imwrite failed for {output_path}")

            # Save a copy of the input next to the cut-out.
            INput_filename = f"{base_name}____{ext}"
            INput_path = os.path.join(output_image_dir, INput_filename)
            image.save(INput_path)
            print(f"Saved image: {output_path}")
            print(f"Saved image: {INput_path}")
        except Exception as exc:
            # Best effort: report why segmentation failed and still keep a copy
            # of the input, flagged as not segmented.
            print("  Except Saved image ")
            print(f"  Reason: {exc!r}")
            INput_filename = f"{base_name}__non_SEGMENTATION__{ext}"
            INput_path = os.path.join(output_image_dir, INput_filename)
            image.save(INput_path)
            print(f"Except Saved image: {INput_path}")



In [None]:
# Except Saved image: C:\Users\spx016\Downloads\Image_With_Output_SAM_resnet50\image_20241211_192652__non_SEGMENTATION__.jpg
# Except Saved image: C:\Users\spx016\Downloads\Image_With_Output_SAM_resnet50\image_20241211_193112__non_SEGMENTATION__.jpeg



In [None]:
# Load model directly
# NOTE: this rebinds `processor` and `model`, replacing the DETR objects created
# earlier in the notebook; cells above must be re-run to get those back.
from transformers import AutoImageProcessor, AutoModelForObjectDetection
import cv2 , torch
processor = AutoImageProcessor.from_pretrained("ArrayDice/Vehicle_Detection_Model_Zoom")
model = AutoModelForObjectDetection.from_pretrained("ArrayDice/Vehicle_Detection_Model_Zoom")


In [None]:
# Load the vehicle test image as an RGB array.
img_path =r"C:\Users\spx016\Downloads\object-detection_bike\image_20241211_193116____.jpg"
image = cv2.imread(img_path)
# cv2.imread returns None (no exception) when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


In [None]:
# Load model and processor
processor = AutoImageProcessor.from_pretrained("ArrayDice/Vehicle_Detection_Model_Zoom")
model = AutoModelForObjectDetection.from_pretrained("ArrayDice/Vehicle_Detection_Model_Zoom")

# Load the image as an RGB uint8 array
img_path = r"C:\Users\spx016\Downloads\object-detection_bike\image_20241211_193116____.jpg"
image = cv2.imread(img_path)
# cv2.imread returns None (no exception) when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Deliberately NOT divided by 255: Hugging Face image processors rescale pixel
# values themselves by default, so pre-scaling to [0, 1] would scale the input
# twice and feed the model an almost black image.
# NOTE(review): confirm this checkpoint's processor keeps do_rescale=True.

# Process the image with the model (inference only, so no autograd graph)
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    output = model(**inputs)

# Correctly define target_sizes from image.shape
height, width, _ = image.shape  # Get height and width of the image
target_sizes = torch.tensor([[height, width]])  # (height, width) per image in the batch

# Post-process the model outputs: keep detections scoring above 0.9
results = processor.post_process_object_detection(output, target_sizes=target_sizes, threshold=0.9)[0]


In [None]:
# Manual post-processing of the raw detector output, mirroring what
# `post_process_object_detection` does for DETR-style heads.
# NOTE(review): assumes a DETR-style model, i.e. the last logit is the
# "no object" class and boxes are normalized (cx, cy, w, h) — confirm for this
# checkpoint.
logits = output.logits.detach()  # Shape: [batch_size, num_queries, num_classes + 1]
pred_boxes = output.pred_boxes.detach()  # Shape: [batch_size, num_queries, 4]
image_size = (width, height)  # (width, height) of the source image

# Softmax to get class probabilities
probs = torch.softmax(logits, dim=-1)

# Extract class labels and scores, ignoring the trailing "no object" class.
# Including it made every empty query show up as a ~0.99 "detection" of the
# last class index.
scores, labels = probs[..., :-1].max(dim=-1)

# Convert centre format to corner format (x1, y1, x2, y2), as used by the rest
# of the notebook, then denormalize to pixel coordinates.
center_x, center_y, box_w, box_h = pred_boxes.unbind(-1)
boxes = torch.stack(
    [center_x - box_w / 2, center_y - box_h / 2, center_x + box_w / 2, center_y + box_h / 2],
    dim=-1,
)
boxes = boxes * torch.tensor([image_size[0], image_size[1], image_size[0], image_size[1]])

# Apply confidence threshold
threshold = 0.95
keep = scores > threshold

# Filter predictions
filtered_boxes = boxes[keep]
filtered_scores = scores[keep]
filtered_labels = labels[keep]
print(filtered_scores)
print(filtered_labels)
print(filtered_boxes)


tensor([0.9972, 0.9986, 0.9980, 0.9978, 0.9985, 0.9989, 0.9987, 0.9988, 0.9976,
        0.9948, 0.9979, 0.9975, 0.9982, 0.9980, 0.9983, 0.9980, 0.9945, 0.9983,
        0.9985, 0.9984, 0.9979, 0.9979, 0.9968, 0.9972, 0.9990, 0.9989, 0.9979,
        0.9989, 0.9982, 0.9970, 0.9969, 0.9985, 0.9977, 0.9980, 0.9979, 0.9987,
        0.9981, 0.9989, 0.9988, 0.9988, 0.9985, 0.9974, 0.9986, 0.9979, 0.9979,
        0.9990, 0.9989, 0.9985, 0.9971, 0.9989, 0.9968, 0.9987, 0.9979, 0.9988,
        0.9987, 0.9988, 0.9981, 0.9987, 0.9986, 0.9979, 0.9986, 0.9988, 0.9958,
        0.9984, 0.9956, 0.9984, 0.9959, 0.9980, 0.9987, 0.9977, 0.9978, 0.9988,
        0.9988, 0.9989, 0.9975, 0.9990, 0.9988, 0.9969, 0.9987, 0.9973, 0.9975,
        0.9988, 0.9967, 0.9967, 0.9989, 0.9978, 0.9973, 0.9989, 0.9978, 0.9988,
        0.9989, 0.9985, 0.9980, 0.9987, 0.9989, 0.9984, 0.9985, 0.9978, 0.9985,
        0.9987], grad_fn=<IndexBackward0>)
tensor([11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 1

In [None]:
from transformers import pipeline


In [None]:
# Run the high-level object-detection pipeline on the current `img_path`.
# NOTE(review): no model is named, so transformers presumably falls back to its
# default checkpoint for this task — pin one explicitly for reproducibility.
detector = pipeline(task="object-detection")
preds = detector(
    img_path
)


In [None]:
# Inspect the raw predictions; the cells below read "score", "label" and "box" from each entry.
preds


In [None]:
# Draw every pipeline prediction on a uint8 copy of the image.
# A copy keeps `image` untouched, so re-running the cell does not stack boxes.
# If `image` holds floats in [0, 1], it is scaled back to 0-255 first, so the
# (0, 255, 0) drawing colour is in range for matplotlib.
if np.issubdtype(image.dtype, np.floating):
    annotated = np.clip(np.rint(image * 255.0), 0, 255).astype(np.uint8)
else:
    annotated = image.copy()
for pred in preds:
    score = pred["score"]
    label = pred["label"]
    box  = pred["box"]
    print(score , label, box)
    x1, y1, x2, y2  = box["xmin"], box["ymin"], box["xmax"], box["ymax"]
    print(x1, y1, x2, y2)
    cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
    text = f"{label} ({score})"
    # Keep the caption inside the image when the box touches the top edge.
    text_y = max(y1 - 10, 10)
    cv2.putText(annotated, text, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
plt.figure(figsize=(20,20))
plt.imshow(annotated)
plt.axis('off')
plt.show()


In [None]:
# Position of the highest-scoring prediction in `preds` (first one wins on ties).
max_index = max(enumerate(preds), key=lambda pair: pair[1]['score'])[0]
max_index


In [None]:

# Batch version using the object-detection pipeline: for every image in
# `image_dir`, take the highest-scoring detection, cut it out with SAM-HQ and
# save it as a transparent PNG plus a copy of the input. Images that cannot be
# segmented are copied with a "__non_SEGMENTATION__" suffix instead.
# NOTE: input and output directory are the same, so files written here are
# picked up as inputs the next time this cell runs.
image_dir = "C:\\Users\\spx016\\Downloads\\object-detection_bike\\"
output_image_dir = "C:\\Users\\spx016\\Downloads\\object-detection_bike\\"
# cv2.imwrite fails silently when the target directory is missing.
os.makedirs(output_image_dir, exist_ok=True)

# Build the detector once; recreating the pipeline per file reloads the model.
detector = pipeline(task="object-detection")

for filename in os.listdir(image_dir):
    if filename.endswith(('.jpg', '.jpeg', '.bmp', '.gif', '.webp')):
        # Full path to the image file
        filepath = os.path.join(image_dir, filename)
        # Needed by both the success and the fallback branch, so split up front
        # (the fallback used to read these names before they were assigned).
        base_name, ext = os.path.splitext(filename)

        image = Image.open(filepath)
        preds = detector(
            filepath
        )
        # 3-channel RGB array for the segmenter.
        img_real = np.array(image.convert("RGB"))
        predictor.set_image(img_real)
        try:
            if len(preds) == 0:
                raise ValueError("detector returned no predictions")
            max_index = max(range(len(preds)), key=lambda i: preds[i]['score'])
            score = preds[max_index]["score"]
            label = preds[max_index]["label"]
            box  = preds[max_index]["box"]
            print(score , label, box)
            x1, y1, x2, y2  = box["xmin"], box["ymin"], box["xmax"], box["ymax"]
            input_box = np.array([x1, y1, x2, y2])
            input_label = np.array([1])
            input_point = np.array([[(x1 + x2) / 2, (y1 + y2) / 2]])

            masks, scores, logits = predictor.predict(
                point_coords=input_point,
                point_labels=input_label,
                box=input_box,
                multimask_output=False,
                hq_token_only=True,
            )

            # Single (H, W) mask as 0/255 uint8.
            binary_mask = masks[0].astype(np.uint8) * 255

            # Resize the mask to match the image size, if needed
            if binary_mask.shape[:2] != img_real.shape[:2]:
                binary_mask = cv2.resize(binary_mask, (img_real.shape[1], img_real.shape[0]), interpolation=cv2.INTER_NEAREST)

            # Keep only the masked pixels, then attach the mask as alpha channel.
            object_extracted = cv2.bitwise_and(img_real, img_real, mask=binary_mask)
            object_with_alpha = np.zeros((img_real.shape[0], img_real.shape[1], 4), dtype=np.uint8)
            object_with_alpha[..., :3] = object_extracted
            object_with_alpha[..., 3] = binary_mask

            # Create the output file name with the original name and "__removedObj" appended
            output_filename = f"{base_name}__removedObj.png"
            output_path = os.path.join(output_image_dir, output_filename)

            # The array is RGBA (PIL order) but cv2.imwrite expects BGRA.
            if not cv2.imwrite(output_path, cv2.cvtColor(object_with_alpha, cv2.COLOR_RGBA2BGRA)):
                raise IOError(f"cv2.imwrite failed for {output_path}")

            # Save a copy of the input next to the cut-out.
            INput_filename = f"{base_name}____{ext}"
            INput_path = os.path.join(output_image_dir, INput_filename)
            image.save(INput_path)
            print(f"Saved image: {output_path}")
            print(f"Saved image: {INput_path}")
        except Exception as exc:
            # Best effort: report why segmentation failed and still keep a copy
            # of the input, flagged as not segmented.
            print("  Except Saved image ")
            print(f"  Reason: {exc!r}")
            INput_filename = f"{base_name}__non_SEGMENTATION__{ext}"
            INput_path = os.path.join(output_image_dir, INput_filename)
            image.save(INput_path)
            print(f"Except Saved image: {INput_path}")

