## Data Science Assignment

#### Downloading the model sam-2 and installing all the libraries

In [None]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

In [None]:
!git clone https://github.com/facebookresearch/segment-anything-2.git

In [None]:
%cd segment-anything-2

In [None]:
%pip install -e .

In [1]:
%pip install -e ".[demo]"

Obtaining file:///C:/Users/Lenovo/Documents/daily_reports/sam-2
Note: you may need to restart the kernel to use updated packages.


ERROR: file:///C:/Users/Lenovo/Documents/daily_reports/sam-2 does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.

[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


#### Problem statement 1:

#### importing all the libraries

In [None]:
import torch
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.build_sam import build_sam2_video_predictor
from PIL import Image, ImageOps
import numpy as np
import matplotlib.pyplot as plt
import os,glob,shutil
import matplotlib.patches as patches
import cv2

In [None]:
# Machine-specific dataset directory (absolute local path; adjust for your environment).
# NOTE(review): main() below defines its own copy of this path rather than reading this variable.
image_folder = r"C:\Users\Lenovo\Downloads\CMU10_3D\CMU10_3D\data_2D"

In [None]:
def load_sam_model(
    checkpoint=r"C:\Users\Lenovo\Documents\daily_reports\sam-2\sam2_hiera_tiny.pt",
    model_cfg="sam2_hiera_t.yaml",
    device="cpu",
):
    """Build a SAM 2 model and wrap it in an automatic mask generator.

    Args:
        checkpoint: Path to the SAM 2 weights file handed to ``build_sam2``.
        model_cfg: Name of the SAM 2 config handed to ``build_sam2``.
        device: Device string handed to ``build_sam2`` (defaults to ``"cpu"``).

    Returns:
        A ``SAM2AutomaticMaskGenerator`` wrapping the freshly built model.
    """
    # Only one model instance is built: automatic mask generation does not need
    # a separate SAM2ImagePredictor, and building one would load the weights twice.
    sam2 = build_sam2(model_cfg, checkpoint, device=device, apply_postprocessing=False)
    return SAM2AutomaticMaskGenerator(sam2)

In [None]:
def load_first_image_and_mask(image_name, image_folder, mask_folder):
    """Read an image and its companion ground-truth mask from disk.

    The mask filename is derived from ``image_name`` by replacing ".jpg"
    with "_1_gt.png".

    Args:
        image_name: Filename of the image inside ``image_folder``.
        image_folder: Directory holding the image.
        mask_folder: Directory holding the mask.

    Returns:
        ``(image, mask)`` exactly as returned by ``cv2.imread``; the mask is
        read with ``cv2.IMREAD_GRAYSCALE``.
    """
    image_file = os.path.join(image_folder, image_name)
    mask_file = os.path.join(mask_folder, image_name.replace(".jpg", "_1_gt.png"))
    return cv2.imread(image_file), cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)

In [None]:
def fine_tune_sam2(mask_generator, image, mask, min_area=500):
    """Generate masks for ``image`` and discard the small ones.

    Despite the name, no training or weight update happens here: the function
    only calls ``mask_generator.generate`` and filters the result by area.

    Args:
        mask_generator: Object exposing ``generate(image)`` that returns an
            iterable of dicts, each with an ``'area'`` entry.
        image: Image passed unchanged to ``mask_generator.generate``.
        mask: Unused; kept so existing positional callers keep working.
        min_area: Records whose ``'area'`` is not strictly greater than this
            value are dropped. Defaults to 500.

    Returns:
        List of the generated records with ``'area' > min_area``.
    """
    generated = mask_generator.generate(image)
    return [record for record in generated if record['area'] > min_area]

In [None]:
def predict_masks_on_images(mask_generator, object_type, image_folder, min_area=500):
    """Run the mask generator on every ".jpg" image of one object type.

    Args:
        mask_generator: Object exposing ``generate(image)`` that returns an
            iterable of dicts, each with an ``'area'`` entry.
        object_type: Filename prefix selecting which images to process.
        image_folder: Directory scanned for images.
        min_area: Records whose ``'area'`` is not strictly greater than this
            value are dropped. Defaults to 500.

    Returns:
        Dict mapping image filename to the list of kept mask records.
        Images that cannot be read are reported and left out of the dict.
    """
    predictions = {}

    # Sort so the processing order (and dict order) does not depend on the OS.
    image_names = sorted(
        name for name in os.listdir(image_folder)
        if name.startswith(object_type) and name.endswith(".jpg")
    )

    for image_name in image_names:
        image_bgr = cv2.imread(os.path.join(image_folder, image_name))
        if image_bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            print(f"Warning: could not read image {image_name}; skipping")
            continue

        # cv2.imread yields BGR channel order; SAM 2 expects RGB input.
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        predicted_masks = mask_generator.generate(image_rgb)
        predictions[image_name] = [m for m in predicted_masks if m['area'] > min_area]

    return predictions

#### Problem statement 2:

In [None]:
def mask_to_bounding_box(mask):
    """Return the ``(x, y, w, h)`` rectangle reported by ``cv2.boundingRect``.

    Every element of ``mask`` greater than zero is treated as foreground.
    """
    foreground = mask > 0
    left, top, width, height = cv2.boundingRect(foreground.astype(np.uint8))
    return left, top, width, height

In [None]:
def convert_masks_to_bounding_boxes(predicted_masks):
    """Convert per-image mask predictions into per-image bounding boxes.

    Args:
        predicted_masks: Dict mapping image name to either
            * a list/tuple of mask records (dicts as produced by
              ``predict_masks_on_images``), or
            * a list/tuple of mask arrays, or
            * a single mask array (the shape this function originally accepted).

    Returns:
        Dict mapping image name to a list of ``(x, y, w, h)`` tuples, one per
        mask. An image with no masks maps to an empty list.
    """
    predicted_boxes = {}

    for image_name, masks in predicted_masks.items():
        # A bare array is a single mask; wrap it so one loop handles both shapes.
        records = masks if isinstance(masks, (list, tuple)) else [masks]

        boxes = []
        for record in records:
            if isinstance(record, dict):
                segmentation = record.get('segmentation')
                if isinstance(segmentation, np.ndarray):
                    # Derive the box from the mask with the same routine used
                    # for ground truth, so both sides are measured alike.
                    box = mask_to_bounding_box(segmentation)
                else:
                    # No array mask available (e.g. an encoded segmentation):
                    # fall back to the record's own box.
                    # NOTE(review): assumes 'bbox' is in (x, y, w, h) order -- confirm.
                    box = record['bbox']
            else:
                box = mask_to_bounding_box(record)
            boxes.append(tuple(box))

        predicted_boxes[image_name] = boxes

    return predicted_boxes

#### Problem statement 3

In [None]:
def calculate_iou(boxA, boxB):
    """
    Calculate the Intersection over Union (IoU) between two bounding boxes.

    Args:
        boxA: Box as ``(x, y, w, h)``.
        boxB: Box as ``(x, y, w, h)``.

    Returns:
        IoU as a float in ``[0, 1]``. Returns 0.0 when the union has no area
        (for example when both boxes are degenerate), instead of dividing by zero.
    """
    # Corners of the intersection rectangle.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
    bottom = min(boxA[1] + boxA[3], boxB[1] + boxB[3])

    # Clamp at zero so disjoint boxes give an empty intersection.
    inter_area = max(0, right - left) * max(0, bottom - top)

    union_area = boxA[2] * boxA[3] + boxB[2] * boxB[3] - inter_area
    if union_area <= 0:
        return 0.0

    return inter_area / float(union_area)

In [None]:
def load_ground_truth(image_folder, object_type):
    """
    Build ground-truth bounding boxes from the mask files of one object type.

    Mask files are those in ``image_folder`` whose name starts with
    ``object_type`` and ends with "_1_gt.png". Each is read as grayscale and
    reduced to a box with ``mask_to_bounding_box``.

    Returns:
        Dict mapping mask filename to its ``(x, y, w, h)`` box, filled in
        sorted filename order.
    """
    def is_gt_mask(filename):
        return filename.startswith(object_type) and filename.endswith("_1_gt.png")

    ground_truth_boxes = {}
    for mask_name in sorted(filter(is_gt_mask, os.listdir(image_folder))):
        gt_mask = cv2.imread(os.path.join(image_folder, mask_name), cv2.IMREAD_GRAYSCALE)
        left, top, width, height = mask_to_bounding_box(gt_mask)
        ground_truth_boxes[mask_name] = (left, top, width, height)

    return ground_truth_boxes

In [None]:
def evaluate_predictions(predictions, ground_truths, iou_threshold=0.5):
    """
    Evaluate the predicted bounding boxes against ground truth using IoU.

    An image counts as correct when at least one of its predicted boxes has
    IoU strictly greater than ``iou_threshold`` with its ground-truth box.

    Args:
        predictions: Dict mapping image filename (".jpg") to a list of
            ``(x, y, w, h)`` boxes.
        ground_truths: Dict mapping mask filename ("..._1_gt.png") to one
            ``(x, y, w, h)`` box.
        iou_threshold: Minimum IoU (exclusive) for a match. Defaults to 0.5.

    Returns:
        Fraction of ground-truth entries that were matched; 0.0 when
        ``ground_truths`` is empty.
    """
    num_total = len(ground_truths)
    if num_total == 0:
        # Nothing to score against; avoid dividing by zero.
        return 0.0

    num_correct = 0
    for image_name, predicted_boxes in predictions.items():
        gt_box = ground_truths.get(image_name.replace(".jpg", "_1_gt.png"))
        if gt_box is None:
            # No ground truth for this image, so it cannot be scored.
            continue

        if any(calculate_iou(pred_box, gt_box) > iou_threshold for pred_box in predicted_boxes):
            num_correct += 1

    return num_correct / num_total

In [None]:
def main(all_images_folder=r"C:\Users\Lenovo\Downloads\CMU10_3D\CMU10_3D\data_2D"):
    """Run detection and report IoU-based accuracy for each object type.

    For each object type this predicts masks on its images, converts them to
    bounding boxes, loads the ground-truth boxes from the same folder, and
    prints the fraction of ground-truth boxes matched at IoU > 0.5.

    Args:
        all_images_folder: Directory containing both the ".jpg" images and the
            "_1_gt.png" ground-truth masks. The default is a raw string so the
            Windows backslashes are not parsed as escape sequences.
    """
    # Define object types
    object_types = ["can_chowder", "can_soymilk", "can_tomatosoup", "carton_oj",
                    "carton_soymilk", "diet_coke", "hc_potroastsoup",
                    "juicebox", "rice_tuscan", "ricepilaf"]

    # Build the model once; it does not depend on the object type, and
    # reloading the checkpoint on every iteration is pure overhead.
    mask_generator = load_sam_model()

    for object_type in object_types:
        print(f"Processing {object_type}...")

        predicted_masks = predict_masks_on_images(mask_generator, object_type, all_images_folder)

        predicted_boxes = convert_masks_to_bounding_boxes(predicted_masks)

        ground_truth_boxes = load_ground_truth(all_images_folder, object_type)

        accuracy = evaluate_predictions(predicted_boxes, ground_truth_boxes)
        print(f"Detection Accuracy for {object_type} (IoU > 0.5): {accuracy * 100:.2f}%")
        
