<a href="https://colab.research.google.com/github/geoaigroup/vlm4eo/blob/main/SAM_Demo/SAM_GEOAI_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The samples used in this demo are from the WHU Building Dataset.

WHU dataset : https://paperswithcode.com/dataset/whu-building-dataset

# Object masks from prompts with SAM
This section was prepared by Ali Mayladan.

The Segment Anything Model (SAM) predicts object masks given prompts that indicate the desired object. The model first converts the image into an image embedding that allows high quality masks to be efficiently produced from a prompt.

The `SamPredictor` class provides an easy interface for prompting the model. It allows the user to first set an image using the `set_image` method, which calculates the necessary image embeddings. Then, prompts can be provided via the `predict` method to efficiently predict masks from those prompts. The model can take as input both point and box prompts, as well as masks from the previous iteration of prediction.

## Environment Set-up

If running locally using jupyter, first install `segment_anything` in your environment using the [installation instructions](https://github.com/facebookresearch/segment-anything#installation) in the repository. If running from Google Colab, set `using_colab=True` below and run the cell. In Colab, be sure to select 'GPU' under 'Edit'->'Notebook Settings'->'Hardware accelerator'.

In [None]:
!wget https://github.com/geoaigroup/geoaigroup-website/raw/main/content/media/SAM_26May2023/data.zip
!unzip data.zip

In [None]:
# Switch read by the next cell: when True, that cell installs the Python
# dependencies and downloads the SAM checkpoint.
using_colab = True

In [None]:
if using_colab:
    import torch
    import torchvision
    print("PyTorch version:", torch.__version__)
    print("Torchvision version:", torchvision.__version__)
    print("CUDA is available:", torch.cuda.is_available())
    import sys
    !{sys.executable} -m pip install opencv-python matplotlib
    !{sys.executable} -m pip install 'git+https://github.com/facebookresearch/segment-anything.git'

    !wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
    !pip install geopandas
    !pip install rasterio


## Set-up

Necessary imports and helper functions for displaying points, boxes, and masks.

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
import geopandas as gpd
import os
import json
import glob
from tqdm import tqdm
import shapely.geometry as sg
from shapely import affinity
from shapely.geometry import Point, Polygon
import random
from PIL import Image, ImageDraw
import rasterio
from rasterio.features import geometry_mask
#from metrics import DiceScore,IoUScore
import pandas as pd
import gc
import shutil
import fiona
from SAM_Demo import utils
from SAM_Demo.evaluate import cal_scores
from SAM_Demo.pred_SAM import SAM

In [None]:
def main(prompt_type="", sam=None):
    """Run SAM with box prompts on every tile and score the predicted masks.

    The function reads its locations from module-level globals that must be
    defined before it is called:

    * ``orig_shp``   - directory whose entries are used as tile names and are
      each read with ``gpd.read_file`` as the ground truth for that tile.
    * ``images``     - directory holding one ``<name>.png`` per tile.
    * ``pred``       - vector file read with ``gpd.read_file``; its rows are
      filtered on the ``ImageId`` column to get the prompt geometries.
    * ``output_dir`` - a ``<name>`` sub-directory is created per processed tile.
    * ``score_dir``  - created if missing and handed to ``cal_scores``.

    A tile is skipped when ``output_dir/<name>/<name>.shp`` or ``.png`` already
    exists, when its image cannot be read, or when either the ground truth or
    the prompt geometries are empty.

    Args:
        prompt_type: Accepted for interface compatibility but currently
            ignored; only box prompts (``utils.create_boxes``) are generated.
            Point and mask-input prompts are not wired up here.
        sam: Object exposing ``predictSAM(x=, image=, input_point=,
            input_label=, input_boxes=, flag=)``.

    Raises:
        ValueError: If ``sam`` is None.
    """
    if sam is None:
        # Fail before any file is touched instead of with an AttributeError
        # in the middle of the loop.
        raise ValueError("main() requires a SAM wrapper, e.g. main(sam=SAM())")

    score_list = []
    scores = None  # stays None when no tile reaches the scoring step
    ff = gpd.read_file(pred)
    ids = os.listdir(orig_shp)
    for name in tqdm(ids):
        print(name)
        print("Checking")
        flag = 0

        # Literal existence checks: a glob pattern would misread names that
        # contain characters such as '[' or '*'.
        tile_dir = output_dir + "/" + name
        if os.path.exists(tile_dir + "/" + name + ".shp") or os.path.exists(tile_dir + "/" + name + ".png"):
            print("Found")
            continue

        image_path = images + "/" + name + ".png"
        try:
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread reports a missing/unreadable file by returning None.
                raise FileNotFoundError(f"could not read image: {image_path}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        except Exception as e:
            print(e)
            print(name)
            # Without a valid image there is nothing to predict on; moving on
            # also avoids reusing the previous tile's image.
            continue

        # `name` comes from the listing of orig_shp, so the entry is known to exist.
        gt = gpd.read_file(orig_shp + "/" + name)
        if len(gt["geometry"]) == 0:
            continue

        predic = ff.loc[ff["ImageId"] == name]
        geo = predic["geometry"]
        if len(geo) == 0:
            continue

        # Only box prompts are produced; the point prompts stay empty.
        input_point = []
        input_label = []
        tile_boxes = utils.create_boxes(geo)
        input_boxes = torch.tensor(tile_boxes).cuda()  # requires a CUDA device

        # HWC image array -> CHW float tensor on the GPU.
        x = torch.from_numpy(image.transpose(2, 0, 1)).float().cuda()
        pred_mask = sam.predictSAM(x=x, image=image, input_point=input_point, input_label=input_label, input_boxes=input_boxes, flag=flag)
        os.makedirs(score_dir, exist_ok=True)
        os.makedirs(output_dir + "/" + f"{name}", exist_ok=True)
        scores = cal_scores(output_dir, score_dir)
        scores.micro_match_iou(pred_mask, name, gt, score_list, image, tile_boxes, geo=geo)

    if scores is None:
        # Every tile was skipped, so there is no scorer to summarise.
        print("No tiles were processed; skipping macro scores.")
        return
    scores.macro_score()


In [None]:
# Paths
# These names are read as module-level globals inside main(), so they must be
# defined before main() is called and must keep exactly these names.
images = "data/images"  # main() reads <images>/<name>.png for every tile
output_dir = "data/output"  # main() creates <output_dir>/<name>/ and skips tiles whose .shp/.png already exists there
pred = "data/pred_shapefile"  # read once with gpd.read_file and filtered on the "ImageId" column
orig_shp="data/orig_shp"  # listed to get the tile names; each entry is read as that tile's ground truth
score_dir = "data/scores"  # created by main() and passed to cal_scores
# Build the SAM wrapper once and run the evaluation loop with it.
sam=SAM()
main(sam=sam)

LangSAM : https://github.com/luca-medeiros/lang-segment-anything

GroundingDINO: https://github.com/IDEA-Research/GroundingDINO

Segment Anything : https://github.com/facebookresearch/segment-anything

**Language Segment-Anything (LangSAM)** is an open-source project that combines the power of instance segmentation and text prompts to generate masks for specific objects in images. Built on the recently released Meta model, segment-anything, and the GroundingDINO detection model, it's an easy-to-use and effective tool for object detection and image segmentation.

This section was prepared by Hasan Moughnieh.

In [None]:
!wget https://github.com/geoaigroup/geoaigroup-website/raw/main/content/media/SAM_26May2023/LangSAM.zip
!unzip LangSAM.zip

In [None]:
import sys
!{sys.executable} -m pip install opencv-python matplotlib
!{sys.executable} -m pip install 'git+https://github.com/facebookresearch/segment-anything.git'
!pip install git+https://github.com/openai/CLIP.git

In [3]:
import os
# Remember the current working directory; a later cell uses {HOME} in its
# %cd commands.
HOME = os.getcwd()
print(HOME)

/content


In [None]:
%cd {HOME}
!git clone https://github.com/IDEA-Research/GroundingDINO.git
%cd {HOME}/GroundingDINO
!pip install -q -e .
!pip install -q roboflow

In [5]:
# NOTE(review): the script path is relative to the current working directory,
# which the previous cell leaves at {HOME}/GroundingDINO — confirm that
# pre_langsam.py is reachable from there.
!python pre_langsam.py





box_threshold: This value is used for object detection in the image. A higher value makes the model more selective, identifying only the most confident object instances, leading to fewer overall detections. A lower value, conversely, makes the model more tolerant, leading to increased detections, including potentially less confident ones.

text_threshold: This value is used to associate the detected objects with the provided text prompt. A higher value requires a stronger association between the object and the text prompt, leading to more precise but potentially fewer associations. A lower value allows for looser associations, which could increase the number of associations but also introduce less precise matches.

The optimal threshold can vary depending on the quality and nature of your images, as well as the specificity of your text prompts.


In [None]:
from PIL import Image
from LangSAM import LangSAM
from demo_functions import load_ground_truth_masks , display_images_with_masks

# Identifier of the sample to run.
index = '2_34'
# Set index to '2_36' or '2_37' to try the other samples.
# Both paths are hard-coded under /content, so the sample files must be there.
image = f'/content/{index}_img.png'
ground_truth_mask = f'/content/{index}_gt.png'

model = LangSAM()

# Predict masks, boxes, phrases and logits for the text prompt on the RGB image.
image_pil = Image.open(image).convert("RGB")
text_prompt = "house"
masks, boxes, phrases, logits = model.predict(image_pil, text_prompt)
ground_truth_masks = load_ground_truth_masks(ground_truth_mask)

# This function displays the original image, the predicted masks, and the accuracy compared to the ground truth.
# NOTE(review): the call below receives the path `ground_truth_mask`, while the
# loaded `ground_truth_masks` is not used in this cell — confirm which one
# display_images_with_masks expects.
display_images_with_masks(image_pil, masks , boxes, ground_truth_mask)

Evaluation

In [None]:
from demo_functions import MaskMatchingAlgorithm

# Match the predicted masks against the ground truth given by `ground_truth_mask`.
matcher = MaskMatchingAlgorithm(ground_truth_mask, masks)
output = matcher.matching()

# The first five entries of the matching result are bound, in order, to:
(iou_list,
 tp_pred_indices,
 tp_gt_indices,
 fp_indices,
 fn_indices) = (output[k] for k in range(5))

tp_iou_list, avg_tp_iou = matcher.tp_iou(tp_pred_indices, tp_gt_indices)

# display_results hands back six values; avg_tp_iou is rebound from its result.
results = matcher.display_results(
    iou_list,
    tp_pred_indices,
    tp_gt_indices,
    fp_indices,
    fn_indices,
    tp_iou_list,
    avg_tp_iou,
)
average_iou, avg_tp_iou, precision, recall, f1_score, tp_f1 = results

# Print each metric on its own line as "<label> <value>".
for caption, value in (
    ('average IoU:', average_iou),
    ("average TP IoU", avg_tp_iou),
    ("precision", precision),
    ("recall", recall),
    ("F1 score", f1_score),
    ("TP F1 score", tp_f1),
):
    print(caption, value)