## Set-Up

Connect this notebook to a GPU Runtime to use Meta's Segment Anything Model.

Connect to Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem; the paths used later in this
# notebook live under this mount point.
from google.colab import drive

GDRIVE_MOUNT_POINT = "/content/gdrive"
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


Check the currently installed version of `werkzeug`, then pin that same version when installing the dependencies below.

In [None]:
!pip show werkzeug

Name: Werkzeug
Version: 3.0.1
Summary: The comprehensive WSGI web application library.
Home-page: 
Author: 
Author-email: 
License: 
Location: /usr/local/lib/python3.10/dist-packages
Requires: MarkupSafe
Required-by: Flask, tensorboard


In [None]:
%%capture
!pip install segment-geospatial groundingdino-py
!pip install geopandas
!pip install -qq ipympl
!pip install leafmap
!pip install localtileserver
!pip install rasterio
!pip install werkzeug==3.0.1
!pip install flask-restx==1.2.0 localtileserver
!pip install large-image
!pip install rioxarray
!pip install rio-cogeo
# %pip install segment-geospatial groundingdino-py leafmap localtileserver
#!pip install --upgrade flask-restx

Restart the *runtime* before continuing so that the newly installed package versions are the ones that get imported.

In [None]:
import os
import numpy as np
import pandas as pd
import rasterio
import leafmap.leafmap as leafmap
import localtileserver
from samgeo.hq_sam import SamGeo, tms_to_geotiff, get_basemaps
from samgeo import tms_to_geotiff, split_raster
from samgeo.text_sam import LangSAM
import geopandas as gpd
from shapely.geometry import box
from shapely.geometry import Point
import matplotlib.pyplot as plt
import re
from subprocess import run
from pathlib import Path
import rioxarray as rxr
import xarray as xr

## Running SAM

**Initialize SAM**

From [samgeo.gishub](https://samgeo.gishub.org/examples/text_prompts/) on image segmentation using text prompts.

In [None]:
# Create the text-prompted segmentation model used by the cells below.
# On first run this downloads the GroundingDINO and SAM (ViT-H, ~2.4 GB)
# checkpoints, as shown in the cell output.
sam = LangSAM()

GroundingDINO_SwinB.cfg.py:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

final text_encoder_type: bert-base-uncased


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

groundingdino_swinb_cogcoor.pth:   0%|          | 0.00/938M [00:00<?, ?B/s]

Downloading: "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth" to /root/.cache/torch/hub/checkpoints/sam_vit_h_4b8939.pth
100%|██████████| 2.39G/2.39G [00:16<00:00, 157MB/s]


**Note**: You will need to update the root, path, input and output flight folder to run for different flights. *Double check all changes have been made before running!*

In [None]:
# Locations for one flight. Update `root`, the tile folder and the mask output
# folder before running this notebook for a different flight.
root = "/content/gdrive/MyDrive/"  # fill this in with your path
subdir = Path(root) / "Flight1Tiles"  # or your folder path

# `subdir` already contains `root`, so it is the tile folder as-is; keep
# `folder` as a plain string for the os.path calls below.
folder = str(subdir)

# Sort the listing so tiles are processed in a reproducible order
# (os.listdir returns entries in arbitrary order).
files = sorted(os.listdir(folder))

# Folder that receives the SAM masks and shapefiles; create it up front so the
# segmentation step does not fail on a missing directory.
outdir = Path(root) / "masks" / "flight1"
outdir.mkdir(parents=True, exist_ok=True)

# Full paths of every entry in the tile folder.
files2 = [os.path.join(folder, file) for file in files]
files2

In [None]:
# Reproject every tile to EPSG:4326 and rewrite it as a Cloud Optimized GeoTIFF.
reproj_dir = Path(root) / "flight1_out"  # set your preferred output location
reproj_dir.mkdir(parents=True, exist_ok=True)  # to_raster needs the folder to exist

for f in files2:
    if not f.endswith(".tif"):
        print(f"Skipping {f}: Not a TIFF file")
        continue
    if f.endswith("_gcs.tif"):
        # Already a reprojected output; skip it instead of reprojecting it a
        # second time (and instead of aborting the remaining tiles).
        print(f"Skipping {f}: already reprojected (_gcs.tif)")
        continue

    img = rxr.open_rasterio(str(f)).squeeze()
    # Drop band 4 (presumably the alpha band — confirm for your imagery).
    img = img.isel(band=~(img.band == 4))
    # Replace pixel value 255 with 254 (presumably so 255 stays free for
    # nodata/masking downstream — confirm).
    img = img.where(img != 255, 254)

    # Reproject, then clip to the tile's own bounds expressed in EPSG:4326.
    # Resolution is in degrees; 0.1/111111 is presumably ~0.1 m — confirm.
    gcs_bounds = img.rio.transform_bounds("EPSG:4326")
    img_reproj = (
        img.rio.reproject("EPSG:4326", resolution=0.1/111111)
        .rio.clip_box(minx=gcs_bounds[0], miny=gcs_bounds[1],
                      maxx=gcs_bounds[2], maxy=gcs_bounds[3])
    )

    # <tile>.tif -> <tile>_gcs.tif; only the trailing extension is rewritten.
    out_path = reproj_dir / f"{Path(f).stem}_gcs.tif"
    img_reproj.rio.to_raster(out_path, driver="GTiff")

    # Rewrite the file in place as a COG, keeping bands 1-3.
    cmd = ['rio', 'cogeo', 'create', '-b', '1,2,3', str(out_path), str(out_path)]
    p = run(cmd, capture_output=True)
    # Show the last line the command wrote to stderr (empty if it wrote nothing).
    msg = p.stderr.decode().strip().splitlines()
    print(f"...{msg[-1] if msg else ''}")

    # Validate the result and show the first line of the report.
    cmd = ['rio', 'cogeo', 'validate', str(out_path)]
    p = run(cmd, capture_output=True)
    msg = p.stdout.decode().strip().splitlines()
    print(f"...{msg[0] if msg else ''}")

In [None]:
import glob

In [None]:
# Folder holding the reprojected tiles. This must be the folder the
# reprojection step wrote to (root/flight1_out by default); change it if you
# changed that output location.
folder_path = os.path.join(root, "flight1_out")

# The glob pattern already restricts matches to *_gcs.tif; sort for a
# reproducible processing order.
files3 = sorted(glob.glob(os.path.join(folder_path, "*_gcs.tif")))
files3

In [None]:
# Segment every reprojected tile with a text prompt, save the mask raster and
# convert it to a shapefile.
text_prompt = "building"  # same prompt for every tile

mask_dir = Path(outdir)
mask_dir.mkdir(parents=True, exist_ok=True)  # make sure the output folder exists

for image_path in files3:
    # Set the image path for SAM
    sam.set_image(image_path)

    # <tile>_gcs.tif -> <tile>_gcs_mask.tif / <tile>_gcs_mask.shp; built from
    # the file stem so only the trailing extension is affected.
    mask_path = mask_dir / f"{Path(image_path).stem}_mask.tif"
    shp_path = mask_path.with_suffix(".shp")

    # Segment the image and save output
    sam.predict(image_path, text_prompt, box_threshold=0.24, text_threshold=0.24,
                output=str(mask_path))

    # Convert masked buildings to vectors. If no mask file was written
    # (presumably when nothing matches the prompt — confirm), report it and
    # move on rather than stopping the whole run.
    if mask_path.exists():
        sam.raster_to_vector(str(mask_path), str(shp_path))
    else:
        print(f"No mask written for {image_path}; skipping vectorisation")