<a target="_blank" href="https://colab.research.google.com/github/biigle/community-resources/blob/master/zeroShot/GroundingDino.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

The script runs faster on a GPU with CUDA. To enable CUDA in Colab, go to Runtime -> Change runtime type -> Hardware accelerator -> GPU.

In [None]:
# download biigle.py
!wget https://raw.githubusercontent.com/biigle/community-resources/master/biigle/biigle.py

In [15]:
import torch
from PIL import Image, ImageDraw
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection 
import biigle
import requests
from IPython.display import display

#### Please enter your individual image_id, biigle token and email here

In [1]:
# Please enter the id of the image you want to use here.
# In the annotation view, the image id is the number after https://biigle.de/images/ in the URL,
# e.g. for https://biigle.de/images/4823299/annotations?r=205&x=1024&y=683 the image_id is 4823299.
# The default of -1 means "not set yet"; it is rejected by the settings check further down.
image_id = -1

In [11]:
# Please enter the email address you registered with in BIIGLE here.
email = ""
# Please enter your BIIGLE API token (NOT YOUR PASSWORD) here. You can create one at https://biigle.de/settings/tokens
# Take care not to share or commit the notebook with a real token filled in.
token = ""

In [None]:
# Check that every required setting was filled in before doing any work.
# Collect the names of all settings that still hold their placeholder value
# so the error message tells the user exactly what is missing.
missing_settings = [
    name
    for name, is_unset in (
        ("image_id", image_id == -1),
        ("email", email == ""),
        ("token", token == ""),
    )
    if is_unset
]
if missing_settings:
    # Raise instead of calling exit(): in a notebook exit() ends the kernel
    # session, whereas an exception stops the run and shows a readable error.
    raise ValueError(
        "Please set the image_id, email and token in the script. "
        f"Currently not set: {', '.join(missing_settings)}."
    )

#### Check if CUDA is available and set the device to compute on

In [None]:
# Choose the device to compute on: use the GPU when torch reports CUDA support,
# otherwise fall back to the CPU and tell the user how to enable a GPU.
if torch.cuda.is_available():
    device = "cuda"
    print("CUDA is available.")
else:
    device = "cpu"
    print("Warning: CUDA is not available. The script will be slower than with CUDA. To enable cuda, go to Runtime -> Change runtime type -> Hardware accelerator -> GPU")

#### Initialize the zero shot object detector from huggingface

In [5]:
# Name of the pretrained checkpoint; the processor and the model are both
# loaded from this same identifier.
modelstr = "IDEA-Research/grounding-dino-base"

# The processor prepares the image/text inputs and post-processes the model outputs.
processor = AutoProcessor.from_pretrained(modelstr)

# Load the zero-shot detection model, then move it to the selected device.
model = AutoModelForZeroShotObjectDetection.from_pretrained(modelstr)
model = model.to(device)

In [7]:
# Ask the user what to search for.
prompt = input('What are you searching for. Please type in something like "a fish." Please note that the form is very important it must be a <object>. <- The dot is also important. Press enter to continue.')

# The required form is lower-case text that ends with a dot (see the input
# text above). Normalize the input so that a forgotten dot, stray whitespace
# or capital letters do not silently degrade the detections.
prompt = prompt.strip().lower()
if not prompt:
    raise ValueError('The prompt must not be empty. Please enter something like "a fish."')
if not prompt.endswith("."):
    prompt += "."

#### Download, open and show the image

In [None]:
# This is the API endpoint to get an image file.
image_url = f"https://biigle.de/api/v1/images/{image_id}/file"
# Create a basic authentication object from the email and token.
basic = requests.auth.HTTPBasicAuth(email, token)
# Download the image. The timeout keeps the cell from hanging forever if the
# server does not answer.
response = requests.get(image_url, auth=basic, stream=True, timeout=60)
# Fail with a clear HTTP error (e.g. wrong token or unknown image id) instead of
# letting PIL try to parse an error page as an image.
response.raise_for_status()
# Make the raw stream undo any transfer compression (gzip/deflate) before PIL reads it.
response.raw.decode_content = True
# Open and show the image.
image = Image.open(response.raw)
display(image)

#### Prepare the input to the model

In [None]:
# Let the processor turn the image and the text prompt into PyTorch tensors ...
inputs = processor(
    images=image,
    text=prompt,
    return_tensors="pt",
)
# ... and move them to the same device as the model.
inputs = inputs.to(device)

#### Apply the model to get the object detections

In [None]:
# Run the forward pass with gradient tracking disabled.
with torch.no_grad():
    outputs = model(**inputs)

# Post-process the raw outputs into thresholded detections.
# target_sizes is given as (height, width), i.e. PIL's (width, height) size reversed.
results = processor.post_process_grounded_object_detection(
    outputs,
    inputs.input_ids,
    box_threshold=0.4,
    text_threshold=0.3,
    target_sizes=[(image.height, image.width)],
)

# Only one image was passed in, so the first entry holds its detections.
prediction = results[0]
# Keep the boxes and their scores as plain Python lists (same order in both).
scores = prediction["scores"].tolist()
boxes = prediction["boxes"].tolist()

#### Visualize the results

In [None]:
# Draw on a copy so the downloaded image stays untouched. Drawing on `image`
# itself would mean that re-running the model cells feeds the model an image
# that already has boxes and scores painted on it.
annotated_image = image.copy()
# Initialize the image draw object to draw the boxes onto the copy.
draw = ImageDraw.Draw(annotated_image)
# Iterate over all boxes and scores ...
for box, score in zip(boxes, scores):
    # ... to draw the actual box on the image (box is [x1, y1, x2, y2]) ...
    draw.rectangle(box, outline="blue", width=3)
    # ... together with its score, placed at the box corner (x1, y2).
    draw.text((box[0], box[3]), f"{round(score, 3)}", fill="red")
# Display the result.
display(annotated_image)

#### Reformat the AI annotations to be uploaded to BIIGLE

In [None]:
# Use the Grounding DINO object ID for now. You can change that to any other label id you want to use (The label tree must be available in that project).
label_id = 392500

# Build the BIIGLE style annotations, one dictionary per detected box.
# (No API client is created here: this cell only reformats data, and the
# upload cell creates its own client.)
#
# The boxes are in the format [x1, y1, x2, y2]. BIIGLE rectangle annotations
# provide all 4 corners of the rectangle, so each box is expanded to
# [x1, y1, x2, y1, x2, y2, x1, y2].
#
# An annotation for the API needs to provide the following information in a dictionary:
#   image_id: the id of the image the annotation is on
#   shape_id: the id of the shape (5 is a rectangle; To get all available shapes, see https://biigle.de/api/v1/shapes)
#   points: the points of the shape in the format [x1, y1, x2, y2, x3, y3, x4, y4]
#   label_id: the id of the label the annotation should have
#   confidence: the confidence of the model in the prediction (currently saved but not shown in BIIGLE)
annotationlist = [
    {
        "image_id": image_id,
        "shape_id": 5,
        "points": [x1, y1, x2, y1, x2, y2, x1, y2],
        "label_id": label_id,
        "confidence": score,
    }
    for (x1, y1, x2, y2), score in zip(boxes, scores)
]

#### Upload the data to Biigle

In [None]:
# Create the BIIGLE API helper from the credentials entered above.
api = biigle.Api(email=email, token=token)
# At most 100 annotations can be uploaded in one request, so the list is sent
# in consecutive slices of that size.
chunk_size = 100
start = 0
while start < len(annotationlist):
    api.post('image-annotations', json=annotationlist[start:start + chunk_size])
    start += chunk_size