From c2d72935938576ad760069b6377e984050bae7d7 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Thu, 3 Nov 2022 16:35:57 -0700 Subject: [PATCH 01/17] User guide tweak to show how to send in a JPEG image. --- UserGuide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/UserGuide.md b/UserGuide.md index f8a68cc0..83459a09 100644 --- a/UserGuide.md +++ b/UserGuide.md @@ -7,7 +7,7 @@ Groundlight makes it simple to understand images. You can easily create compute *Note: The SDK is currently in "beta" phase. Interfaces are subject to change in future versions.* -## Simple Example +## Computer vision made simple How to build a computer vision system in 5 lines of python code: @@ -15,7 +15,7 @@ How to build a computer vision system in 5 lines of python code: from groundlight import Groundlight gl = Groundlight() d = gl.create_detector("door", query="Is the door open?") # define with natural language -image_query = gl.submit_image_query(detector=d, image="path/filename.jpeg") # send an image +image_query = gl.submit_image_query(detector=d, image=jpeg_img) # bytes or filename print(f"The answer is {image_query.result}") # get the result ``` From b51b9c14147c532b9d17850d519131ffdcb698d1 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Thu, 10 Nov 2022 16:38:40 -0800 Subject: [PATCH 02/17] WIP to support client-side polling --- pyproject.toml | 2 +- src/groundlight/client.py | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 061de78d..baf9afcb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "groundlight" -version = "0.5.4" +version = "0.6.0" license = "MIT" readme = "UserGuide.md" homepage = "https://groundlight.ai" diff --git a/src/groundlight/client.py b/src/groundlight/client.py index ccc4c919..bef51bb2 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -107,6 +107,7 @@ def submit_image_query( self, detector: Union[Detector, str], image: Union[str, bytes, BytesIO, BufferedReader], + wait: float = 0, ) -> ImageQuery: """Evaluates an image with Groundlight. :param detector: the Detector object, or string id of a detector like `det_12345` @@ -114,12 +115,14 @@ def submit_image_query( - a filename (string) of a jpeg file - a byte array or BytesIO with jpeg bytes - a numpy array in the 0-255 range (gets converted to jpeg) + :param wait: How long to wait (in seconds) for a confident answer """ if isinstance(detector, Detector): detector_id = detector.id else: detector_id = detector image_bytesio: Union[BytesIO, BufferedReader] + #TODO: support PIL Images if isinstance(image, str): # Assume it is a filename image_bytesio = buffer_from_jpeg_file(image) @@ -134,5 +137,14 @@ def submit_image_query( "Unsupported type for image. We only support JPEG images specified through a filename, bytes, BytesIO, or BufferedReader object." 
) - obj = self.image_queries_api.submit_image_query(detector_id=detector_id, body=image_bytesio) - return ImageQuery.parse_obj(obj.to_dict()) + img_query = self.image_queries_api.submit_image_query(detector_id=detector_id, body=image_bytesio) + if wait: + threshold = confidence_threshold_for_detector(detector) + img_query = self._poll_for_confident_result(img_query, threshold) + return ImageQuery.parse_obj(img_query.to_dict()) + + def _poll_for_confident_result(self, img_query:"ImageQuery", wait: float, threshold: float) -> "ImageQuery": + start_time = time.time() + while time.time() - start_time < wait: + pass + return img_query From a56538af8daf54d4a7b8c4d720119a6cbb898d18 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Thu, 10 Nov 2022 16:47:36 -0800 Subject: [PATCH 03/17] Updating userguide. --- UserGuide.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/UserGuide.md b/UserGuide.md index 83459a09..118f77b9 100644 --- a/UserGuide.md +++ b/UserGuide.md @@ -2,23 +2,22 @@ Groundlight makes it simple to understand images. You can easily create computer vision detectors just by describing what you want to know using natural language. -**How does it work?** Your images are first analyzed by machine learning (ML) models which are automatically trained on your data. If those models have high enough confidence, that's your answer. But if the models are unsure, then the images are progressively escalated to more resource-intensive analysis methods up to real-time human review. So what you get is a computer vision system that starts working right away without even needing to first gather and label a dataset. At first it will operate with high latency, because people need to review the image queries. But over time, the ML systems will learn and improve so queries come back faster with higher confidence. - -*Note: The SDK is currently in "beta" phase. Interfaces are subject to change in future versions.* - - ## Computer vision made simple -How to build a computer vision system in 5 lines of python code: +How to build a working computer vision system in just 5 lines of python code: ```Python from groundlight import Groundlight gl = Groundlight() d = gl.create_detector("door", query="Is the door open?") # define with natural language -image_query = gl.submit_image_query(detector=d, image=jpeg_img) # bytes or filename +image_query = gl.submit_image_query(detector=d, image=jpeg_img, wait=10) print(f"The answer is {image_query.result}") # get the result ``` +**How does it work?** Your images are first analyzed by machine learning (ML) models which are automatically trained on your data. If those models have high enough confidence, that's your answer. But if the models are unsure, then the images are progressively escalated to more resource-intensive analysis methods up to real-time human review. So what you get is a computer vision system that starts working right away without even needing to first gather and label a dataset. At first it will operate with high latency, because people need to review the image queries. But over time, the ML systems will learn and improve so queries come back faster with higher confidence. + +*Note: The SDK is currently in "beta" phase. Interfaces are subject to change in future versions.* + ## Getting Started From 6419962d7fd8f8658096758e3613ecf47ec03474 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Thu, 10 Nov 2022 16:50:15 -0800 Subject: [PATCH 04/17] Fail-fast on test matrix. 
--- .github/workflows/test-integ.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-integ.yaml b/.github/workflows/test-integ.yaml index d3ecd4e5..26ef5540 100644 --- a/.github/workflows/test-integ.yaml +++ b/.github/workflows/test-integ.yaml @@ -5,7 +5,7 @@ jobs: run-tests: runs-on: ubuntu-20.04 strategy: - fail-fast: false + fail-fast: true matrix: python-version: [ #"3.6", # Default on Ubuntu18.04 but openapi-generator fails From f58982296007243346686cdc0d91bbc3b42c8b82 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Thu, 10 Nov 2022 17:48:45 -0800 Subject: [PATCH 05/17] Documenting how to get tests to run. --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index 8c8e00bb..8d63f638 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,32 @@ $ make generate ## Testing Most tests need an API endpoint to run. +### Getting the tests to use your current code. + +You kinda want to do a `pip install -e .` equivalent but I don't know how to do that with poetry. The ugly version is this... + +Find the directory where `groundlight` is installed: + +``` +$ python +Python 3.7.4 (default, Aug 13 2019, 20:35:49) +[GCC 7.3.0] :: Anaconda, Inc. on linux +Type "help", "copyright", "credits" or "license" for more information. +>>> import groundlight +>>> groundlight + +``` + +Then blow this away and set up a symlink from that directory to your source. + +``` +cd /home/leo/anaconda3/lib/python3.7/site-packages/ +rm -rf groundlight +ln -s ~/ptdev/groundlight-python-sdk/src/groundlight groundlight +``` + +TODO: something better. + ### Local API endpoint 1. Set up a local [janzu API From a312dda6009b2c19b2c8c8d3e56800c89f1584a7 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Fri, 11 Nov 2022 17:00:13 -0800 Subject: [PATCH 06/17] More tests --- test/assets/blankfile.jpeg | 0 test/integration/test_groundlight.py | 43 +++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 test/assets/blankfile.jpeg diff --git a/test/assets/blankfile.jpeg b/test/assets/blankfile.jpeg new file mode 100644 index 00000000..e69de29b diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index c53bedf0..3ae5741a 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -1,19 +1,24 @@ import os from datetime import datetime +import openapi_client import pytest + from groundlight import Groundlight from model import Detector, ImageQuery, PaginatedDetectorList, PaginatedImageQueryList - @pytest.fixture def gl() -> Groundlight: + """Creates a Groundlight client object for testing. + """ endpoint = os.environ.get("GROUNDLIGHT_TEST_API_ENDPOINT", "http://localhost:8000/device-api") return Groundlight(endpoint=endpoint) @pytest.fixture def detector(gl: Groundlight) -> Detector: + """Creates a new Test detector. + """ name = f"Test {datetime.utcnow()}" # Need a unique name query = "Test query?" return gl.create_detector(name=name, query=query) @@ -24,7 +29,6 @@ def image_query(gl: Groundlight, detector: Detector) -> ImageQuery: return gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg") -# @pytest.mark.skip(reason="We don't want to create a million detectors") def test_create_detector(gl: Groundlight): name = f"Test {datetime.utcnow()}" # Need a unique name query = "Test query?" 
@@ -33,7 +37,6 @@ def test_create_detector(gl: Groundlight): assert isinstance(_detector, Detector) -# @pytest.mark.skip(reason="We don't want to create a million detectors") def test_create_detector_with_config_name(gl: Groundlight): name = f"Test b4mu11-mlp {datetime.utcnow()}" # Need a unique name query = "Test query with b4mu11-mlp?" @@ -49,27 +52,53 @@ def test_list_detectors(gl: Groundlight): assert isinstance(detectors, PaginatedDetectorList) -# @pytest.mark.skip(reason="We don't want to create a million detectors") def test_get_detector(gl: Groundlight, detector: Detector): _detector = gl.get_detector(id=detector.id) assert str(_detector) assert isinstance(_detector, Detector) -# @pytest.mark.skip(reason="We don't want to create a million detectors and image_queries") -def test_submit_image_query(gl: Groundlight, detector: Detector): +def test_submit_image_query_filename(gl: Groundlight, detector: Detector): _image_query = gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg") assert str(_image_query) assert isinstance(_image_query, ImageQuery) +def test_submit_image_query_jpeg_bytes(gl: Groundlight, detector: Detector): + jpeg = open("test/assets/dog.jpeg", "rb").read() + _image_query = gl.submit_image_query(detector=detector.id, image=jpeg) + assert str(_image_query) + assert isinstance(_image_query, ImageQuery) + + +def test_submit_image_query_jpeg_truncated(gl: Groundlight, detector: Detector): + jpeg = open("test/assets/dog.jpeg", "rb").read() + jpeg_truncated = jpeg[:-500] # Cut off the last 500 bytes + # This is an extra difficult test because the header is valid. + with pytest.raises(openapi_client.exceptions.ApiException) as exc_info: + _image_query = gl.submit_image_query(detector=detector.id, image=jpeg_truncated) + e = exc_info.value + assert e.status == 400 + + +def test_submit_image_query_bad_filename(gl: Groundlight, detector: Detector): + with pytest.raises(FileNotFoundError): + _image_query = gl.submit_image_query(detector=detector.id, image="missing-file.jpeg") + + +def test_submit_image_query_bad_jpeg_file(gl: Groundlight, detector: Detector): + with pytest.raises(ValueError) as exc_info: + _image_query = gl.submit_image_query(detector=detector.id, image="test/assets/blankfile.jpeg") + assert "jpeg" in str(exc_info).lowe() + + + def test_list_image_queries(gl: Groundlight): image_queries = gl.list_image_queries() assert str(image_queries) assert isinstance(image_queries, PaginatedImageQueryList) -# @pytest.mark.skip(reason="We don't want to create a million detectors and image_queries") def test_get_image_query(gl: Groundlight, image_query: ImageQuery): _image_query = gl.get_image_query(id=image_query.id) assert str(_image_query) From 36b01cf778b27d73d310086845e156e5c95fdbe4 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Fri, 11 Nov 2022 17:05:35 -0800 Subject: [PATCH 07/17] Test fails because code is incomplete! 
:) --- test/integration/test_groundlight.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index 3ae5741a..119e45b5 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -58,6 +58,13 @@ def test_get_detector(gl: Groundlight, detector: Detector): assert isinstance(_detector, Detector) +def test_submit_image_query_blocking(gl: Groundlight, detector: Detector): + # Ask for a trivially small wait so it never has time to update, but uses the code path + _image_query = gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg", wait=0.001) + assert str(_image_query) + assert isinstance(_image_query, ImageQuery) + + def test_submit_image_query_filename(gl: Groundlight, detector: Detector): _image_query = gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg") assert str(_image_query) From 28801466d512a4c3200a61ffc0540cdfbc85df29 Mon Sep 17 00:00:00 2001 From: Auto-format Bot Date: Sat, 12 Nov 2022 01:06:44 +0000 Subject: [PATCH 08/17] Automatically reformatting code with black --- src/groundlight/client.py | 4 ++-- test/integration/test_groundlight.py | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index bef51bb2..54675f29 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -122,7 +122,7 @@ def submit_image_query( else: detector_id = detector image_bytesio: Union[BytesIO, BufferedReader] - #TODO: support PIL Images + # TODO: support PIL Images if isinstance(image, str): # Assume it is a filename image_bytesio = buffer_from_jpeg_file(image) @@ -143,7 +143,7 @@ def submit_image_query( img_query = self._poll_for_confident_result(img_query, threshold) return ImageQuery.parse_obj(img_query.to_dict()) - def _poll_for_confident_result(self, img_query:"ImageQuery", wait: float, threshold: float) -> "ImageQuery": + def _poll_for_confident_result(self, img_query: "ImageQuery", wait: float, threshold: float) -> "ImageQuery": start_time = time.time() while time.time() - start_time < wait: pass diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index 119e45b5..0069917e 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -7,18 +7,17 @@ from groundlight import Groundlight from model import Detector, ImageQuery, PaginatedDetectorList, PaginatedImageQueryList + @pytest.fixture def gl() -> Groundlight: - """Creates a Groundlight client object for testing. - """ + """Creates a Groundlight client object for testing.""" endpoint = os.environ.get("GROUNDLIGHT_TEST_API_ENDPOINT", "http://localhost:8000/device-api") return Groundlight(endpoint=endpoint) @pytest.fixture def detector(gl: Groundlight) -> Detector: - """Creates a new Test detector. - """ + """Creates a new Test detector.""" name = f"Test {datetime.utcnow()}" # Need a unique name query = "Test query?" return gl.create_detector(name=name, query=query) @@ -80,7 +79,7 @@ def test_submit_image_query_jpeg_bytes(gl: Groundlight, detector: Detector): def test_submit_image_query_jpeg_truncated(gl: Groundlight, detector: Detector): jpeg = open("test/assets/dog.jpeg", "rb").read() - jpeg_truncated = jpeg[:-500] # Cut off the last 500 bytes + jpeg_truncated = jpeg[:-500] # Cut off the last 500 bytes # This is an extra difficult test because the header is valid. 
with pytest.raises(openapi_client.exceptions.ApiException) as exc_info: _image_query = gl.submit_image_query(detector=detector.id, image=jpeg_truncated) @@ -99,7 +98,6 @@ def test_submit_image_query_bad_jpeg_file(gl: Groundlight, detector: Detector): assert "jpeg" in str(exc_info).lowe() - def test_list_image_queries(gl: Groundlight): image_queries = gl.list_image_queries() assert str(image_queries) From 6bd2030b0d05abef523c66ff57c793f9b333fd62 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Fri, 11 Nov 2022 17:42:20 -0800 Subject: [PATCH 09/17] MAking blocking submit work which waits for a specified threshold. --- spec/public-api.yaml | 5 +++-- src/groundlight/client.py | 33 +++++++++++++++++++++------- src/groundlight/images.py | 2 ++ test/integration/test_groundlight.py | 3 ++- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/spec/public-api.yaml b/spec/public-api.yaml index 04dc6301..4a5d4455 100644 --- a/spec/public-api.yaml +++ b/spec/public-api.yaml @@ -1,8 +1,8 @@ openapi: 3.0.3 info: title: Groundlight API - version: 0.1.0 - description: Ask visual queries. + version: 0.6.0 + description: Easy Computer Vision powered by Natural Language contact: name: Questions? email: support@groundlight.ai @@ -273,6 +273,7 @@ components: like to use. maxLength: 100 required: + # TODO: make name optional - that's how the web version is going. - name - query x-internal: true diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 54675f29..e57b8d2b 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -1,5 +1,7 @@ -import os from io import BufferedReader, BytesIO +import logging +import os +import time from typing import Optional, Union from model import Detector, ImageQuery, PaginatedDetectorList, PaginatedImageQueryList @@ -15,6 +17,8 @@ GROUNDLIGHT_ENDPOINT = os.environ.get("GROUNDLIGHT_ENDPOINT", "https://api.groundlight.ai/device-api") +logger = logging.getLogger("groundlight") + class ApiTokenError(Exception): pass @@ -57,7 +61,10 @@ def __init__(self, endpoint: str = GROUNDLIGHT_ENDPOINT, api_token: str = None): self.detectors_api = DetectorsApi(ApiClient(configuration)) self.image_queries_api = ImageQueriesApi(ApiClient(configuration)) - def get_detector(self, id: str) -> Detector: + def get_detector(self, id:Union[str, Detector]) -> Detector: + if isinstance(id, Detector): + # Short-circuit + return id obj = self.detectors_api.get_detector(id=id) return Detector.parse_obj(obj.to_dict()) @@ -137,14 +144,24 @@ def submit_image_query( "Unsupported type for image. We only support JPEG images specified through a filename, bytes, BytesIO, or BufferedReader object." 
) - img_query = self.image_queries_api.submit_image_query(detector_id=detector_id, body=image_bytesio) + raw_img_query = self.image_queries_api.submit_image_query(detector_id=detector_id, body=image_bytesio) + img_query = ImageQuery.parse_obj(raw_img_query.to_dict()) if wait: - threshold = confidence_threshold_for_detector(detector) - img_query = self._poll_for_confident_result(img_query, threshold) - return ImageQuery.parse_obj(img_query.to_dict()) + threshold = self.get_detector(detector).confidence_threshold + img_query = self._poll_for_confident_result(img_query, wait, threshold) + return img_query - def _poll_for_confident_result(self, img_query: "ImageQuery", wait: float, threshold: float) -> "ImageQuery": + def _poll_for_confident_result(self, img_query: ImageQuery, wait: float, threshold: float) -> ImageQuery: + """Polls on an image query waiting for the result to reach the specified confidence. + """ start_time = time.time() + delay = 0.1 while time.time() - start_time < wait: - pass + current_confidence = img_query.result.confidence + if current_confidence >= threshold: + break + logger.debug(f"Polling for updated image_query because confidence {current_confidence:.3f} < {threshold:.3f}") + time.sleep(delay) + delay *= 1.4 # slow exponential backoff + img_query = self.get_image_query(img_query.id) return img_query diff --git a/src/groundlight/images.py b/src/groundlight/images.py index f68d311c..fb30cd41 100644 --- a/src/groundlight/images.py +++ b/src/groundlight/images.py @@ -9,6 +9,8 @@ def buffer_from_jpeg_file(image_filename: str) -> io.BufferedReader: For now, we only support JPEG files, and raise an ValueError otherwise. """ if imghdr.what(image_filename) == "jpeg": + # Note this will get fooled by truncated binaries since it only reads the header. + # That's okay - the server will catch it. return open(image_filename, "rb") else: raise ValueError("We only support JPEG files, for now.") diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index 0069917e..d4a403ad 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -59,7 +59,7 @@ def test_get_detector(gl: Groundlight, detector: Detector): def test_submit_image_query_blocking(gl: Groundlight, detector: Detector): # Ask for a trivially small wait so it never has time to update, but uses the code path - _image_query = gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg", wait=0.001) + _image_query = gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg", wait=5) assert str(_image_query) assert isinstance(_image_query, ImageQuery) @@ -81,6 +81,7 @@ def test_submit_image_query_jpeg_truncated(gl: Groundlight, detector: Detector): jpeg = open("test/assets/dog.jpeg", "rb").read() jpeg_truncated = jpeg[:-500] # Cut off the last 500 bytes # This is an extra difficult test because the header is valid. + # So a casual check of the image will appear valid. 
with pytest.raises(openapi_client.exceptions.ApiException) as exc_info: _image_query = gl.submit_image_query(detector=detector.id, image=jpeg_truncated) e = exc_info.value From 2a38ecf3494d5dc89bc63b544619ca678f0cce03 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Fri, 11 Nov 2022 17:42:58 -0800 Subject: [PATCH 10/17] Fixup typo in test --- test/integration/test_groundlight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index d4a403ad..abfe9546 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -96,7 +96,7 @@ def test_submit_image_query_bad_filename(gl: Groundlight, detector: Detector): def test_submit_image_query_bad_jpeg_file(gl: Groundlight, detector: Detector): with pytest.raises(ValueError) as exc_info: _image_query = gl.submit_image_query(detector=detector.id, image="test/assets/blankfile.jpeg") - assert "jpeg" in str(exc_info).lowe() + assert "jpeg" in str(exc_info).lower() def test_list_image_queries(gl: Groundlight): From cbc528b8723079f81f068b4b1f762b32a4e31abd Mon Sep 17 00:00:00 2001 From: Auto-format Bot Date: Sat, 12 Nov 2022 01:43:32 +0000 Subject: [PATCH 11/17] Automatically reformatting code with black --- src/groundlight/client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index e57b8d2b..6b8e767f 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -61,7 +61,7 @@ def __init__(self, endpoint: str = GROUNDLIGHT_ENDPOINT, api_token: str = None): self.detectors_api = DetectorsApi(ApiClient(configuration)) self.image_queries_api = ImageQueriesApi(ApiClient(configuration)) - def get_detector(self, id:Union[str, Detector]) -> Detector: + def get_detector(self, id: Union[str, Detector]) -> Detector: if isinstance(id, Detector): # Short-circuit return id @@ -152,15 +152,16 @@ def submit_image_query( return img_query def _poll_for_confident_result(self, img_query: ImageQuery, wait: float, threshold: float) -> ImageQuery: - """Polls on an image query waiting for the result to reach the specified confidence. - """ + """Polls on an image query waiting for the result to reach the specified confidence.""" start_time = time.time() delay = 0.1 while time.time() - start_time < wait: current_confidence = img_query.result.confidence if current_confidence >= threshold: break - logger.debug(f"Polling for updated image_query because confidence {current_confidence:.3f} < {threshold:.3f}") + logger.debug( + f"Polling for updated image_query because confidence {current_confidence:.3f} < {threshold:.3f}" + ) time.sleep(delay) delay *= 1.4 # slow exponential backoff img_query = self.get_image_query(img_query.id) From a4d0b59887ef6da1462f16f4f3ab7a8532dd18b6 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Sat, 12 Nov 2022 09:57:19 -0800 Subject: [PATCH 12/17] Fixed to recognize None confidence as human label. 
---
 src/groundlight/client.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index 6b8e767f..23be8343 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -157,7 +157,11 @@ def _poll_for_confident_result(self, img_query: ImageQuery, wait: float, thresho
         delay = 0.1
         while time.time() - start_time < wait:
             current_confidence = img_query.result.confidence
+            if current_confidence is None:
+                logger.debug(f"Image query with None confidence implies human label (for now)")
+                break
             if current_confidence >= threshold:
+                logger.debug(f"Image query confidence {current_confidence:.3f} above {threshold:.3f}")
                 break
             logger.debug(
                 f"Polling for updated image_query because confidence {current_confidence:.3f} < {threshold:.3f}"

From a16d2dacbeec9922152ce2f8c0524c9e3facfa03 Mon Sep 17 00:00:00 2001
From: Leo Dirac
Date: Sat, 12 Nov 2022 10:13:30 -0800
Subject: [PATCH 13/17] Adding code samples.

---
 samples/README.md          |  2 ++
 samples/blocking_submit.py | 14 ++++++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 samples/README.md
 create mode 100644 samples/blocking_submit.py

diff --git a/samples/README.md b/samples/README.md
new file mode 100644
index 00000000..25df1f15
--- /dev/null
+++ b/samples/README.md
@@ -0,0 +1,2 @@
+Code samples
+
diff --git a/samples/blocking_submit.py b/samples/blocking_submit.py
new file mode 100644
index 00000000..18e82d13
--- /dev/null
+++ b/samples/blocking_submit.py
@@ -0,0 +1,14 @@
+"""Example of how to wait for a confident result
+"""
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+from groundlight import Groundlight
+
+gl = Groundlight()
+
+d = gl.get_or_create_detector(name="dog", query="is there a dog in the picture?")
+
+print(f"Submitting image query")
+iq = gl.submit_image_query(d, image="../test/assets/dog.jpeg", wait=30)
+print(iq)

From 2ce94a282b33da5fd93e180e9f10082e47cd84b7 Mon Sep 17 00:00:00 2001
From: Auto-format Bot
Date: Sat, 12 Nov 2022 18:13:57 +0000
Subject: [PATCH 14/17] Automatically reformatting code with black

---
 samples/blocking_submit.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/samples/blocking_submit.py b/samples/blocking_submit.py
index 18e82d13..8faa2a5c 100644
--- a/samples/blocking_submit.py
+++ b/samples/blocking_submit.py
@@ -1,6 +1,7 @@
 """Example of how to wait for a confident result
 """
 import logging
+
 logging.basicConfig(level=logging.DEBUG)
 
 from groundlight import Groundlight

From 6fb665b1b61163c16d80ef0e707d66ea77d25b64 Mon Sep 17 00:00:00 2001
From: Leo Dirac
Date: Sun, 13 Nov 2022 12:41:29 -0800
Subject: [PATCH 15/17] Adding latency vs confidence trade-off to user guide.

---
 UserGuide.md | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/UserGuide.md b/UserGuide.md
index 118f77b9..27cb2742 100644
--- a/UserGuide.md
+++ b/UserGuide.md
@@ -10,7 +10,7 @@ How to build a working computer vision system in just 5 lines of python code:
 from groundlight import Groundlight
 gl = Groundlight()
 d = gl.create_detector("door", query="Is the door open?") # define with natural language
-image_query = gl.submit_image_query(detector=d, image=jpeg_img, wait=10)
+image_query = gl.submit_image_query(detector=d, image=jpeg_img) # send in an image
 print(f"The answer is {image_query.result}") # get the result
 ```
 
@@ -19,6 +19,33 @@ print(f"The answer is {image_query.result}") # get the result
 
 *Note: The SDK is currently in "beta" phase.
Interfaces are subject to change in future versions.* +## Managing confidence levels and latency + +Groundlight gives you a simple way to control the trade-off of latency against accuracy. The longer you can wait for an answer to your image query, the better accuracy you will get. In particular, if the ML models are unsure of the best response, they will escalate the image query to a real-time human monitor to review them. Your code can easily wait for this delayed response. + +The desired confidence level is set as the escalation threshold on your detector. This determines what is the minimum confidence score for the ML system to provide before the image query is escalated to a human monitor. + +For example, say you want to set your desired confidence level to 0.95 and that you're willing to wait up to 30 seconds to get a response. + +```Python +d = gl.create_detector("lights", query="Are the lights on?", confidence=0.95) +image_query = gl.submit_image_query(detector=d, image=jpeg_img, wait=30) +# This will wait until either 30 seconds have passed or the confidence reaches 0.95 +print(f"The answer is {image_query.result}") +``` + +Or if you want to run as fast as possible, set `wait=0`. This way you will only get the ML results, without waiting for human review. Image queries which are below the desired confidence level still get escalated to human review, and the results are incorporated as training data to improve your ML model, but your code will not wait for that to happen. + +```Python +image_query = gl.submit_image_query(detector=d, image=jpeg_img, wait=0) +``` + +You can see the confidence score returned for the image query: + +```Python +print(f"The confidence is {image_query.result.confidence}") +``` + ## Getting Started 1. Install the `groundlight` SDK. Requires python version 3.7 or higher. See [prerequisites](#Prerequisites). @@ -44,6 +71,7 @@ $ python3 glapp.py ``` + ## Prerequisites ### Using Groundlight SDK on Ubuntu 18.04 @@ -124,6 +152,7 @@ gl = Groundlight() try: detectors = gl.list_detectors() except ApiException as e: + # Many fields available to describe the error print(e) print(e.args) print(e.body) From 83d1bd683f0619e09343e1fa2f7e50122e6cf029 Mon Sep 17 00:00:00 2001 From: Leo Dirac Date: Sun, 13 Nov 2022 16:56:37 -0800 Subject: [PATCH 16/17] Changing second example to trash can. --- UserGuide.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/UserGuide.md b/UserGuide.md index 27cb2742..9617e07a 100644 --- a/UserGuide.md +++ b/UserGuide.md @@ -25,11 +25,11 @@ Groundlight gives you a simple way to control the trade-off of latency against a The desired confidence level is set as the escalation threshold on your detector. This determines what is the minimum confidence score for the ML system to provide before the image query is escalated to a human monitor. -For example, say you want to set your desired confidence level to 0.95 and that you're willing to wait up to 30 seconds to get a response. +For example, say you want to set your desired confidence level to 0.95, but that you're willing to wait up to 60 seconds to get a confident response. 
 ```Python
-d = gl.create_detector("lights", query="Are the lights on?", confidence=0.95)
-image_query = gl.submit_image_query(detector=d, image=jpeg_img, wait=30)
+d = gl.create_detector("trash", query="Is the trash can full?", confidence=0.95)
+image_query = gl.submit_image_query(detector=d, image=jpeg_img, wait=60)
 # This will wait until either 30 seconds have passed or the confidence reaches 0.95
 print(f"The answer is {image_query.result}")
 ```
 

From 2534d92cede7323a036fd738f1337b630cbab882 Mon Sep 17 00:00:00 2001
From: positavi <79671823+positavi@users.noreply.github.com>
Date: Mon, 14 Nov 2022 11:12:58 -0800
Subject: [PATCH 17/17] Update UserGuide.md

updated verbiage
---
 UserGuide.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/UserGuide.md b/UserGuide.md
index 9617e07a..0b2287d9 100644
--- a/UserGuide.md
+++ b/UserGuide.md
@@ -21,9 +21,9 @@ print(f"The answer is {image_query.result}") # get the result
 
 ## Managing confidence levels and latency
 
-Groundlight gives you a simple way to control the trade-off of latency against accuracy. The longer you can wait for an answer to your image query, the better accuracy you will get. In particular, if the ML models are unsure of the best response, they will escalate the image query to a real-time human monitor to review them. Your code can easily wait for this delayed response.
+Groundlight gives you a simple way to control the trade-off of latency against accuracy. The longer you can wait for an answer to your image query, the better accuracy you can get. In particular, if the ML models are unsure of the best response, they will escalate the image query to more intensive analysis with more complex models and real-time human monitors as needed. Your code can easily wait for this delayed response. Either way, these new results are automatically used to train your models, so your next queries will get better results faster.
 
-The desired confidence level is set as the escalation threshold on your detector. This determines what is the minimum confidence score for the ML system to provide before the image query is escalated to a human monitor.
+The desired confidence level is set as the escalation threshold on your detector. This determines the minimum confidence score the ML system must provide before the image query is escalated.
 
 For example, say you want to set your desired confidence level to 0.95, but that you're willing to wait up to 60 seconds to get a confident response.
 
@@ -34,7 +34,7 @@ image_query = gl.submit_image_query(detector=d, image=jpeg_img, wait=60)
 print(f"The answer is {image_query.result}")
 ```
 
-Or if you want to run as fast as possible, set `wait=0`. This way you will only get the ML results, without waiting for human review. Image queries which are below the desired confidence level still get escalated to human review, and the results are incorporated as training data to improve your ML model, but your code will not wait for that to happen.
+Or if you want to run as fast as possible, set `wait=0`. This way you will only get the ML results, without waiting for escalation. Image queries which are below the desired confidence level will still be escalated for further analysis, and the results are incorporated as training data to improve your ML model, but your code will not wait for that to happen.
 
 ```Python
 image_query = gl.submit_image_query(detector=d, image=jpeg_img, wait=0)