feat(automl): add support for image classification, image object detection, text classification, text extraction; add `batch_predict`; add `deploy_model`, `undeploy_model`, `export_model`; add annotation specs (via synth) (#9628)

yoshi-automation authored and busunkim96 committed Nov 13, 2019
1 parent 2d14cb6 commit 43d5607
Showing 75 changed files with 11,648 additions and 386 deletions.
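
Beyond `batch_predict`, the headline additions are the model lifecycle methods on `AutoMlClient`. A minimal sketch of how they might be called; the project, location, and model IDs and the GCS export path are placeholders, not values from this commit:

```python
from google.cloud import automl_v1

client = automl_v1.AutoMlClient()

# Placeholder resource name; substitute real project, location, and model IDs.
model_name = client.model_path("my-project", "us-central1", "my-model")

# deploy_model/undeploy_model return long-running operations; .result() blocks
# until they complete.
client.deploy_model(model_name).result()
client.undeploy_model(model_name).result()

# export_model writes the model under a GCS prefix (bucket/path are illustrative).
export_config = {"gcs_destination": {"output_uri_prefix": "gs://my-bucket/export/"}}
client.export_model(model_name, export_config).result()
```
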
386 changes: 385 additions & 1 deletion automl/google/cloud/automl_v1/gapic/auto_ml_client.py

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions automl/google/cloud/automl_v1/gapic/auto_ml_client_config.py
@@ -52,6 +52,11 @@
"retry_codes_name": "non_idempotent",
"retry_params_name": "default",
},
"GetAnnotationSpec": {
"timeout_millis": 5000,
"retry_codes_name": "idempotent",
"retry_params_name": "default",
},
"CreateModel": {
"timeout_millis": 20000,
"retry_codes_name": "non_idempotent",
@@ -77,6 +82,21 @@
"retry_codes_name": "idempotent",
"retry_params_name": "default",
},
"DeployModel": {
"timeout_millis": 5000,
"retry_codes_name": "non_idempotent",
"retry_params_name": "default",
},
"UndeployModel": {
"timeout_millis": 5000,
"retry_codes_name": "non_idempotent",
"retry_params_name": "default",
},
"ExportModel": {
"timeout_millis": 5000,
"retry_codes_name": "non_idempotent",
"retry_params_name": "default",
},
"GetModelEvaluation": {
"timeout_millis": 5000,
"retry_codes_name": "idempotent",
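
These entries only set the packaged defaults (per-attempt timeout in milliseconds and retry class) for the new RPCs; individual calls can still override them. A hedged sketch of a per-call override, assuming a placeholder model name:

```python
from google.api_core import retry
from google.cloud import automl_v1

client = automl_v1.AutoMlClient()
model_name = client.model_path("my-project", "us-central1", "my-model")

# Override the packaged DeployModel defaults (5000 ms, non_idempotent) for this
# one call: allow up to 60 s per attempt and retry for up to 10 minutes overall.
operation = client.deploy_model(
    model_name,
    retry=retry.Retry(deadline=600.0),
    timeout=60.0,
)
operation.result()
```
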
74 changes: 74 additions & 0 deletions automl/google/cloud/automl_v1/gapic/enums.py
@@ -19,6 +19,80 @@
import enum


class ClassificationType(enum.IntEnum):
"""
Type of the classification problem.
Attributes:
CLASSIFICATION_TYPE_UNSPECIFIED (int): An un-set value of this enum.
MULTICLASS (int): At most one label is allowed per example.
MULTILABEL (int): Multiple labels are allowed for one example.
"""

CLASSIFICATION_TYPE_UNSPECIFIED = 0
MULTICLASS = 1
MULTILABEL = 2


class Document(object):
class Layout(object):
class TextSegmentType(enum.IntEnum):
"""
The type of TextSegment in the context of the original document.
Attributes:
TEXT_SEGMENT_TYPE_UNSPECIFIED (int): Should not be used.
TOKEN (int): The text segment is a token, e.g. a word.
PARAGRAPH (int): The text segment is a paragraph.
FORM_FIELD (int): The text segment is a form field.
FORM_FIELD_NAME (int): The text segment is the name part of a form field. It will be treated as
a child of another FORM\_FIELD TextSegment if its span is a subspan of
another TextSegment with type FORM\_FIELD.
FORM_FIELD_CONTENTS (int): The text segment is the text content part of a form field. It will be
treated as a child of another FORM\_FIELD TextSegment if its span is a
subspan of another TextSegment with type FORM\_FIELD.
TABLE (int): The text segment is a whole table, including headers and all rows.
TABLE_HEADER (int): The text segment is a table's headers. It will be treated as a child of
another TABLE TextSegment if its span is a subspan of another TextSegment
with type TABLE.
TABLE_ROW (int): The text segment is a row in a table. It will be treated as a child of
another TABLE TextSegment if its span is a subspan of another TextSegment
with type TABLE.
TABLE_CELL (int): The text segment is a cell in a table. It will be treated as a child of
another TABLE\_ROW TextSegment if its span is a subspan of another
TextSegment with type TABLE\_ROW.
"""

TEXT_SEGMENT_TYPE_UNSPECIFIED = 0
TOKEN = 1
PARAGRAPH = 2
FORM_FIELD = 3
FORM_FIELD_NAME = 4
FORM_FIELD_CONTENTS = 5
TABLE = 6
TABLE_HEADER = 7
TABLE_ROW = 8
TABLE_CELL = 9


class DocumentDimensions(object):
class DocumentDimensionUnit(enum.IntEnum):
"""
Unit of the document dimension.
Attributes:
DOCUMENT_DIMENSION_UNIT_UNSPECIFIED (int): Should not be used.
INCH (int): Document dimension is measured in inches.
CENTIMETER (int): Document dimension is measured in centimeters.
POINT (int): Document dimension is measured in points. 72 points = 1 inch.
"""

DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0
INCH = 1
CENTIMETER = 2
POINT = 3


class Model(object):
class DeploymentState(enum.IntEnum):
"""
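
The new enums are plain `enum.IntEnum` classes whose members mirror the proto values, so they compare equal to the raw integers the API returns. A small sketch, assuming they are re-exported on `automl_v1.enums` like the existing enums:

```python
from google.cloud import automl_v1

# IntEnum members compare equal to the raw proto integers.
multilabel = automl_v1.enums.ClassificationType.MULTILABEL
assert multilabel == 2

# Nested enums follow the proto message nesting shown above.
token = automl_v1.enums.Document.Layout.TextSegmentType.TOKEN
inch = automl_v1.enums.DocumentDimensions.DocumentDimensionUnit.INCH
print(multilabel.name, token.value, inch.value)  # MULTILABEL 1 1
```
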
166 changes: 166 additions & 0 deletions automl/google/cloud/automl_v1/gapic/prediction_service_client.py
@@ -26,14 +26,18 @@
import google.api_core.gapic_v1.method
import google.api_core.gapic_v1.routing_header
import google.api_core.grpc_helpers
import google.api_core.operation
import google.api_core.operations_v1
import google.api_core.path_template
import grpc

from google.cloud.automl_v1.gapic import enums
from google.cloud.automl_v1.gapic import prediction_service_client_config
from google.cloud.automl_v1.gapic.transports import prediction_service_grpc_transport
from google.cloud.automl_v1.proto import annotation_spec_pb2
from google.cloud.automl_v1.proto import data_items_pb2
from google.cloud.automl_v1.proto import dataset_pb2
from google.cloud.automl_v1.proto import image_pb2
from google.cloud.automl_v1.proto import io_pb2
from google.cloud.automl_v1.proto import model_evaluation_pb2
from google.cloud.automl_v1.proto import model_pb2
@@ -222,8 +226,18 @@ def predict(
returned in the response. Available for the following ML problems, and their
expected request payloads:
- Image Classification - Image in .JPEG, .GIF or .PNG format,
image\_bytes up to 30MB.
- Image Object Detection - Image in .JPEG, .GIF or .PNG format,
image\_bytes up to 30MB.
- Text Classification - TextSnippet, content up to 60,000 characters,
UTF-8 encoded.
- Text Extraction - TextSnippet, content up to 30,000 characters, UTF-8
NFC encoded.
- Translation - TextSnippet, content up to 25,000 characters, UTF-8
encoded.
- Text Sentiment - TextSnippet, content up to 500 characters, UTF-8
encoded.
Example:
>>> from google.cloud import automl_v1
@@ -246,6 +260,19 @@
message :class:`~google.cloud.automl_v1.types.ExamplePayload`
params (dict[str -> str]): Additional domain-specific parameters; any string must be up to 25000
characters long.
- For Image Classification:
``score_threshold`` - (float) A value from 0.0 to 1.0. When the model
makes predictions for an image, it will only produce results that
have at least this confidence score. The default is 0.5.
- For Image Object Detection: ``score_threshold`` - (float) When Model
detects objects on the image, it will only produce bounding boxes
which have at least this confidence score. Value in 0 to 1 range,
default is 0.5. ``max_bounding_box_count`` - (int64) No more than
this number of bounding boxes will be returned in the response.
Default is 100; the requested value may be limited by the server.
retry (Optional[google.api_core.retry.Retry]): A retry object used
to retry requests. If ``None`` is specified, requests will
be retried using a default configuration.
@@ -295,3 +322,142 @@ def predict(
return self._inner_api_calls["predict"](
request, retry=retry, timeout=timeout, metadata=metadata
)
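
The new `params` keys documented above ride along on the online call; a hedged sketch for object detection, where the payload shape and file path are illustrative placeholders:

```python
from google.cloud import automl_v1

client = automl_v1.PredictionServiceClient()
model_name = client.model_path("my-project", "us-central1", "my-model")

# ExamplePayload as a dict; the image is read from a local file (illustrative).
with open("image.jpg", "rb") as image_file:
    payload = {"image": {"image_bytes": image_file.read()}}

# Only keep detections scoring at least 0.7, and at most 10 bounding boxes.
response = client.predict(
    model_name,
    payload,
    params={"score_threshold": "0.7", "max_bounding_box_count": "10"},
)
for annotation in response.payload:
    print(annotation.display_name, annotation.image_object_detection.score)
```
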

def batch_predict(
self,
name,
input_config,
output_config,
params=None,
retry=google.api_core.gapic_v1.method.DEFAULT,
timeout=google.api_core.gapic_v1.method.DEFAULT,
metadata=None,
):
"""
Perform a batch prediction. Unlike the online ``Predict``, batch
prediction result won't be immediately available in the response.
Instead, a long-running operation object is returned. Users can poll the
operation result via ``GetOperation`` method. Once the operation is
done, ``BatchPredictResult`` is returned in the ``response`` field.
Available for the following ML problems:
- Image Classification
- Image Object Detection
- Text Extraction
Example:
>>> from google.cloud import automl_v1
>>>
>>> client = automl_v1.PredictionServiceClient()
>>>
>>> name = client.model_path('[PROJECT]', '[LOCATION]', '[MODEL]')
>>>
>>> # TODO: Initialize `input_config`:
>>> input_config = {}
>>>
>>> # TODO: Initialize `output_config`:
>>> output_config = {}
>>>
>>> response = client.batch_predict(name, input_config, output_config)
>>>
>>> def callback(operation_future):
... # Handle result.
... result = operation_future.result()
>>>
>>> response.add_done_callback(callback)
>>>
>>> # Handle metadata.
>>> metadata = response.metadata()
Args:
name (str): Name of the model requested to serve the batch prediction.
input_config (Union[dict, ~google.cloud.automl_v1.types.BatchPredictInputConfig]): Required. The input configuration for batch prediction.
If a dict is provided, it must be of the same form as the protobuf
message :class:`~google.cloud.automl_v1.types.BatchPredictInputConfig`
output_config (Union[dict, ~google.cloud.automl_v1.types.BatchPredictOutputConfig]): Required. The configuration specifying where output predictions should
be written.
If a dict is provided, it must be of the same form as the protobuf
message :class:`~google.cloud.automl_v1.types.BatchPredictOutputConfig`
params (dict[str -> str]): Additional domain-specific parameters for the predictions; any string
must be up to 25000 characters long.
- For Text Classification:
``score_threshold`` - (float) A value from 0.0 to 1.0. When the model
makes predictions for a text snippet, it will only produce results
that have at least this confidence score. The default is 0.5.
- For Image Classification:
``score_threshold`` - (float) A value from 0.0 to 1.0. When the model
makes predictions for an image, it will only produce results that
have at least this confidence score. The default is 0.5.
- For Image Object Detection:
``score_threshold`` - (float) When Model detects objects on the
image, it will only produce bounding boxes which have at least this
confidence score. Value in 0 to 1 range, default is 0.5.
``max_bounding_box_count`` - (int64) No more than this number of
bounding boxes will be produced per image. Default is 100; the
requested value may be limited by the server.
retry (Optional[google.api_core.retry.Retry]): A retry object used
to retry requests. If ``None`` is specified, requests will
be retried using a default configuration.
timeout (Optional[float]): The amount of time, in seconds, to wait
for the request to complete. Note that if ``retry`` is
specified, the timeout applies to each individual attempt.
metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata
that is provided to the method.
Returns:
A :class:`~google.cloud.automl_v1.types._OperationFuture` instance.
Raises:
google.api_core.exceptions.GoogleAPICallError: If the request
failed for any reason.
google.api_core.exceptions.RetryError: If the request failed due
to a retryable error and retry attempts failed.
ValueError: If the parameters are invalid.
"""
# Wrap the transport method to add retry and timeout logic.
if "batch_predict" not in self._inner_api_calls:
self._inner_api_calls[
"batch_predict"
] = google.api_core.gapic_v1.method.wrap_method(
self.transport.batch_predict,
default_retry=self._method_configs["BatchPredict"].retry,
default_timeout=self._method_configs["BatchPredict"].timeout,
client_info=self._client_info,
)

request = prediction_service_pb2.BatchPredictRequest(
name=name,
input_config=input_config,
output_config=output_config,
params=params,
)
if metadata is None:
metadata = []
metadata = list(metadata)
try:
routing_header = [("name", name)]
except AttributeError:
pass
else:
routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata(
routing_header
)
metadata.append(routing_metadata)

operation = self._inner_api_calls["batch_predict"](
request, retry=retry, timeout=timeout, metadata=metadata
)
return google.api_core.operation.from_gapic(
operation,
self.transport._operations_client,
prediction_service_pb2.BatchPredictResult,
metadata_type=proto_operations_pb2.OperationMetadata,
)
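
For completeness, a minimal `batch_predict` sketch with concrete dict configs, assuming the standard `gcs_source`/`gcs_destination` fields of `BatchPredictInputConfig`/`BatchPredictOutputConfig`; the bucket paths are placeholders:

```python
from google.cloud import automl_v1

client = automl_v1.PredictionServiceClient()
model_name = client.model_path("my-project", "us-central1", "my-model")

# Inputs are listed in GCS; results land under the output prefix.
input_config = {"gcs_source": {"input_uris": ["gs://my-bucket/batch_input.csv"]}}
output_config = {"gcs_destination": {"output_uri_prefix": "gs://my-bucket/results/"}}

operation = client.batch_predict(model_name, input_config, output_config)
result = operation.result()  # blocks until the long-running operation finishes
print(result)
```
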
@@ -21,7 +21,12 @@
"timeout_millis": 60000,
"retry_codes_name": "non_idempotent",
"retry_params_name": "default",
}
},
"BatchPredict": {
"timeout_millis": 20000,
"retry_codes_name": "non_idempotent",
"retry_params_name": "default",
},
},
}
}