Skip to content

Commit

Permalink
feat: search statistics (#1616)
Browse files Browse the repository at this point in the history
* experimental tweaks

* feat: adds two search statistics classes and property

* removes several personal debugging sentinels

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* adds tests

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* cleans up conflict

* adds comment

* adds some type hints, adds a test for SearchReasons

* cleans up some comments

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* Update tests/unit/job/test_query_stats.py

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* updated type checks to be isinstance checks per linter

* update linting

* Update tests/unit/job/test_query_stats.py

* Update tests/unit/job/test_query_stats.py

* experiments with some tests that are failing

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* Fix linting

* update package verification approach

* update pandas installed version constant

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* remove unused package

* set pragma no cover

* adds controls to skip testing if pandas exceeds 2.0

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* adds pragma no cover to a simple check

* add checks against pandas 2.0 on system test

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* experiments with some tests that are failing

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* resolves merge conflict

* resolves merge conflict

* resolve conflicts

* resolve merge conflicts

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* updates due to faulty conflict resolution

* adds docstrings to two classes

* corrects formatting

* Update tests/unit/job/test_query_stats.py

* Update tests/unit/job/test_query_stats.py

* updates default values and corrects mypy errors

* corrects linting

* Update google/cloud/bigquery/job/query.py

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
chalmerlowe and gcf-owl-bot[bot] committed Sep 2, 2023
1 parent 3645e32 commit b930e46
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 1 deletion.
63 changes: 62 additions & 1 deletion google/cloud/bigquery/job/query.py
Expand Up @@ -198,6 +198,59 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats":
return cls(*args)


class IndexUnusedReason(typing.NamedTuple):
    """Explains why a search query (or sub-query) did not use a search index.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#indexunusedreason
    """

    code: Optional[str] = None
    """High-level reason code for the case where no search index was used."""

    message: Optional[str] = None
    """Free-form, human-readable explanation of why no search index was used."""

    baseTable: Optional[TableReference] = None
    """Base table involved in the reason that no search index was used."""

    indexName: Optional[str] = None
    """Name of the unused search index, if available."""

    @classmethod
    def from_api_repr(cls, reason):
        """Build an instance from an API resource mapping.

        Args:
            reason: Mapping that may contain the keys ``code``, ``message``,
                ``baseTable`` and ``indexName``. Missing keys become ``None``.

        Returns:
            A new instance holding the four values exactly as found in
            ``reason``; no conversion is applied (``baseTable`` in particular
            is stored as whatever object the mapping holds).
        """
        return cls(
            code=reason.get("code"),
            message=reason.get("message"),
            baseTable=reason.get("baseTable"),
            indexName=reason.get("indexName"),
        )


class _SearchStatsFields(typing.NamedTuple):
    """Field layout backing :class:`SearchStats`.

    The fields live in this private base because a ``typing.NamedTuple``
    class body is not allowed to define ``__new__``; the public subclass
    overrides it so each instance receives its own ``reason`` list.
    """

    mode: Optional[str] = None
    """Indicates the type of search index usage in the entire search query."""

    # The real default (a fresh empty list) is supplied by SearchStats.__new__.
    reason: List["IndexUnusedReason"] = None  # type: ignore[assignment]
    """Reasons why no search index was used in the search query (or sub-query)."""


class SearchStats(_SearchStatsFields):
    """Statistics related to Search Queries. Populated as part of JobStatistics2.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#searchstatistics
    """

    __slots__ = ()

    def __new__(
        cls,
        mode: Optional[str] = None,
        reason: Optional[List["IndexUnusedReason"]] = None,
    ) -> "SearchStats":
        """Create the tuple, giving every instance an independent ``reason`` list.

        A literal ``[]`` field default would be one list object shared by all
        default-constructed instances, so mutating it through one instance
        would leak into every other one.
        """
        return super().__new__(cls, mode, [] if reason is None else reason)

    @classmethod
    def from_api_repr(cls, stats: Dict[str, Any]) -> "SearchStats":
        """Build an instance from a ``searchStatistics`` API resource.

        Args:
            stats: Mapping that may contain ``indexUsageMode`` and
                ``indexUnusedReasons``.

        Returns:
            A new instance whose ``mode`` is the ``indexUsageMode`` value (or
            ``None``) and whose ``reason`` holds one ``IndexUnusedReason`` per
            entry of ``indexUnusedReasons``.
        """
        mode = stats.get("indexUsageMode")
        # ``or []`` also covers an explicit null value, not only a missing key.
        raw_reasons = stats.get("indexUnusedReasons") or []
        reasons = [IndexUnusedReason.from_api_repr(raw) for raw in raw_reasons]
        return cls(mode, reasons)


class ScriptOptions:
"""Options controlling the execution of scripts.
Expand Down Expand Up @@ -724,7 +777,6 @@ def to_api_repr(self) -> dict:
Dict: A dictionary in the format used by the BigQuery API.
"""
resource = copy.deepcopy(self._properties)

# Query parameters have an additional property associated with them
# to indicate if the query is using named or positional parameters.
query_parameters = resource["query"].get("queryParameters")
Expand Down Expand Up @@ -858,6 +910,15 @@ def priority(self):
"""
return self.configuration.priority

@property
def search_stats(self) -> Optional[SearchStats]:
    """Search statistics reported for this query job.

    Returns:
        A ``SearchStats`` built from the ``searchStatistics`` entry of the
        job's query statistics, or ``None`` when that entry is absent.
    """
    raw_stats = self._job_statistics().get("searchStatistics")
    if raw_stats is None:
        return None
    return SearchStats.from_api_repr(raw_stats)

@property
def query(self):
"""str: The query text used in this query job.
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/job/test_query.py
Expand Up @@ -911,6 +911,28 @@ def test_dml_stats(self):
assert isinstance(job.dml_stats, DmlStats)
assert job.dml_stats.inserted_row_count == 35

def test_search_stats(self):
    """``search_stats`` is None until ``searchStatistics`` is present."""
    from google.cloud.bigquery.job.query import SearchStats

    expected_mode = "INDEX_USAGE_MODE_UNSPECIFIED"

    client = _make_client(project=self.PROJECT)
    job = self._make_one(self.JOB_ID, self.QUERY, client)
    # No "statistics" key at all.
    assert job.search_stats is None

    # Empty "statistics" resource.
    stats_resource = {}
    job._properties["statistics"] = stats_resource
    assert job.search_stats is None

    # "query" statistics present but without "searchStatistics".
    query_resource = {}
    stats_resource["query"] = query_resource
    assert job.search_stats is None

    query_resource["searchStatistics"] = {
        "indexUsageMode": expected_mode,
        "indexUnusedReasons": [],
    }
    # job.search_stats is a daisy-chain of calls and gets:
    # job.search_stats << job._job_statistics << job._properties
    found = job.search_stats
    assert isinstance(found, SearchStats)
    assert job.search_stats.mode == expected_mode

def test_result(self):
from google.cloud.bigquery.table import RowIterator

Expand Down
69 changes: 69 additions & 0 deletions tests/unit/job/test_query_stats.py
Expand Up @@ -108,6 +108,75 @@ def test_from_api_repr_full_stats(self):
assert result.updated_row_count == 4


class TestSearchStatistics:
    """Unit tests for ``SearchStats`` construction and API parsing."""

    @staticmethod
    def _get_target_class():
        # Imported lazily so collecting this module does not import the target.
        from google.cloud.bigquery.job.query import SearchStats

        return SearchStats

    def _make_one(self, *args, **kwargs):
        target = self._get_target_class()
        return target(*args, **kwargs)

    def test_ctor_defaults(self):
        stats = self._make_one()

        assert stats.mode is None
        assert stats.reason == []

    def test_from_api_repr_unspecified(self):
        target = self._get_target_class()
        resource = {
            "indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED",
            "indexUnusedReasons": [],
        }

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.mode == "INDEX_USAGE_MODE_UNSPECIFIED"
        assert parsed.reason == []


class TestIndexUnusedReason:
    """Unit tests for ``IndexUnusedReason`` construction and API parsing."""

    @staticmethod
    def _get_target_class():
        # Imported lazily so collecting this module does not import the target.
        from google.cloud.bigquery.job.query import IndexUnusedReason

        return IndexUnusedReason

    def _make_one(self, *args, **kwargs):
        target = self._get_target_class()
        return target(*args, **kwargs)

    def test_ctor_defaults(self):
        unused_reason = self._make_one()

        assert unused_reason.code is None
        assert unused_reason.message is None
        assert unused_reason.baseTable is None
        assert unused_reason.indexName is None

    def test_from_api_repr_unspecified(self):
        target = self._get_target_class()
        table_resource = {
            "projectId": "bigquery-public-data",
            "datasetId": "usa_names",
            "tableId": "usa_1910_current",
        }
        resource = {
            "code": "INDEX_CONFIG_NOT_AVAILABLE",
            "message": "There is no search index...",
            "baseTable": table_resource,
            "indexName": None,
        }

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.code == "INDEX_CONFIG_NOT_AVAILABLE"
        assert parsed.message == "There is no search index..."
        # Compare against an independent copy of the expected table mapping.
        assert parsed.baseTable == dict(table_resource)
        assert parsed.indexName is None


class TestQueryPlanEntryStep(_Base):
KIND = "KIND"
SUBSTEPS = ("SUB1", "SUB2")
Expand Down

0 comments on commit b930e46

Please sign in to comment.