great-expectations · Kilo59 · Apr 1, 2024 · Mar 29, 2024 · Mar 30, 2024 · Mar 30, 2024
diff --git a/docs/checks/docs_link_checker.py b/docs/checks/docs_link_checker.py
@@ -279,7 +279,7 @@ def _check_link(  # noqa: PLR0912, C901 # too complex
             if match:
                 result = self._check_relative_image(link, file, match.group("path"))
             else:
-                match = self._absolute_image_pattern.match(link)  # type: ignore[assignment]
+                match = self._absolute_image_pattern.match(link)
                 if match:
                     result = self._check_absolute_image(link, file, match.group("path"))
                 else:
@@ -289,17 +289,17 @@ def _check_link(  # noqa: PLR0912, C901 # too complex
             if match:
                 result = self._check_relative_link(link, file, match.group("path"))
             else:
-                match = self._absolute_link_pattern.match(link)  # type: ignore[assignment]
+                match = self._absolute_link_pattern.match(link)
                 if match:
                     result = self._check_absolute_link(
                         link, file, match.group("path"), match.group("version")
                     )
-                elif match := self._absolute_file_pattern.match(link):  # type: ignore[assignment]
+                elif match := self._absolute_file_pattern.match(link):
                     # This could be more robust like the other checks, but the level of complexity will be high for versioned_docs,
                     # and we should be able to just set onBrokenMarkdownLinks: 'error'
                     result = None
                 else:
-                    match = self._docroot_link_pattern.match(link)  # type: ignore[assignment]
+                    match = self._docroot_link_pattern.match(link)
                     if match:
                         result = self._check_docroot_link(
                             link, file, match.group("path")

diff --git a/docs/sphinx_api_docs_source/build_sphinx_api_docs.py b/docs/sphinx_api_docs_source/build_sphinx_api_docs.py
@@ -215,7 +215,7 @@ def _parse_and_process_html_to_mdx(  # noqa: C901
         soup = BeautifulSoup(html_file_contents, "html.parser")
 
         # Retrieve and remove the title (it will also be autogenerated by docusaurus)
-        title = soup.find("h1").extract()
+        title = soup.find("h1").extract()  # type: ignore[union-attr]
         title_str = title.get_text(strip=True)
         title_str = title_str.replace("#", "")
 
@@ -234,10 +234,10 @@ def _parse_and_process_html_to_mdx(  # noqa: C901
 
         # Add class="sphinx-api-doc" to section tag to reference in css
         doc = soup.find("section")
-        doc["class"] = "sphinx-api-doc"
+        doc["class"] = "sphinx-api-doc"  # type: ignore[index]
 
         # Change style to styles to avoid rendering errors
-        tags_with_style = doc.find_all(
+        tags_with_style = doc.find_all(  # type: ignore[union-attr]
             "col",
             style=lambda value: value in value,  # noqa: PLR0124
         )
@@ -247,7 +247,7 @@ def _parse_and_process_html_to_mdx(  # noqa: C901
             tag["styles"] = style
 
         # Process documentation links
-        external_refs = doc.find_all(class_="reference external")
+        external_refs = doc.find_all(class_="reference external")  # type: ignore[union-attr]
         for external_ref in external_refs:
             url = external_ref.string
             url_parts = urlparse(url)
@@ -264,7 +264,7 @@ def _parse_and_process_html_to_mdx(  # noqa: C901
         #     - Ensure links continue to work even after cutting new docs versions
         #     - Ensure links continue to work regardless of host url
         #  - Remove fragments from links; docusaurus currently errors on these.
-        internal_refs = doc.find_all(class_="reference internal")
+        internal_refs = doc.find_all(class_="reference internal")  # type: ignore[union-attr]
         for internal_ref in internal_refs:
             href = internal_ref["href"]
 
@@ -323,7 +323,7 @@ def _get_sidebar_entry(self, html_file_path: pathlib.Path) -> SidebarEntry:
         Returns:
             SidebarEntry corresponding to html file path.
         """
-        return self.sidebar_entries.get(html_file_path.stem)
+        return self.sidebar_entries.get(html_file_path.stem)  # type: ignore[return-value]
 
     def _get_mdx_file_path(self, sidebar_entry: SidebarEntry) -> pathlib.Path:
         """Get the mdx file path for a processed sphinx-generated html file.

diff --git a/docs/sphinx_api_docs_source/public_api_report.py b/docs/sphinx_api_docs_source/public_api_report.py
@@ -733,8 +733,9 @@ def _is_definition_excluded(self, definition: Definition) -> bool:
         definitions_excluded = [d for d in self.excludes if d.name and d.filepath]
         for definition_excluded in definitions_excluded:
             filepath_excluded = self._repo_relative_filepath_comparison(
-                definition.filepath, definition_excluded.filepath
-            )  # type: ignore[arg-type]
+                definition.filepath,
+                definition_excluded.filepath,  # type: ignore[arg-type]
+            )
             name_excluded = definition.name == definition_excluded.name
             if filepath_excluded and name_excluded:
                 return True
@@ -750,8 +751,9 @@ def _is_definition_included(self, definition: Definition) -> bool:
         definitions_included = [d for d in self.includes if d.name and d.filepath]
         for definition_included in definitions_included:
             filepath_included = self._repo_relative_filepath_comparison(
-                definition.filepath, definition_included.filepath
-            )  # type: ignore[arg-type]
+                definition.filepath,
+                definition_included.filepath,  # type: ignore[arg-type]
+            )
             name_included = definition.name == definition_included.name
             if filepath_included and name_included:
                 return True
@@ -845,16 +847,16 @@ def _default_code_absolute_paths() -> Set[pathlib.Path]:
     """All Great Expectations modules related to the main library."""
     base_directory = _repo_root() / "great_expectations"
     paths = base_directory.rglob("**/*.py")
-    return {pathlib.Path(p) for p in paths}
+    return set(paths)
 
 
 def _default_docs_absolute_paths() -> Set[pathlib.Path]:
     """All Great Expectations modules related to the main library."""
     base_directory = _repo_root() / "docs"
-    paths = []
+    paths: list[pathlib.Path] = []
     for extension in ("md", "mdx", "yml", "yaml"):
         paths.extend(base_directory.rglob(f"**/*.{extension}"))
-    return {pathlib.Path(p) for p in paths}
+    return set(paths)
 
 
 def _parse_file_to_ast_tree(filepath: pathlib.Path) -> ast.AST:

diff --git a/pyproject.toml b/pyproject.toml
@@ -13,12 +13,7 @@ plugins = [
 files = [
     "great_expectations",
     "docs",
-    "tests/datasource/fluent",
-    "tests/integration/cloud",
-    "tests/integration/docusaurus",
-    "tests/core/test_batch_config.py",
-    "tests/validator/test_v1_validator.py",
-    'tests/expectations/core/test_expect_column_values_to_be_present_in_other_table.py',
+    "tests",
     # "contrib" # ignore entire `contrib` package
 ]
 warn_unused_configs = true
@@ -128,14 +123,60 @@ exclude = [
     'validation_operators/types/validation_operator_result\.py',                                           # 35
     'validation_operators/validation_operators\.py',                                                       # 16
     # tests
+    'tests/actions/test_core_actions\.py',
+    'tests/checkpoint/test_checkpoint_result_format\.py',
+    'tests/checkpoint/test_checkpoint\.py',
+    'tests/core/test_batch\.py',
+    'tests/core/test_expectation_configuration\.py',
+    'tests/core/test_expectation_suite\.py',
+    'tests/core/test_expectation_validation_result\.py',
+    'tests/core/test_validation_definition\.py',
+    'tests/core/test_yaml_handler\.py',
+    'tests/data_context/abstract_data_context/test_abstract_data_context_datasource_crud\.py',
+    'tests/data_context/abstract_data_context/test_data_docs_config_crud\.py',
+    'tests/data_context/cloud_data_context/test_include_rendered_content\.py',
+    'tests/data_context/fixtures/plugins/extended_checkpoint\.py',
+    'tests/data_context/migrator',
+    'tests/data_context/store/test_configuration_store\.py',
+    'tests/data_context/store/test_data_asset_store\.py',
+    'tests/data_context/store/test_datasource_store\.py',
+    'tests/data_context/store/test_store_backends\.py',
+    'tests/data_context/test_data_context_in_code_config\.py',
+    'tests/data_context/test_data_context_state_management\.py',
+    'tests/data_context/test_data_context_variables\.py',
+    'tests/datasource/data_connector',
     'tests/datasource/fluent/tasks\.py',
-    'tests/integration/docusaurus/tutorials',
+    'tests/datasource/test_datasource_dict\.py',
+    'tests/datasource/test_new_datasource_with_sql_data_connector\.py',
+    'tests/datasource/test_simple_sqlalchemy_datasource\.py',
+    'tests/execution_engine/partition_and_sample',
+    'tests/expectations',
+    'tests/experimental/metric_repository/test_column_descriptive_metrics_metric_retriever_integration\.py',
+    'tests/experimental/metric_repository/test_column_descriptive_metrics_metric_retriever\.py',
+    'tests/experimental/metric_repository/test_metric_list_metric_retriever_integration\.py',
+    'tests/experimental/metric_repository/test_metric_list_metric_retriever\.py',
+    'tests/integration/common_workflows',
+    'tests/integration/db',
     'tests/integration/docusaurus/connecting_to_your_data',
     'tests/integration/docusaurus/deployment_patterns',
     'tests/integration/docusaurus/expectations',
     'tests/integration/docusaurus/reference',
     'tests/integration/docusaurus/setup',
+    'tests/integration/docusaurus/tutorials',
     'tests/integration/docusaurus/validation',
+    'tests/integration/fixtures/partition_and_sample_data',
+    'tests/integration/fixtures/yellow_tripdata_pandas_fixture',
+    'tests/integration/spark/test_spark_config\.py',
+    'tests/integration/test_definitions',
+    'tests/integration/test_script_runner\.py',
+    'tests/performance',
+    'tests/render',
+    'tests/rule_based_profiler',
+    'tests/test_fixtures/rule_based_profiler/plugins',
+    'tests/test_utils\.py',
+    'tests/validator/test_metric_configuration\.py',
+    'tests/validator/test_metrics_calculator\.py',
+    'tests/validator/test_validation_graph\.py',
 ]
 
 [[tool.mypy.overrides]]
@@ -154,6 +195,7 @@ module = [
     "great_expectations.rule_based_profiler.*",
     "great_expectations.self_check.*",
     "great_expectations.validation_operators.*",
+    "tests.rule_based_profiler.*",
     "tests.test_utils",
 ]
 follow_imports = 'silent'
@@ -168,12 +210,12 @@ disable_error_code = [
 
 [[tool.mypy.overrides]]
 module = [
-    "great_expectations.compatibility.pydantic.*",
     "altair.*",
     "boto3.*",
     "botocore.*",
     "clickhouse_sqlalchemy.*",
     "google.*",
+    "great_expectations.compatibility.pydantic.*",
     "ipywidgets.*",
     "mistune.*",
     "moto.*",
@@ -182,10 +224,12 @@ module = [
     "pyarrow.*",
     "pyfakefs.*",
     "pypd.*",
+    "pytest_timeout.*",
     "ruamel.*",
     "scipy.*",
     "sempy.*",
     "shapely.*",
+    "snapshottest.*",
     "snowflake.*",
     "sqlalchemy_bigquery.*",
     "sqlalchemy_dremio.*",
@@ -324,6 +368,7 @@ extend-exclude = [
 ]
 "tasks.py" = [
     "PLR0913",
+    "TID251", # not part of core code
 ]
 "tests/integration/docusaurus/*.py" = [
     "F841", # local variable is assigned to but never used - ignored for code used in snippets

diff --git a/reqs/requirements-dev-contrib.txt b/reqs/requirements-dev-contrib.txt
@@ -1,6 +1,6 @@
 adr-tools-python==1.0.3
 invoke>=2.0.0
-mypy==1.7.1
+mypy==1.9.0
 pre-commit>=2.21.0
 ruff==0.3.0
 tomli>=2.0.1
diff --git a/reqs/requirements-dev-lite.txt b/reqs/requirements-dev-lite.txt
@@ -12,7 +12,7 @@ pytest>=7.2,<8.0  # 8.0 seems to have undocumented change to pytest.warns(SomeWa
 pytest-benchmark>=3.4.1
 pytest-cov>=2.8.1
 pytest-icdiff>=0.6
-pytest-mock>=3.8.2
+pytest-mock>=3.14.0
 pytest-order>=0.9.5
 pytest-random-order>=1.0.4
 pytest-timeout>=2.1.0

diff --git a/requirements-types.txt b/requirements-types.txt
@@ -7,8 +7,10 @@
 --requirement docs/sphinx_api_docs_source/requirements-dev-api-docs.txt
 # typing stubs
 pandas-stubs
+types-beautifulsoup4
 types-colorama
 types-decorator
+types-html5lib
 types-jsonschema
 types-openpyxl
 types-protobuf

diff --git a/tasks.py b/tasks.py
@@ -496,6 +496,7 @@ def type_schema(  # noqa: C901 - too complex
     from great_expectations.datasource.fluent import (
         _PANDAS_SCHEMA_VERSION,
         BatchRequest,
+        DataAsset,
         Datasource,
     )
     from great_expectations.datasource.fluent.sources import (
@@ -515,7 +516,7 @@ def type_schema(  # noqa: C901 - too complex
     if not sync:
         print("--------------------\nRegistered Fluent types\n--------------------\n")
 
-    name_model = [
+    name_model: list[tuple[str, type[Datasource | BatchRequest | DataAsset]]] = [
         ("BatchRequest", BatchRequest),
         (Datasource.__name__, Datasource),
         *_iter_all_registered_types(),
@@ -626,7 +627,7 @@ def docs(
         ctx.run(" ".join(["yarn lint"]), echo=True)
     elif version:
         docs_builder = DocsBuilder(ctx, docusaurus_dir)
-        docs_builder.create_version(version=parse_version(version))
+        docs_builder.create_version(version=parse_version(version))  # type: ignore[arg-type]
     elif start:
         ctx.run(" ".join(["yarn start"]), echo=True)
     elif clear:
@@ -694,9 +695,9 @@ def link_checker(ctx: Context, skip_external: bool = True):
     """Checks the Docusaurus docs for broken links"""
     import docs.checks.docs_link_checker as checker
 
-    path: str = "docs/docusaurus/docs"
-    docs_root: str = "docs/docusaurus/docs"
-    static_root: str = "docs/docusaurus/static"
+    path = pathlib.Path("docs/docusaurus/docs")
+    docs_root = pathlib.Path("docs/docusaurus/docs")
+    static_root = pathlib.Path("docs/docusaurus/static")
     site_prefix: str = "docs"
     static_prefix: str = "static"
 
@@ -721,7 +722,7 @@ def show_automerges(ctx: Context):
     url = "https://api.github.com/repos/great-expectations/great_expectations/pulls"
     response = requests.get(
         url,
-        params={
+        params={  # type: ignore[arg-type]
             "state": "open",
             "sort": "updated",
             "direction": "desc",
@@ -1099,9 +1100,7 @@ def service(
         for service_name in service_names:
             cmds = []
 
-            if (
-                service_name == "mercury" and os.environ.get("CI") != "true"  # noqa: TID251
-            ):
+            if service_name == "mercury" and os.environ.get("CI") != "true":
                 cmds.extend(
                     [
                         "FORCE_NO_ALIAS=true",

diff --git a/tests/checkpoint/test_checkpoint_with_fluent_datasources.py b/tests/checkpoint/test_checkpoint_with_fluent_datasources.py
@@ -304,7 +304,7 @@ def test_newstyle_checkpoint_result_validations_include_rendered_content_data_co
                     },
                 ],
                 validations=[
-                    {
+                    {  # type: ignore[list-item]
                         "batch_request": {
                             "datasource_name": "my_pandas_filesystem_datasource",
                             "data_asset_name": "Titanic_1911",
@@ -350,7 +350,7 @@ def test_newstyle_checkpoint_result_validations_include_rendered_content_data_co
                     },
                 ],
                 validations=[
-                    {
+                    {  # type: ignore[list-item]
                         "id": "f22601d9-00b7-4d54-beb6-605d87a74e40",
                         "batch_request": {
                             "datasource_name": "my_pandas_filesystem_datasource",
@@ -376,7 +376,7 @@ def test_newstyle_checkpoint_result_validations_include_rendered_content_data_co
                     },
                 ],
                 validations=[
-                    {
+                    {  # type: ignore[list-item]
                         "id": "f22601d9-00b7-4d54-beb6-605d87a74e40",
                         "batch_request": {
                             "datasource_name": "my_pandas_filesystem_datasource",
@@ -404,11 +404,11 @@ def test_checkpoint_run_adds_validation_ids_to_expectation_suite_validation_resu
     result: CheckpointResult = checkpoint.run()
 
     # Always have a single validation result based on the test's parametrization
-    validation_result: ExpectationValidationResult | dict = tuple(result.run_results.values())[0][
+    validation_result: ExpectationValidationResult | dict = tuple(result.run_results.values())[0][  # type: ignore[assignment]
         "validation_result"
     ]
 
-    actual_validation_id: Optional[str] = validation_result.meta["validation_id"]
+    actual_validation_id: Optional[str] = validation_result.meta["validation_id"]  # type: ignore[union-attr]
     assert expected_validation_id == actual_validation_id