Merge branch 'develop' into dependabot/npm_and_yarn/docs/docusaurus/katex-0.16.10
TrangPham committed Apr 9, 2024
2 parents 9fa44f2 + 3133ef8 commit 2ffd2e6
Showing 211 changed files with 1,293 additions and 44 deletions.
@@ -9,26 +9,18 @@ import SetupAndInstallGx from '../../components/setup/link_lists/_setup_and_inst
import DataContextInitializeInstantiateSave from '../../components/setup/link_lists/_data_context_initialize_instatiate_save.md'
import TechnicalTag from '../../reference/learn/term_tags/_tag.mdx';

Use this quickstart to install GX, connect to sample data, build your first Expectation, validate data, and review the validation results. This is a great place to start if you're new to GX and aren't sure if it's the right solution for you or your organization. If you're using Databricks or SQL to store data, see [Get Started with GX and Databricks](./getting_started/how_to_use_great_expectations_in_databricks.md) or [Get Started with GX and SQL](./getting_started/how_to_use_great_expectations_with_sql.md).

:::note Great Expectations Cloud

You can use this quickstart with the open source Python version of GX or with Great Expectations Cloud.

If you're interested in participating in the Great Expectations Cloud Beta program, or you want to receive progress updates, [**sign up for the Beta program**](https://greatexpectations.io/cloud).

:::
Use this quickstart to install GX OSS, connect to sample data, build your first Expectation, validate data, and review the validation results. This is a great place to start if you're new to GX OSS and aren't sure if it's the right solution for you or your organization. If you're using Databricks or SQL to store data, see [Get Started with GX and Databricks](/oss/get_started/get_started_with_gx_and_databricks.md) or [Get Started with GX and SQL](/oss/get_started/get_started_with_gx_and_sql.md).

:::info Windows Support

Windows support for the open source Python version of GX is currently unavailable. If you’re using GX in a Windows environment, you might experience errors or performance issues.
Windows support for the open source Python version of GX OSS is currently unavailable. If you’re using GX OSS in a Windows environment, you might experience errors or performance issues.

:::


## Data validation workflow

The following diagram illustrates the end-to-end GX data validation workflow that you'll implement with this quickstart. Click a workflow step to view the related content.
The following diagram illustrates the end-to-end GX OSS data validation workflow that you'll implement with this quickstart. Click a workflow step to view the related content.

```mermaid
flowchart LR
@@ -65,7 +57,7 @@ click 7 "#validate-data"
- An internet browser


## Install GX
## Install GX OSS

1. Run the following command in an empty base directory inside a Python virtual environment:
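
   (Aside, not part of this commit: the collapsed lines that follow hold the guide's actual commands. As a hedged sketch, GX OSS is typically installed with `python -m pip install great_expectations`, and a quick import check such as the one below confirms the environment before continuing.)

   ```python
   # Hedged verification sketch, assuming GX OSS was installed into the active
   # virtual environment; the `gx` alias follows the project's documented convention.
   import great_expectations as gx

   print(gx.__version__)  # prints the installed GX OSS version
   ```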

@@ -124,8 +116,6 @@ click 7 "#validate-data"

## Related documentation

If you're ready to continue your GX journey, the following topics can help you implement a solution for your specific environment and business requirements:

- [Install GX in a specific environment with support for a specific Data Source](/oss/guides/setup/installation/install_gx.md).
- [Initialize, instantiate, and save a Data Context](/oss/guides/setup/configure_data_contexts_lp.md).
- [Connect to Data Sources](/oss/guides/connecting_to_your_data/connect_to_data_lp.md).
26 changes: 26 additions & 0 deletions great_expectations/checkpoint/actions.py
@@ -787,6 +787,32 @@ def _root_validate_email_params(cls, values: dict) -> dict:

        return values

    @override
    def v1_run(self, checkpoint_result: CheckpointResult) -> str | dict:
        success = checkpoint_result.success or False
        if not self._is_enabled(success=success):
            return {"email_result": ""}

        title, html = self.renderer.v1_render(checkpoint_result=checkpoint_result)
        receiver_emails_list = list(map(lambda x: x.strip(), self.receiver_emails.split(",")))

        # this will actually send the email
        email_result = send_email(
            title=title,
            html=html,
            smtp_address=self.smtp_address,
            smtp_port=self.smtp_port,
            sender_login=self.sender_login,
            sender_password=self.sender_password,
            sender_alias=self.sender_alias,
            receiver_emails_list=receiver_emails_list,
            use_tls=self.use_tls,
            use_ssl=self.use_ssl,
        )

        # sending payload back as dictionary
        return {"email_result": email_result}

    @override
    def _run( # type: ignore[override] # signature does not match parent # noqa: PLR0913
        self,
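
As a standalone, hedged illustration (not repository code, and the addresses are placeholders), the comma-separated `receiver_emails` setting used by `v1_run` above is normalised into a stripped list before it reaches `send_email`:

```python
# Standalone sketch of the receiver_emails normalisation performed in v1_run above.
receiver_emails = "alice@example.com, bob@example.com ,carol@example.com"  # placeholder addresses
receiver_emails_list = [email.strip() for email in receiver_emails.split(",")]
print(receiver_emails_list)
# ['alice@example.com', 'bob@example.com', 'carol@example.com']
```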
6 changes: 5 additions & 1 deletion great_expectations/core/batch_definition.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING, Optional

from great_expectations.compatibility import pydantic
@@ -26,6 +27,7 @@ class BatchDefinition(pydantic.BaseModel):
    id: Optional[str] = None
    name: str
    partitioner: Optional[Partitioner] = None
    batching_regex: Optional[re.Pattern] = None

    # private attributes that must be set immediately after instantiation
    _data_asset: DataAsset = pydantic.PrivateAttr()
@@ -43,7 +45,9 @@ def build_batch_request(
    ) -> BatchRequest:
        """Build a BatchRequest from the asset and batch request options."""
        return self.data_asset.build_batch_request(
            options=batch_request_options, partitioner=self.partitioner
            options=batch_request_options,
            partitioner=self.partitioner,
            batching_regex=self.batching_regex,
        )

    def save(self) -> None:
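
For orientation, a hedged sketch (the name and pattern are illustrative, not taken from this commit) of a `BatchDefinition` carrying the new `batching_regex` field; in normal use the definition is created through a Data Asset, which also wires up the private `_data_asset` that `build_batch_request` relies on:

```python
# Hedged sketch: a BatchDefinition that carries a batching_regex.
# build_batch_request() is not called here because _data_asset is only
# attached when the definition is created through a real Data Asset.
import re

from great_expectations.core.batch_definition import BatchDefinition

batch_definition = BatchDefinition(
    name="monthly_csv_files",
    batching_regex=re.compile(r"data_(?P<year>\d{4})-(?P<month>\d{2})\.csv"),
)
print(batch_definition.batching_regex.pattern)
```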
8 changes: 7 additions & 1 deletion great_expectations/core/factory/checkpoint_factory.py
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Iterable

from great_expectations._docs_decorators import public_api
from great_expectations.checkpoint.v1_checkpoint import Checkpoint
@@ -80,6 +80,12 @@ def get(self, name: str) -> Checkpoint:

        return self._get(key=key)

    @public_api
    @override
    def all(self) -> Iterable[Checkpoint]:
        """Get all Checkpoints."""
        return self._store.get_all()

    def _get(self, key: GXCloudIdentifier | StringKey) -> Checkpoint:
        checkpoint = self._store.get(key=key)
        if not isinstance(checkpoint, Checkpoint):
6 changes: 5 additions & 1 deletion great_expectations/core/factory/factory.py
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Generic, TypeVar
from typing import Generic, Iterable, TypeVar

T = TypeVar("T")

@@ -20,3 +20,7 @@ def delete(self, obj: T) -> T:
    @abstractmethod
    def get(self, name: str) -> T:
        pass

    @abstractmethod
    def all(self) -> Iterable[T]:
        pass
13 changes: 12 additions & 1 deletion great_expectations/core/factory/suite_factory.py
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Iterable

from great_expectations._docs_decorators import public_api
from great_expectations.analytics.client import submit as submit_event
@@ -90,6 +90,17 @@ def get(self, name: str) -> ExpectationSuite:
        if not self._store.has_key(key=key):
            raise DataContextError(f"ExpectationSuite with name {name} was not found.") # noqa: TRY003
        suite_dict = self._store.get(key=key)
        return self._deserialize(suite_dict)

    @public_api
    @override
    def all(self) -> Iterable[ExpectationSuite]:
        """Get all ExpectationSuites."""
        dicts = self._store.get_all()
        return [self._deserialize(suite_dict) for suite_dict in dicts]

    def _deserialize(self, suite_dict: dict) -> ExpectationSuite:
        # TODO: Move this logic to the store
        suite = ExpectationSuite(**suite_dict)
        if self._include_rendered_content:
            suite.render()
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, cast
from typing import TYPE_CHECKING, Iterable, cast

from great_expectations._docs_decorators import public_api
from great_expectations.compatibility.typing_extensions import override
@@ -75,3 +75,9 @@ def get(self, name: str) -> ValidationDefinition:
            raise DataContextError(f"ValidationDefinition with name {name} was not found.") # noqa: TRY003

        return cast(ValidationDefinition, self._store.get(key=key))

    @public_api
    @override
    def all(self) -> Iterable[ValidationDefinition]:
        """Get all ValidationDefinitions."""
        return self._store.get_all()
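
A hedged usage sketch (not part of this diff) of the new `all()` methods; the Data Context attribute names used here (`checkpoints`, `suites`, `validation_definitions`) are assumptions about how these factories are exposed:

```python
# Hedged sketch: iterating every stored object through the factory all() methods.
import great_expectations as gx

context = gx.get_context()

for checkpoint in context.checkpoints.all():  # CheckpointFactory.all()
    print(checkpoint.name)

for suite in context.suites.all():  # SuiteFactory.all()
    print(suite.name)

for validation_definition in context.validation_definitions.all():  # ValidationDefinitionFactory.all()
    print(validation_definition.name)
```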
1 change: 1 addition & 0 deletions great_expectations/datasource/fluent/batch_request.pyi
@@ -24,6 +24,7 @@ class BatchRequest(pydantic.BaseModel):
options: Optional[BatchRequestOptions] = None,
batch_slice: Optional[BatchSlice] = None,
partitioner: Optional[Partitioner] = None,
batching_regex: Optional[re.Pattern] = None,
) -> None: ...
@property
def batch_slice(self) -> slice: ...
5 changes: 5 additions & 0 deletions great_expectations/datasource/fluent/file_path_data_asset.py
@@ -2,6 +2,7 @@

import copy
import logging
import re
import warnings
from collections import Counter
from pprint import pformat as pf
@@ -81,6 +82,7 @@

class _FilePathDataAsset(DataAsset):
    _EXCLUDE_FROM_READER_OPTIONS: ClassVar[Set[str]] = {
        "batch_definitions",
        "type",
        "name",
        "order_by",
@@ -179,6 +181,7 @@ def build_batch_request(
options: Optional[BatchRequestOptions] = None,
batch_slice: Optional[BatchSlice] = None,
partitioner: Optional[Partitioner] = None,
batching_regex: Optional[re.Pattern] = None,
) -> BatchRequest:
"""A batch request that can be used to obtain batches for this DataAsset.
@@ -189,6 +192,7 @@ def build_batch_request(
batch_slice: A python slice that can be used to limit the sorted batches by index.
e.g. `batch_slice = "[-5:]"` will request only the last 5 batches after the options filter is applied.
partitioner: A Partitioner used to narrow the data returned from the asset.
batching_regex: A Regular Expression used to build batches in path based Assets.
Returns:
A BatchRequest object that can be used to obtain a batch list from a Datasource by calling the
@@ -229,6 +233,7 @@ def build_batch_request(
options=options or {},
batch_slice=batch_slice,
partitioner=partitioner,
batching_regex=batching_regex,
)

@override
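
To make the new parameter concrete, a hedged usage sketch (datasource name, directory, and file pattern are placeholders, not taken from this commit) of passing `batching_regex` through `build_batch_request` on a path-based asset:

```python
# Hedged sketch: requesting batches from a filesystem-backed CSV asset
# using a batching_regex. Names and paths are placeholders.
import re

import great_expectations as gx

context = gx.get_context()
datasource = context.sources.add_pandas_filesystem(
    name="local_files", base_directory="./data"
)
asset = datasource.add_csv_asset(name="monthly_csv")

batch_request = asset.build_batch_request(
    batching_regex=re.compile(r"data_(?P<year>\d{4})-(?P<month>\d{2})\.csv")
)
print(batch_request)
```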
3 changes: 3 additions & 0 deletions great_expectations/datasource/fluent/interfaces.py
@@ -4,6 +4,7 @@
import dataclasses
import functools
import logging
import re
import uuid
import warnings
from pprint import pformat as pf
@@ -218,6 +219,7 @@ def build_batch_request(
options: Optional[BatchRequestOptions] = None,
batch_slice: Optional[BatchSlice] = None,
partitioner: Optional[Partitioner] = None,
batching_regex: Optional[re.Pattern] = None,
) -> BatchRequest:
"""A batch request that can be used to obtain batches for this DataAsset.
@@ -228,6 +230,7 @@ def build_batch_request(
batch_slice: A python slice that can be used to limit the sorted batches by index.
e.g. `batch_slice = "[-5:]"` will request only the last 5 batches after the options filter is applied.
partitioner: A Partitioner used to narrow the data returned from the asset.
batching_regex: A Regular Expression used to build batches in path based Assets.
Returns:
A BatchRequest object that can be used to obtain a batch list from a Datasource by calling the
1 change: 1 addition & 0 deletions great_expectations/datasource/fluent/invalid_datasource.py
@@ -90,6 +90,7 @@ def build_batch_request(
options: dict | None = None,
batch_slice: Any = None,
partitioner: Any = None,
batching_regex: Any = None,
) -> NoReturn:
self._raise_type_error()

13 changes: 12 additions & 1 deletion great_expectations/datasource/fluent/pandas_datasource.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import logging
import re
import sqlite3
import uuid
from pprint import pformat as pf
@@ -177,13 +178,15 @@ def build_batch_request(
options: Optional[BatchRequestOptions] = None,
batch_slice: Optional[BatchSlice] = None,
partitioner: Optional[Partitioner] = None,
batching_regex: Optional[re.Pattern] = None,
) -> BatchRequest:
"""A batch request that can be used to obtain batches for this DataAsset.
Args:
options: This is not currently supported and must be {}/None for this data asset.
batch_slice: This is not currently supported and must be None for this data asset.
partitioner: This is not currently supported and must be None for this data asset.
batching_regex: A Regular Expression used to build batches in path based Assets.
Returns:
A BatchRequest object that can be used to obtain a batch list from a Datasource by calling the
@@ -208,6 +211,7 @@ def build_batch_request(
datasource_name=self.datasource.name,
data_asset_name=self.name,
options={},
batching_regex=batching_regex,
)

@override
@@ -383,12 +387,13 @@ def _get_reader_options_include(self) -> set[str]:
version="0.16.15",
)
@override
def build_batch_request( # type: ignore[override]
def build_batch_request( # type: ignore[override] # noqa: PLR0913
self,
dataframe: Optional[pd.DataFrame] = None,
options: Optional[BatchRequestOptions] = None,
batch_slice: Optional[BatchSlice] = None,
partitioner: Optional[Partitioner] = None,
batching_regex: Optional[re.Pattern] = None,
) -> BatchRequest:
"""A batch request that can be used to obtain batches for this DataAsset.
@@ -397,6 +402,7 @@ def build_batch_request( # type: ignore[override]
options: This is not currently supported and must be {}/None for this data asset.
batch_slice: This is not currently supported and must be None for this data asset.
partitioner: This is not currently supported and must be None for this data asset.
batching_regex: This is currently not supported and must be None for this data asset.
Returns:
A BatchRequest object that can be used to obtain a batch list from a Datasource by calling the
@@ -417,6 +423,11 @@ def build_batch_request( # type: ignore[override]
                "partitioner is not currently supported and must be None for this DataAsset."
            )

        if batching_regex is not None:
            raise ValueError( # noqa: TRY003
                "batching_regex is not currently supported and must be None for this DataAsset."
            )

        if dataframe is None:
            df = self.dataframe
        else:
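
By contrast, in-memory DataFrame assets reject the new argument, mirroring the `ValueError` added above. A hedged sketch of the caller's view (datasource and asset names are placeholders, and the exact setup is an assumption):

```python
# Hedged sketch: batching_regex must be None for pandas DataFrame assets.
import re

import pandas as pd

import great_expectations as gx

context = gx.get_context()
asset = context.sources.add_pandas(name="in_memory").add_dataframe_asset(name="frame")

try:
    asset.build_batch_request(
        dataframe=pd.DataFrame({"x": [1, 2, 3]}),
        batching_regex=re.compile(r"unused"),
    )
except ValueError as err:
    print(err)  # batching_regex is not currently supported and must be None ...
```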
2 changes: 2 additions & 0 deletions great_expectations/datasource/fluent/pandas_datasource.pyi
@@ -1,4 +1,5 @@
import os
import re
import sqlite3
import typing
from logging import Logger
@@ -78,6 +79,7 @@ class _PandasDataAsset(DataAsset):
options: Optional[BatchRequestOptions] = ...,
batch_slice: Optional[BatchSlice] = ...,
partitioner: Optional[Partitioner] = ...,
batching_regex: Optional[re.Pattern] = ...,
) -> BatchRequest: ...
@override
def _validate_batch_request(self, batch_request: BatchRequest) -> None: ...
@@ -50,6 +50,11 @@
}
]
},
"batching_regex": {
"title": "Batching Regex",
"type": "string",
"format": "regex"
},
"batch_slice": {
"title": "Batch Slice",
"anyOf": [
@@ -373,6 +373,11 @@
"$ref": "#/definitions/PartitionerConvertedDatetime"
}
]
},
"batching_regex": {
"title": "Batching Regex",
"type": "string",
"format": "regex"
}
},
"required": [
@@ -341,6 +341,11 @@
"$ref": "#/definitions/PartitionerConvertedDatetime"
}
]
},
"batching_regex": {
"title": "Batching Regex",
"type": "string",
"format": "regex"
}
},
"required": [
@@ -336,6 +336,11 @@
"$ref": "#/definitions/PartitionerConvertedDatetime"
}
]
},
"batching_regex": {
"title": "Batching Regex",
"type": "string",
"format": "regex"
}
},
"required": [
