From 25d4994d37c50d9b484f749dea8884123c5e7860 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Fri, 8 Jul 2022 13:52:27 -0400 Subject: [PATCH 01/11] Widen dependencies. This should make it easier to install Feast alongside other Python packages. Signed-off-by: Abhin Chhabra Signed-off-by: Kevin Zhang --- sdk/python/feast/cli.py | 30 +++++++-- sdk/python/feast/data_source.py | 5 +- sdk/python/feast/diff/infra_diff.py | 10 ++- sdk/python/feast/diff/registry_diff.py | 16 +++-- sdk/python/feast/driver_test_data.py | 7 +- .../embedded_go/online_features_service.py | 7 +- sdk/python/feast/feature.py | 9 ++- sdk/python/feast/feature_logging.py | 4 +- sdk/python/feast/feature_store.py | 66 +++++++++++++------ sdk/python/feast/feature_view.py | 5 +- sdk/python/feast/field.py | 6 +- sdk/python/feast/inference.py | 13 +++- sdk/python/feast/infra/aws.py | 5 +- .../batch_materialization_engine.py | 2 +- .../feast/infra/offline_stores/bigquery.py | 31 ++++++--- .../infra/offline_stores/bigquery_source.py | 10 ++- .../postgres_offline_store/postgres.py | 9 ++- .../tests/data_source.py | 9 ++- .../contrib/spark_offline_store/spark.py | 7 +- .../spark_offline_store/spark_source.py | 10 ++- .../test_config/manual_tests.py | 3 +- .../trino_offline_store/tests/data_source.py | 5 +- .../contrib/trino_offline_store/trino.py | 21 ++++-- .../trino_offline_store/trino_source.py | 6 +- sdk/python/feast/infra/offline_stores/file.py | 36 +++++++--- .../infra/offline_stores/offline_store.py | 6 +- .../feast/infra/offline_stores/redshift.py | 9 ++- .../feast/infra/offline_stores/snowflake.py | 8 ++- .../infra/offline_stores/snowflake_source.py | 4 +- .../infra/online_stores/contrib/postgres.py | 9 ++- .../feast/infra/online_stores/datastore.py | 15 ++++- .../feast/infra/online_stores/dynamodb.py | 6 +- .../feast/infra/online_stores/sqlite.py | 5 +- .../feast/infra/passthrough_provider.py | 10 ++- sdk/python/feast/infra/provider.py | 14 +++- sdk/python/feast/infra/registry_stores/sql.py | 21 ++++-- sdk/python/feast/infra/utils/aws_utils.py | 43 ++++++++++-- .../infra/utils/postgres/connection_utils.py | 5 +- .../feast/infra/utils/snowflake_utils.py | 5 +- sdk/python/feast/on_demand_feature_view.py | 17 +++-- sdk/python/feast/registry.py | 50 +++++++------- sdk/python/feast/repo_config.py | 16 +++-- sdk/python/feast/stream_feature_view.py | 3 +- sdk/python/feast/templates/aws/test.py | 3 +- sdk/python/feast/templates/gcp/test.py | 3 +- .../feast/templates/postgres/driver_repo.py | 5 +- sdk/python/feast/templates/postgres/test.py | 3 +- sdk/python/feast/templates/snowflake/test.py | 3 +- sdk/python/feast/templates/spark/example.py | 10 ++- sdk/python/feast/types.py | 4 +- sdk/python/feast/ui_server.py | 4 +- sdk/python/feast/utils.py | 6 +- .../requirements/py3.10-ci-requirements.txt | 11 ++-- .../requirements/py3.10-requirements.txt | 13 ++-- .../requirements/py3.8-ci-requirements.txt | 31 ++++----- .../requirements/py3.8-requirements.txt | 13 ++-- .../requirements/py3.9-ci-requirements.txt | 11 ++-- .../requirements/py3.9-requirements.txt | 13 ++-- ...st_benchmark_universal_online_retrieval.py | 4 +- sdk/python/tests/conftest.py | 12 +++- sdk/python/tests/doctest/test_all.py | 8 ++- .../example_repos/example_feature_repo_1.py | 6 +- .../example_repos/example_feature_repo_2.py | 5 +- .../example_feature_repo_version_0_19.py | 3 +- ..._repo_with_duplicated_featureview_names.py | 6 +- ...ample_feature_repo_with_entity_join_key.py | 6 +- .../on_demand_feature_view_repo.py | 5 +- sdk/python/tests/foo_provider.py | 5 +- 
.../integration/e2e/test_go_feature_server.py | 4 +- .../e2e/test_python_feature_server.py | 5 +- .../tests/integration/e2e/test_validation.py | 16 +++-- .../feature_repos/repo_configuration.py | 10 +-- .../universal/data_sources/file.py | 4 +- .../feature_repos/universal/feature_views.py | 6 +- .../offline_store/test_feature_logging.py | 6 +- .../offline_store/test_s3_custom_endpoint.py | 4 +- .../test_universal_historical_retrieval.py | 9 ++- .../online_store/test_e2e_local.py | 3 +- .../online_store/test_universal_online.py | 5 +- .../integration/registration/test_cli.py | 3 +- .../registration/test_feature_store.py | 23 +++++-- .../registration/test_inference.py | 19 ++++-- .../integration/registration/test_registry.py | 49 ++++++++++---- .../registration/test_sql_registry.py | 34 +++++++--- .../test_stream_feature_view_apply.py | 11 +++- .../test_universal_odfv_feature_inference.py | 6 +- .../registration/test_universal_types.py | 39 +++++++---- .../tests/unit/diff/test_registry_diff.py | 15 ++++- sdk/python/tests/unit/test_data_sources.py | 18 +++-- sdk/python/tests/unit/test_entity.py | 4 +- sdk/python/tests/unit/test_feature_views.py | 8 ++- .../tests/unit/test_on_demand_feature_view.py | 3 +- sdk/python/tests/unit/test_usage.py | 2 +- sdk/python/tests/utils/data_source_utils.py | 9 ++- setup.py | 28 ++++---- 95 files changed, 789 insertions(+), 325 deletions(-) diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 91815d30fd2..99e084b6664 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -524,7 +524,10 @@ def registry_dump_command(ctx: click.Context): @click.argument("start_ts") @click.argument("end_ts") @click.option( - "--views", "-v", help="Feature views to materialize", multiple=True, + "--views", + "-v", + help="Feature views to materialize", + multiple=True, ) @click.pass_context def materialize_command( @@ -551,7 +554,10 @@ def materialize_command( @cli.command("materialize-incremental") @click.argument("end_ts") @click.option( - "--views", "-v", help="Feature views to incrementally materialize", multiple=True, + "--views", + "-v", + help="Feature views to incrementally materialize", + multiple=True, ) @click.pass_context def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List[str]): @@ -622,10 +628,14 @@ def init_command(project_directory, minimal: bool, template: str): help="Specify a server type: 'http' or 'grpc' [default: http]", ) @click.option( - "--no-access-log", is_flag=True, help="Disable the Uvicorn access log.", + "--no-access-log", + is_flag=True, + help="Disable the Uvicorn access log.", ) @click.option( - "--no-feature-log", is_flag=True, help="Disable logging served features", + "--no-feature-log", + is_flag=True, + help="Disable logging served features", ) @click.pass_context def serve_command( @@ -771,13 +781,19 @@ def disable_alpha_features(ctx: click.Context): @cli.command("validate") @click.option( - "--feature-service", "-f", help="Specify a feature service name", + "--feature-service", + "-f", + help="Specify a feature service name", ) @click.option( - "--reference", "-r", help="Specify a validation reference name", + "--reference", + "-r", + help="Specify a validation reference name", ) @click.option( - "--no-profile-cache", is_flag=True, help="Do not store cached profile in registry", + "--no-profile-cache", + is_flag=True, + help="Do not store cached profile in registry", ) @click.argument("start_ts") @click.argument("end_ts") diff --git a/sdk/python/feast/data_source.py 
b/sdk/python/feast/data_source.py index f5c40d24214..6ab7934371b 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -116,7 +116,10 @@ class KinesisOptions: """ def __init__( - self, record_format: StreamFormat, region: str, stream_name: str, + self, + record_format: StreamFormat, + region: str, + stream_name: str, ): self.record_format = record_format self.region = region diff --git a/sdk/python/feast/diff/infra_diff.py b/sdk/python/feast/diff/infra_diff.py index a09eaf39ebe..51bece33dd6 100644 --- a/sdk/python/feast/diff/infra_diff.py +++ b/sdk/python/feast/diff/infra_diff.py @@ -126,7 +126,8 @@ def diff_infra_protos( infra_objects_to_delete, infra_objects_to_add, ) = tag_infra_proto_objects_for_keep_delete_add( - current_infra_objects, new_infra_objects, + current_infra_objects, + new_infra_objects, ) for e in infra_objects_to_add: @@ -199,5 +200,10 @@ def diff_between( ) ) return InfraObjectDiff( - new.name, infra_object_type, current, new, property_diffs, transition, + new.name, + infra_object_type, + current, + new, + property_diffs, + transition, ) diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py index 7a5b9b7564f..fc0acf0223a 100644 --- a/sdk/python/feast/diff/registry_diff.py +++ b/sdk/python/feast/diff/registry_diff.py @@ -161,7 +161,9 @@ def diff_registry_objects( def extract_objects_for_keep_delete_update_add( - registry: BaseRegistry, current_project: str, desired_repo_contents: RepoContents, + registry: BaseRegistry, + current_project: str, + desired_repo_contents: RepoContents, ) -> Tuple[ Dict[FeastObjectType, Set[FeastObject]], Dict[FeastObjectType, Set[FeastObject]], @@ -208,7 +210,9 @@ def extract_objects_for_keep_delete_update_add( def diff_between( - registry: BaseRegistry, current_project: str, desired_repo_contents: RepoContents, + registry: BaseRegistry, + current_project: str, + desired_repo_contents: RepoContents, ) -> RegistryDiff: """ Returns the difference between the current and desired repo states. 
@@ -305,12 +309,16 @@ def apply_diff_to_registry( BaseFeatureView, feast_object_diff.current_feast_object ) registry.delete_feature_view( - feature_view_obj.name, project, commit=False, + feature_view_obj.name, + project, + commit=False, ) elif feast_object_diff.feast_object_type == FeastObjectType.DATA_SOURCE: ds_obj = cast(DataSource, feast_object_diff.current_feast_object) registry.delete_data_source( - ds_obj.name, project, commit=False, + ds_obj.name, + project, + commit=False, ) if feast_object_diff.transition_type in [ diff --git a/sdk/python/feast/driver_test_data.py b/sdk/python/feast/driver_test_data.py index 991b5391e85..da9d0613138 100644 --- a/sdk/python/feast/driver_test_data.py +++ b/sdk/python/feast/driver_test_data.py @@ -30,7 +30,12 @@ def _convert_event_timestamp(event_timestamp: pd.Timestamp, t: EventTimestampTyp def create_orders_df( - customers, drivers, start_date, end_date, order_count, locations=None, + customers, + drivers, + start_date, + end_date, + order_count, + locations=None, ) -> pd.DataFrame: """ Example df generated by this function (if locations): diff --git a/sdk/python/feast/embedded_go/online_features_service.py b/sdk/python/feast/embedded_go/online_features_service.py index d9b34b2414c..bf82fab6a33 100644 --- a/sdk/python/feast/embedded_go/online_features_service.py +++ b/sdk/python/feast/embedded_go/online_features_service.py @@ -50,7 +50,8 @@ def __init__( ) self._service = NewOnlineFeatureService( - self._config, self._transformation_callback, + self._config, + self._transformation_callback, ) # This should raise an exception if there were any errors in NewOnlineFeatureService. @@ -263,7 +264,9 @@ def transformation_callback( def logging_callback( - fs: "FeatureStore", feature_service_name: str, dataset_dir: str, + fs: "FeatureStore", + feature_service_name: str, + dataset_dir: str, ) -> bytes: feature_service = fs.get_feature_service(feature_service_name, allow_cache=True) try: diff --git a/sdk/python/feast/feature.py b/sdk/python/feast/feature.py index d1f96c302ae..6b5acd9fc64 100644 --- a/sdk/python/feast/feature.py +++ b/sdk/python/feast/feature.py @@ -30,7 +30,10 @@ class Feature: """ def __init__( - self, name: str, dtype: ValueType, labels: Optional[Dict[str, str]] = None, + self, + name: str, + dtype: ValueType, + labels: Optional[Dict[str, str]] = None, ): """Creates a Feature object.""" self._name = name @@ -91,7 +94,9 @@ def to_proto(self) -> FeatureSpecProto: value_type = ValueTypeProto.Enum.Value(self.dtype.name) return FeatureSpecProto( - name=self.name, value_type=value_type, tags=self.labels, + name=self.name, + value_type=value_type, + tags=self.labels, ) @classmethod diff --git a/sdk/python/feast/feature_logging.py b/sdk/python/feast/feature_logging.py index 275bde72ec1..da9a0c9fe5f 100644 --- a/sdk/python/feast/feature_logging.py +++ b/sdk/python/feast/feature_logging.py @@ -34,12 +34,12 @@ class LoggingSource: @abc.abstractmethod def get_schema(self, registry: "BaseRegistry") -> pa.Schema: - """ Generate schema for logs destination. """ + """Generate schema for logs destination.""" raise NotImplementedError @abc.abstractmethod def get_log_timestamp_column(self) -> str: - """ Return timestamp column that must exist in generated schema. 
""" + """Return timestamp column that must exist in generated schema.""" raise NotImplementedError diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 4856a46f8ce..ce2c98e1ea4 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -118,7 +118,9 @@ class FeatureStore: @log_exceptions def __init__( - self, repo_path: Optional[str] = None, config: Optional[RepoConfig] = None, + self, + repo_path: Optional[str] = None, + config: Optional[RepoConfig] = None, ): """ Creates a FeatureStore object. @@ -253,7 +255,9 @@ def list_request_feature_views( ) def _list_feature_views( - self, allow_cache: bool = False, hide_dummy_entity: bool = True, + self, + allow_cache: bool = False, + hide_dummy_entity: bool = True, ) -> List[FeatureView]: feature_views = [] for fv in self._registry.list_feature_views( @@ -266,7 +270,9 @@ def _list_feature_views( return feature_views def _list_stream_feature_views( - self, allow_cache: bool = False, hide_dummy_entity: bool = True, + self, + allow_cache: bool = False, + hide_dummy_entity: bool = True, ) -> List[StreamFeatureView]: stream_feature_views = [] for sfv in self._registry.list_stream_feature_views( @@ -480,7 +486,9 @@ def delete_feature_service(self, name: str): return self._registry.delete_feature_service(name, self.project) def _get_features( - self, features: Union[List[str], FeatureService], allow_cache: bool = False, + self, + features: Union[List[str], FeatureService], + allow_cache: bool = False, ) -> List[str]: _features = features @@ -589,7 +597,8 @@ def _make_inferences( feature_service.infer_features(fvs_to_update=fvs_to_update_map) def _get_feature_views_to_materialize( - self, feature_views: Optional[List[str]], + self, + feature_views: Optional[List[str]], ) -> List[FeatureView]: """ Returns the list of feature views that should be materialized. @@ -1069,7 +1078,8 @@ def get_historical_features( for feature_name in odfv_request_data_schema.keys(): if feature_name not in entity_df.columns: raise RequestDataNotFoundInEntityDfException( - feature_name=feature_name, feature_view_name=odfv.name, + feature_name=feature_name, + feature_view_name=odfv.name, ) _validate_feature_refs(_feature_refs, full_feature_names) @@ -1182,7 +1192,9 @@ def get_saved_dataset(self, name: str) -> SavedDataset: @log_exceptions_and_usage def materialize_incremental( - self, end_date: datetime, feature_views: Optional[List[str]] = None, + self, + end_date: datetime, + feature_views: Optional[List[str]] = None, ) -> None: """ Materialize incremental new data from the offline store into the online store. @@ -1264,7 +1276,10 @@ def tqdm_builder(length): ) self._registry.apply_materialization( - feature_view, self.project, start_date, end_date, + feature_view, + self.project, + start_date, + end_date, ) @log_exceptions_and_usage @@ -1336,7 +1351,10 @@ def tqdm_builder(length): ) self._registry.apply_materialization( - feature_view, self.project, start_date, end_date, + feature_view, + self.project, + start_date, + end_date, ) @log_exceptions_and_usage @@ -1439,8 +1457,8 @@ def write_to_offline_store( ) # Get columns of the batch source and the input dataframe. 
- column_names_and_types = feature_view.batch_source.get_table_column_names_and_types( - self.config + column_names_and_types = ( + feature_view.batch_source.get_table_column_names_and_types(self.config) ) source_columns = [column for column, _ in column_names_and_types] input_columns = df.columns.values.tolist() @@ -1701,12 +1719,17 @@ def _get_online_features( for table, requested_features in grouped_refs: # Get the correct set of entity values with the correct join keys. table_entity_values, idxs = self._get_unique_entities( - table, join_key_values, entity_name_to_join_key_map, + table, + join_key_values, + entity_name_to_join_key_map, ) # Fetch feature data for the minimum set of Entities. feature_data = self._read_from_online_store( - table_entity_values, provider, requested_features, table, + table_entity_values, + provider, + requested_features, + table, ) # Populate the result_rows with the Features from the OnlineStore inplace. @@ -1875,7 +1898,9 @@ def _get_unique_entities( """ # Get the correct set of entity values with the correct join keys. table_entity_values = self._get_table_entity_values( - table, entity_name_to_join_key_map, join_key_values, + table, + entity_name_to_join_key_map, + join_key_values, ) # Convert back to rowise. @@ -2060,7 +2085,8 @@ def _augment_response_with_on_demand_transforms( for odfv_name, _feature_refs in odfv_feature_refs.items(): odfv = requested_odfv_map[odfv_name] transformed_features_df = odfv.get_transformed_features_df( - initial_response_df, full_feature_names, + initial_response_df, + full_feature_names, ) selected_subset = [ f for f in transformed_features_df.columns if f in _feature_refs @@ -2117,9 +2143,7 @@ def _get_feature_views_to_use( features: Optional[Union[List[str], FeatureService]], allow_cache=False, hide_dummy_entity: bool = True, - ) -> Tuple[ - List[FeatureView], List[RequestFeatureView], List[OnDemandFeatureView], - ]: + ) -> Tuple[List[FeatureView], List[RequestFeatureView], List[OnDemandFeatureView]]: fvs = { fv.name: fv @@ -2364,10 +2388,10 @@ def get_validation_reference( self, name: str, allow_cache: bool = False ) -> ValidationReference: """ - Retrieves a validation reference. + Retrieves a validation reference. - Raises: - ValidationReferenceNotFoundException: The validation reference could not be found. + Raises: + ValidationReferenceNotFoundException: The validation reference could not be found. """ ref = self._registry.get_validation_reference( name, project=self.project, allow_cache=allow_cache diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index dd8cb4f0a67..03103766462 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -44,7 +44,10 @@ DUMMY_ENTITY_ID = "__dummy_id" DUMMY_ENTITY_NAME = "__dummy" DUMMY_ENTITY_VAL = "" -DUMMY_ENTITY = Entity(name=DUMMY_ENTITY_NAME, join_keys=[DUMMY_ENTITY_ID],) +DUMMY_ENTITY = Entity( + name=DUMMY_ENTITY_NAME, + join_keys=[DUMMY_ENTITY_ID], +) @typechecked diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index d0b4274cd28..a3dc3732da7 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -38,7 +38,11 @@ class Field: tags: Dict[str, str] def __init__( - self, *, name: str, dtype: FeastType, tags: Optional[Dict[str, str]] = None, + self, + *, + name: str, + dtype: FeastType, + tags: Optional[Dict[str, str]] = None, ): """ Creates a Field object. 
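Most of the hunks in this patch are mechanical formatting churn rather than behavioral changes: call sites written in the older Black style, with every argument collapsed onto one line ending in a trailing comma, are expanded to one argument per line. This is consistent with the "magic trailing comma" behavior of Black 20.8b0 and later, which the widened dev dependencies would pull in. A minimal sketch of that behavior follows; the function and values are hypothetical stand-ins, not Feast APIs.

    # Hypothetical stand-in for any call site touched by this patch.
    def make_feature(name, dtype, tags=None):
        return (name, dtype, tags)

    # Older Black emitted calls like this on one line, trailing comma
    # and all, whenever they fit the line length:
    feature = make_feature("driver_id", "Int64", None,)

    # Newer Black (>= 20.8b0) treats that trailing comma as "magic" and
    # explodes the call, matching the '+' sides of the hunks above:
    feature = make_feature(
        "driver_id",
        "Int64",
        None,
    )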
diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index 011a3b99b21..0b8e42b4e9e 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -144,7 +144,8 @@ def update_feature_views_with_inferred_features_and_entities( ): fv.entity_columns.append( Field( - name=entity.join_key, dtype=from_value_type(entity.value_type), + name=entity.join_key, + dtype=from_value_type(entity.value_type), ) ) @@ -166,7 +167,10 @@ def update_feature_views_with_inferred_features_and_entities( if run_inference_for_entities or run_inference_for_features: _infer_features_and_entities( - fv, join_keys, run_inference_for_features, config, + fv, + join_keys, + run_inference_for_features, + config, ) if not fv.features: @@ -177,7 +181,10 @@ def update_feature_views_with_inferred_features_and_entities( def _infer_features_and_entities( - fv: FeatureView, join_keys: Set[str], run_inference_for_features, config, + fv: FeatureView, + join_keys: Set[str], + run_inference_for_features, + config, ) -> None: """ Updates the specific feature in place with inferred features and entities. diff --git a/sdk/python/feast/infra/aws.py b/sdk/python/feast/infra/aws.py index 4109856e609..145c55952ea 100644 --- a/sdk/python/feast/infra/aws.py +++ b/sdk/python/feast/infra/aws.py @@ -205,7 +205,10 @@ def _deploy_feature_server(self, project: str, image_uri: str): @log_exceptions_and_usage(provider="AwsProvider") def teardown_infra( - self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], + self, + project: str, + tables: Sequence[FeatureView], + entities: Sequence[Entity], ) -> None: super(AwsProvider, self).teardown_infra(project, tables, entities) diff --git a/sdk/python/feast/infra/materialization/batch_materialization_engine.py b/sdk/python/feast/infra/materialization/batch_materialization_engine.py index 773c685d6e5..1890ffed5a2 100644 --- a/sdk/python/feast/infra/materialization/batch_materialization_engine.py +++ b/sdk/python/feast/infra/materialization/batch_materialization_engine.py @@ -96,7 +96,7 @@ def update( entities_to_keep: Sequence[Entity], ): """This method ensures that any necessary infrastructure or resources needed by the - engine are set up ahead of materialization.""" + engine are set up ahead of materialization.""" @abstractmethod def materialize( diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 982045607da..6c2bef757a5 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -134,7 +134,10 @@ def pull_latest_from_table_or_query( # When materializing a single feature view, we don't need full feature names. 
On demand transforms aren't materialized return BigQueryRetrievalJob( - query=query, client=client, config=config, full_feature_names=False, + query=query, + client=client, + config=config, + full_feature_names=False, ) @staticmethod @@ -164,7 +167,10 @@ def pull_all_from_table_or_query( WHERE {timestamp_field} BETWEEN TIMESTAMP('{start_date}') AND TIMESTAMP('{end_date}') """ return BigQueryRetrievalJob( - query=query, client=client, config=config, full_feature_names=False, + query=query, + client=client, + config=config, + full_feature_names=False, ) @staticmethod @@ -195,20 +201,27 @@ def get_historical_features( config.offline_store.location, ) - entity_schema = _get_entity_schema(client=client, entity_df=entity_df,) + entity_schema = _get_entity_schema( + client=client, + entity_df=entity_df, + ) - entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema + entity_df_event_timestamp_col = ( + offline_utils.infer_event_timestamp_from_entity_df(entity_schema) ) entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( - entity_df, entity_df_event_timestamp_col, client, + entity_df, + entity_df_event_timestamp_col, + client, ) @contextlib.contextmanager def query_generator() -> Iterator[str]: _upload_entity_df( - client=client, table_name=table_reference, entity_df=entity_df, + client=client, + table_name=table_reference, + entity_df=entity_df, ) expected_join_keys = offline_utils.get_expected_join_keys( @@ -598,7 +611,9 @@ def _get_table_reference_for_new_entity( def _upload_entity_df( - client: Client, table_name: str, entity_df: Union[pd.DataFrame, str], + client: Client, + table_name: str, + entity_df: Union[pd.DataFrame, str], ) -> Table: """Uploads a Pandas entity dataframe into a BigQuery table and returns the resulting table""" job: Union[bigquery.job.query.QueryJob, bigquery.job.load.LoadJob] diff --git a/sdk/python/feast/infra/offline_stores/bigquery_source.py b/sdk/python/feast/infra/offline_stores/bigquery_source.py index b06cc233696..bb8316869b7 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery_source.py +++ b/sdk/python/feast/infra/offline_stores/bigquery_source.py @@ -204,7 +204,9 @@ class BigQueryOptions: """ def __init__( - self, table: Optional[str], query: Optional[str], + self, + table: Optional[str], + query: Optional[str], ): self.table = table or "" self.query = query or "" @@ -221,7 +223,8 @@ def from_proto(cls, bigquery_options_proto: DataSourceProto.BigQueryOptions): Returns a BigQueryOptions object based on the bigquery_options protobuf """ bigquery_options = cls( - table=bigquery_options_proto.table, query=bigquery_options_proto.query, + table=bigquery_options_proto.table, + query=bigquery_options_proto.query, ) return bigquery_options @@ -234,7 +237,8 @@ def to_proto(self) -> DataSourceProto.BigQueryOptions: BigQueryOptionsProto protobuf """ bigquery_options_proto = DataSourceProto.BigQueryOptions( - table=self.table, query=self.query, + table=self.table, + query=self.query, ) return bigquery_options_proto diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py index f2aa535c1d4..415a46dde77 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py @@ -127,8 +127,8 @@ def query_generator() -> Iterator[str]: else: raise TypeError(entity_df) - 
entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema + entity_df_event_timestamp_col = ( + offline_utils.infer_event_timestamp_from_entity_df(entity_schema) ) expected_join_keys = offline_utils.get_expected_join_keys( @@ -140,7 +140,10 @@ def query_generator() -> Iterator[str]: ) entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( - entity_df, entity_df_event_timestamp_col, config, df_query, + entity_df, + entity_df_event_timestamp_col, + config, + df_query, ) query_context = offline_utils.get_feature_view_query_context( diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py index 6671a477654..c84fce03dcd 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py @@ -40,7 +40,10 @@ def postgres_container(): log_string_to_wait_for = "database system is ready to accept connections" waited = wait_for_logs( - container=container, predicate=log_string_to_wait_for, timeout=30, interval=10, + container=container, + predicate=log_string_to_wait_for, + timeout=30, + interval=10, ) logger.info("Waited for %s seconds until postgres container was up", waited) @@ -52,7 +55,9 @@ class PostgreSQLDataSourceCreator(DataSourceCreator, OnlineStoreCreator): def __init__( self, project_name: str, fixture_request: pytest.FixtureRequest, **kwargs ): - super().__init__(project_name,) + super().__init__( + project_name, + ) self.project_name = project_name self.container = fixture_request.getfixturevalue("postgres_container") diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py index 2a0925d9294..8e0badd7325 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py @@ -130,13 +130,16 @@ def get_historical_features( tmp_entity_df_table_name = offline_utils.get_temp_entity_table_name() entity_schema = _get_entity_schema( - spark_session=spark_session, entity_df=entity_df, + spark_session=spark_session, + entity_df=entity_df, ) event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( entity_schema=entity_schema, ) entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( - entity_df, event_timestamp_col, spark_session, + entity_df, + event_timestamp_col, + spark_session, ) _upload_entity_df( spark_session=spark_session, diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index ade1e543650..0ddeaad3545 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -80,7 +80,10 @@ def __init__( RuntimeWarning, ) self.spark_options = SparkOptions( - table=table, query=query, path=path, file_format=file_format, + table=table, + query=query, + path=path, + file_format=file_format, ) @property @@ -304,7 +307,10 @@ def __init__( file_format: Optional[str] = None, ): self.spark_options = SparkOptions( - table=table, query=query, path=path, file_format=file_format, + table=table, + 
query=query, + path=path, + file_format=file_format, ) @staticmethod diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/test_config/manual_tests.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/test_config/manual_tests.py index 9c73f018197..7d31aa90fb4 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/test_config/manual_tests.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/test_config/manual_tests.py @@ -7,6 +7,7 @@ FULL_REPO_CONFIGS = [ IntegrationTestRepoConfig( - provider="local", offline_store_creator=TrinoSourceCreator, + provider="local", + offline_store_creator=TrinoSourceCreator, ), ] diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py index f2b9f785a05..67efa6a27f8 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py @@ -63,7 +63,10 @@ def __init__( ) self.exposed_port = self.container.get_exposed_port("8080") self.client = Trino( - user="user", catalog="memory", host="localhost", port=self.exposed_port, + user="user", + catalog="memory", + host="localhost", + port=self.exposed_port, ) def teardown(self): diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py index 87a99b820e8..88a9021d1c7 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py @@ -202,7 +202,10 @@ def pull_latest_from_table_or_query( # When materializing a single feature view, we don't need full feature names. 
On demand transforms aren't materialized return TrinoRetrievalJob( - query=query, client=client, config=config, full_feature_names=False, + query=query, + client=client, + config=config, + full_feature_names=False, ) @staticmethod @@ -240,8 +243,10 @@ def get_historical_features( connector=config.offline_store.connector, ) - entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema=entity_schema + entity_df_event_timestamp_col = ( + offline_utils.infer_event_timestamp_from_entity_df( + entity_schema=entity_schema + ) ) entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( @@ -327,11 +332,17 @@ def pull_all_from_table_or_query( WHERE {timestamp_field} BETWEEN TIMESTAMP '{start_date}' AND TIMESTAMP '{end_date}' """ return TrinoRetrievalJob( - query=query, client=client, config=config, full_feature_names=False, + query=query, + client=client, + config=config, + full_feature_names=False, ) -def _get_table_reference_for_new_entity(catalog: str, dataset_name: str,) -> str: +def _get_table_reference_for_new_entity( + catalog: str, + dataset_name: str, +) -> str: """Gets the table_id for the new entity to be uploaded.""" table_name = offline_utils.get_temp_entity_table_name() return f"{catalog}.{dataset_name}.{table_name}" diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py index b559d0e59ea..d82650712ec 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py @@ -61,7 +61,8 @@ def from_proto(cls, trino_options_proto: DataSourceProto.TrinoOptions): Returns a TrinoOptions object based on the trino_options protobuf """ trino_options = cls( - table=trino_options_proto.table, query=trino_options_proto.query, + table=trino_options_proto.table, + query=trino_options_proto.query, ) return trino_options @@ -74,7 +75,8 @@ def to_proto(self) -> DataSourceProto.TrinoOptions: """ trino_options_proto = DataSourceProto.TrinoOptions( - table=self.table, query=self.query, + table=self.table, + query=self.query, ) return trino_options_proto diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index ae98f8d0c29..1af98c14375 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -88,7 +88,8 @@ def _to_arrow_internal(self): def persist(self, storage: SavedDatasetStorage): assert isinstance(storage, SavedDatasetFileStorage) filesystem, path = FileSource.create_filesystem_and_path( - storage.file_options.uri, storage.file_options.s3_endpoint_override, + storage.file_options.uri, + storage.file_options.s3_endpoint_override, ) if path.endswith(".parquet"): @@ -314,7 +315,9 @@ def evaluate_offline_job(): # TODO(kevjumba): remove try catch when fix is merged upstream in Dask. try: if created_timestamp_column: - source_df = source_df.sort_values(by=created_timestamp_column,) + source_df = source_df.sort_values( + by=created_timestamp_column, + ) source_df = source_df.sort_values(by=timestamp_field) @@ -352,7 +355,8 @@ def evaluate_offline_job(): # When materializing a single feature view, we don't need full feature names. 
On demand transforms aren't materialized return FileRetrievalJob( - evaluation_function=evaluate_offline_job, full_feature_names=False, + evaluation_function=evaluate_offline_job, + full_feature_names=False, ) @staticmethod @@ -394,7 +398,8 @@ def write_logged_features( data = pyarrow.parquet.read_table(data, use_threads=False, pre_buffer=False) filesystem, path = FileSource.create_filesystem_and_path( - destination.path, destination.s3_endpoint_override, + destination.path, + destination.s3_endpoint_override, ) pyarrow.dataset.write_dataset( @@ -453,7 +458,8 @@ def offline_write_batch( def _get_entity_df_event_timestamp_range( - entity_df: Union[pd.DataFrame, str], entity_df_event_timestamp_col: str, + entity_df: Union[pd.DataFrame, str], + entity_df_event_timestamp_col: str, ) -> Tuple[datetime, datetime]: if not isinstance(entity_df, pd.DataFrame): raise ValueError( @@ -483,7 +489,10 @@ def _read_datasource(data_source) -> dd.DataFrame: else None ) - return dd.read_parquet(data_source.path, storage_options=storage_options,) + return dd.read_parquet( + data_source.path, + storage_options=storage_options, + ) def _field_mapping( @@ -533,7 +542,8 @@ def _field_mapping( # Make sure to not have duplicated columns if entity_df_event_timestamp_col == timestamp_field: df_to_join = _run_dask_field_mapping( - df_to_join, {timestamp_field: f"__{timestamp_field}"}, + df_to_join, + {timestamp_field: f"__{timestamp_field}"}, ) timestamp_field = f"__{timestamp_field}" @@ -571,7 +581,9 @@ def _merge( def _normalize_timestamp( - df_to_join: dd.DataFrame, timestamp_field: str, created_timestamp_column: str, + df_to_join: dd.DataFrame, + timestamp_field: str, + created_timestamp_column: str, ) -> dd.DataFrame: df_to_join_types = df_to_join.dtypes timestamp_field_type = df_to_join_types[timestamp_field] @@ -645,14 +657,18 @@ def _drop_duplicates( df_to_join = df_to_join.persist() df_to_join = df_to_join.drop_duplicates( - all_join_keys + [entity_df_event_timestamp_col], keep="last", ignore_index=True, + all_join_keys + [entity_df_event_timestamp_col], + keep="last", + ignore_index=True, ) return df_to_join.persist() def _drop_columns( - df_to_join: dd.DataFrame, timestamp_field: str, created_timestamp_column: str, + df_to_join: dd.DataFrame, + timestamp_field: str, + created_timestamp_column: str, ) -> dd.DataFrame: entity_df_with_features = df_to_join.drop([timestamp_field], axis=1).persist() diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index e24317e8599..c8a0cb8a5c8 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -85,7 +85,8 @@ def to_df( for odfv in self.on_demand_feature_views: features_df = features_df.join( odfv.get_transformed_features_df( - features_df, self.full_feature_names, + features_df, + self.full_feature_names, ) ) @@ -129,7 +130,8 @@ def to_arrow( for odfv in self.on_demand_feature_views: features_df = features_df.join( odfv.get_transformed_features_df( - features_df, self.full_feature_names, + features_df, + self.full_feature_names, ) ) diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 1d7b79727ec..df70f958f71 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -192,12 +192,15 @@ def get_historical_features( entity_df, redshift_client, config, s3_resource ) - entity_df_event_timestamp_col 
= offline_utils.infer_event_timestamp_from_entity_df( - entity_schema + entity_df_event_timestamp_col = ( + offline_utils.infer_event_timestamp_from_entity_df(entity_schema) ) entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( - entity_df, entity_df_event_timestamp_col, redshift_client, config, + entity_df, + entity_df_event_timestamp_col, + redshift_client, + config, ) @contextlib.contextmanager diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 71394c44038..0f4c6a7b523 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -224,12 +224,14 @@ def get_historical_features( entity_schema = _get_entity_schema(entity_df, snowflake_conn, config) - entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema + entity_df_event_timestamp_col = ( + offline_utils.infer_event_timestamp_from_entity_df(entity_schema) ) entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( - entity_df, entity_df_event_timestamp_col, snowflake_conn, + entity_df, + entity_df_event_timestamp_col, + snowflake_conn, ) @contextlib.contextmanager diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py index b072c6e871b..258fba71b14 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake_source.py +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -360,4 +360,6 @@ def to_proto(self) -> LoggingConfigProto: ) def to_data_source(self) -> DataSource: - return SnowflakeSource(table=self.table_name,) + return SnowflakeSource( + table=self.table_name, + ) diff --git a/sdk/python/feast/infra/online_stores/contrib/postgres.py b/sdk/python/feast/infra/online_stores/contrib/postgres.py index 81727067f5a..e3999ab05a6 100644 --- a/sdk/python/feast/infra/online_stores/contrib/postgres.py +++ b/sdk/python/feast/infra/online_stores/contrib/postgres.py @@ -112,7 +112,9 @@ def online_read( SELECT entity_key, feature_name, value, event_ts FROM {} WHERE entity_key = ANY(%s); """ - ).format(sql.Identifier(_table_id(project, table)),), + ).format( + sql.Identifier(_table_id(project, table)), + ), (keys,), ) @@ -228,7 +230,10 @@ def _drop_table_and_index(table_name): DROP TABLE IF EXISTS {}; DROP INDEX IF EXISTS {}; """ - ).format(sql.Identifier(table_name), sql.Identifier(f"{table_name}_ek"),) + ).format( + sql.Identifier(table_name), + sql.Identifier(f"{table_name}_ek"), + ) def _to_naive_utc(ts: datetime): diff --git a/sdk/python/feast/infra/online_stores/datastore.py b/sdk/python/feast/infra/online_stores/datastore.py index fc3659ea1ae..8c5989789ee 100644 --- a/sdk/python/feast/infra/online_stores/datastore.py +++ b/sdk/python/feast/infra/online_stores/datastore.py @@ -197,7 +197,12 @@ def _write_minibatch( document_id = compute_entity_id(entity_key) key = client.key( - "Project", project, "Table", table.name, "Row", document_id, + "Project", + project, + "Table", + table.name, + "Row", + document_id, ) entity = datastore.Entity( @@ -318,7 +323,10 @@ def _initialize_client( project_id: Optional[str], namespace: Optional[str] ) -> datastore.Client: try: - client = datastore.Client(project=project_id, namespace=namespace,) + client = datastore.Client( + project=project_id, + namespace=namespace, + ) return client except DefaultCredentialsError as e: raise FeastProviderLoginError( @@ -394,7 +402,8 @@ def 
from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: @staticmethod def from_proto(datastore_table_proto: DatastoreTableProto) -> Any: datastore_table = DatastoreTable( - project=datastore_table_proto.project, name=datastore_table_proto.name, + project=datastore_table_proto.project, + name=datastore_table_proto.name, ) # Distinguish between null and empty string, since project_id and namespace are StringValues. diff --git a/sdk/python/feast/infra/online_stores/dynamodb.py b/sdk/python/feast/infra/online_stores/dynamodb.py index 6919f2cc298..530b0781806 100644 --- a/sdk/python/feast/infra/online_stores/dynamodb.py +++ b/sdk/python/feast/infra/online_stores/dynamodb.py @@ -338,7 +338,8 @@ def _get_table_name( def _delete_table_idempotent( - dynamodb_resource, table_name: str, + dynamodb_resource, + table_name: str, ): try: table = dynamodb_resource.Table(table_name) @@ -399,7 +400,8 @@ def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: @staticmethod def from_proto(dynamodb_table_proto: DynamoDBTableProto) -> Any: return DynamoDBTable( - name=dynamodb_table_proto.name, region=dynamodb_table_proto.region, + name=dynamodb_table_proto.name, + region=dynamodb_table_proto.region, ) def update(self): diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 2f0e9029426..6689897d145 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -283,7 +283,10 @@ def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: @staticmethod def from_proto(sqlite_table_proto: SqliteTableProto) -> Any: - return SqliteTable(path=sqlite_table_proto.path, name=sqlite_table_proto.name,) + return SqliteTable( + path=sqlite_table_proto.path, + name=sqlite_table_proto.name, + ) def update(self): self.conn.execute( diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index e31eb1e1773..0b09f5df430 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -135,7 +135,10 @@ def update_infra( ) def teardown_infra( - self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], + self, + project: str, + tables: Sequence[FeatureView], + entities: Sequence[Entity], ) -> None: set_usage_attribute("provider", self.__class__.__name__) if self.online_store: @@ -187,7 +190,10 @@ def online_read( return result def ingest_df( - self, feature_view: FeatureView, entities: List[Entity], df: pd.DataFrame, + self, + feature_view: FeatureView, + entities: List[Entity], + df: pd.DataFrame, ): set_usage_attribute("provider", self.__class__.__name__) table = pa.Table.from_pandas(df) diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 9695e4d736f..086c9ec6b3d 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -74,7 +74,10 @@ def plan_infra( @abc.abstractmethod def teardown_infra( - self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], + self, + project: str, + tables: Sequence[FeatureView], + entities: Sequence[Entity], ): """ Tear down all cloud resources for a repo. @@ -114,7 +117,10 @@ def online_write_batch( ... 
def ingest_df( - self, feature_view: FeatureView, entities: List[Entity], df: pd.DataFrame, + self, + feature_view: FeatureView, + entities: List[Entity], + df: pd.DataFrame, ): """ Ingests a DataFrame directly into the online store @@ -122,7 +128,9 @@ def ingest_df( pass def ingest_df_to_offline_store( - self, feature_view: FeatureView, df: pyarrow.Table, + self, + feature_view: FeatureView, + df: pyarrow.Table, ): """ Ingests a DataFrame directly into the offline store diff --git a/sdk/python/feast/infra/registry_stores/sql.py b/sdk/python/feast/infra/registry_stores/sql.py index 2d3ac9d6831..3daf0489811 100644 --- a/sdk/python/feast/infra/registry_stores/sql.py +++ b/sdk/python/feast/infra/registry_stores/sql.py @@ -486,7 +486,10 @@ def list_project_metadata( return [] def apply_saved_dataset( - self, saved_dataset: SavedDataset, project: str, commit: bool = True, + self, + saved_dataset: SavedDataset, + project: str, + commit: bool = True, ): return self._apply_object( saved_datasets, @@ -594,7 +597,9 @@ def apply_user_metadata( getattr(table.c, "feature_view_name") == name, table.c.project_id == project, ) - .values(values,) + .values( + values, + ) ) conn.execute(update_stmt) else: @@ -699,7 +704,9 @@ def _apply_object( update_stmt = ( update(table) .where(getattr(table.c, id_field_name) == name) - .values(values,) + .values( + values, + ) ) conn.execute(update_stmt) else: @@ -709,7 +716,9 @@ def _apply_object( "last_updated_timestamp": update_time, "project_id": project, } - insert_stmt = insert(table).values(values,) + insert_stmt = insert(table).values( + values, + ) conn.execute(insert_stmt) self._set_last_updated_metadata(update_datetime, project) @@ -818,7 +827,9 @@ def _set_last_updated_metadata(self, last_updated: datetime, project: str): ) conn.execute(update_stmt) else: - insert_stmt = insert(feast_metadata).values(values,) + insert_stmt = insert(feast_metadata).values( + values, + ) conn.execute(insert_stmt) def _get_last_updated_metadata(self, project: str): diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 51aecbf8a72..3c8ad9d71b0 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -89,7 +89,10 @@ def execute_redshift_statement_async( """ try: return redshift_data_client.execute_statement( - ClusterIdentifier=cluster_id, Database=database, DbUser=user, Sql=query, + ClusterIdentifier=cluster_id, + Database=database, + DbUser=user, + Sql=query, ) except ClientError as e: if e.response["Error"]["Code"] == "ValidationException": @@ -157,7 +160,11 @@ def get_redshift_statement_result(redshift_data_client, statement_id: str) -> di return redshift_data_client.get_statement_result(Id=statement_id) -def upload_df_to_s3(s3_resource, s3_path: str, df: pd.DataFrame,) -> None: +def upload_df_to_s3( + s3_resource, + s3_path: str, + df: pd.DataFrame, +) -> None: """Uploads a Pandas DataFrame to S3 as a parquet file Args: @@ -236,11 +243,19 @@ def upload_df_to_redshift( def delete_redshift_table( - redshift_data_client, cluster_id: str, database: str, user: str, table_name: str, + redshift_data_client, + cluster_id: str, + database: str, + user: str, + table_name: str, ): drop_query = f"DROP {table_name} IF EXISTS" execute_redshift_statement( - redshift_data_client, cluster_id, database, user, drop_query, + redshift_data_client, + cluster_id, + database, + user, + drop_query, ) @@ -376,7 +391,11 @@ def temporarily_upload_df_to_redshift( # Clean up the uploaded Redshift table 
execute_redshift_statement( - redshift_data_client, cluster_id, database, user, f"DROP TABLE {table_name}", + redshift_data_client, + cluster_id, + database, + user, + f"DROP TABLE {table_name}", ) @@ -423,7 +442,11 @@ def temporarily_upload_arrow_table_to_redshift( # Clean up the uploaded Redshift table execute_redshift_statement( - redshift_data_client, cluster_id, database, user, f"DROP TABLE {table_name}", + redshift_data_client, + cluster_id, + database, + user, + f"DROP TABLE {table_name}", ) @@ -491,7 +514,13 @@ def unload_redshift_query_to_pa( bucket, key = get_bucket_and_key(s3_path) execute_redshift_query_and_unload_to_s3( - redshift_data_client, cluster_id, database, user, s3_path, iam_role, query, + redshift_data_client, + cluster_id, + database, + user, + s3_path, + iam_role, + query, ) with tempfile.TemporaryDirectory() as temp_dir: diff --git a/sdk/python/feast/infra/utils/postgres/connection_utils.py b/sdk/python/feast/infra/utils/postgres/connection_utils.py index 6dbb4a4bc01..0e9cbf96fe4 100644 --- a/sdk/python/feast/infra/utils/postgres/connection_utils.py +++ b/sdk/python/feast/infra/utils/postgres/connection_utils.py @@ -64,5 +64,8 @@ def get_query_schema(config: PostgreSQLConfig, sql_query: str) -> Dict[str, np.d """ with _get_conn(config) as conn: conn.set_session(readonly=True) - df = pd.read_sql(f"SELECT * FROM {sql_query} LIMIT 0", conn,) + df = pd.read_sql( + f"SELECT * FROM {sql_query} LIMIT 0", + conn, + ) return dict(zip(df.columns, df.dtypes)) diff --git a/sdk/python/feast/infra/utils/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake_utils.py index 05834ae4369..78d505bd083 100644 --- a/sdk/python/feast/infra/utils/snowflake_utils.py +++ b/sdk/python/feast/infra/utils/snowflake_utils.py @@ -342,7 +342,10 @@ def upload_df( def upload_local_pq( - path: Path, cursor: SnowflakeCursor, stage_name: str, parallel: int = 4, + path: Path, + cursor: SnowflakeCursor, + stage_name: str, + parallel: int = 4, ): """ Args: diff --git a/sdk/python/feast/on_demand_feature_view.py b/sdk/python/feast/on_demand_feature_view.py index bad4edba810..b4c136ab135 100644 --- a/sdk/python/feast/on_demand_feature_view.py +++ b/sdk/python/feast/on_demand_feature_view.py @@ -295,7 +295,10 @@ def to_proto(self) -> OnDemandFeatureViewProto: sources[source_name] = OnDemandSource( feature_view_projection=fv_projection.to_proto() ) - for (source_name, request_sources,) in self.source_request_sources.items(): + for ( + source_name, + request_sources, + ) in self.source_request_sources.items(): sources[source_name] = OnDemandSource( request_data_source=request_sources.to_proto() ) @@ -305,7 +308,8 @@ def to_proto(self) -> OnDemandFeatureViewProto: features=[feature.to_proto() for feature in self.features], sources=sources, user_defined_function=UserDefinedFunctionProto( - name=self.udf.__name__, body=dill.dumps(self.udf, recurse=True), + name=self.udf.__name__, + body=dill.dumps(self.udf, recurse=True), ), description=self.description, tags=self.tags, @@ -326,7 +330,10 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): A OnDemandFeatureView object based on the on-demand feature view protobuf. 
""" sources = [] - for (_, on_demand_source,) in on_demand_feature_view_proto.spec.sources.items(): + for ( + _, + on_demand_source, + ) in on_demand_feature_view_proto.spec.sources.items(): if on_demand_source.WhichOneof("source") == "feature_view": sources.append( FeatureView.from_proto(on_demand_source.feature_view).projection @@ -393,7 +400,9 @@ def get_request_data_schema(self) -> Dict[str, ValueType]: return schema def get_transformed_features_df( - self, df_with_features: pd.DataFrame, full_feature_names: bool = False, + self, + df_with_features: pd.DataFrame, + full_feature_names: bool = False, ) -> pd.DataFrame: # Apply on demand transformations columns_to_cleanup = [] diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index f72fd717d23..ce8046a42ee 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -491,7 +491,10 @@ def apply_materialization( # Saved dataset operations @abstractmethod def apply_saved_dataset( - self, saved_dataset: SavedDataset, project: str, commit: bool = True, + self, + saved_dataset: SavedDataset, + project: str, + commit: bool = True, ): """ Stores a saved dataset metadata with Feast @@ -581,17 +584,17 @@ def get_validation_reference( self, name: str, project: str, allow_cache: bool = False ) -> ValidationReference: """ - Retrieves a validation reference. + Retrieves a validation reference. - Args: - name: Name of dataset - project: Feast project that this dataset belongs to - allow_cache: Whether to allow returning this dataset from a cached registry + Args: + name: Name of dataset + project: Feast project that this dataset belongs to + allow_cache: Whether to allow returning this dataset from a cached registry - Returns: - Returns either the specified ValidationReference, or raises an exception if - none is found - """ + Returns: + Returns either the specified ValidationReference, or raises an exception if + none is found + """ # TODO: Needs to be implemented. def list_validation_references( @@ -1574,7 +1577,10 @@ def delete_entity(self, name: str, project: str, commit: bool = True): raise EntityNotFoundException(name, project) def apply_saved_dataset( - self, saved_dataset: SavedDataset, project: str, commit: bool = True, + self, + saved_dataset: SavedDataset, + project: str, + commit: bool = True, ): """ Stores a saved dataset metadata with Feast @@ -1692,20 +1698,18 @@ def get_validation_reference( self, name: str, project: str, allow_cache: bool = False ) -> ValidationReference: """ - Retrieves a validation reference. + Retrieves a validation reference. 
- Args: - name: Name of dataset - project: Feast project that this dataset belongs to - allow_cache: Whether to allow returning this dataset from a cached registry + Args: + name: Name of dataset + project: Feast project that this dataset belongs to + allow_cache: Whether to allow returning this dataset from a cached registry - Returns: - Returns either the specified ValidationReference, or raises an exception if - none is found - """ - registry_proto = self._get_registry_proto( - project=project, allow_cache=allow_cache - ) + Returns: + Returns either the specified ValidationReference, or raises an exception if + none is found + """ + registry_proto = self._get_registry_proto(allow_cache=allow_cache) for validation_reference in registry_proto.validation_references: if ( validation_reference.name == name diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index f7f564df6ff..37e2cf95e5f 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -272,7 +272,8 @@ def _validate_online_store_config(cls, values): online_config_class(**values["online_store"]) except ValidationError as e: raise ValidationError( - [ErrorWrapper(e, loc="online_store")], model=RepoConfig, + [ErrorWrapper(e, loc="online_store")], + model=RepoConfig, ) return values @@ -306,7 +307,8 @@ def _validate_offline_store_config(cls, values): offline_config_class(**values["offline_store"]) except ValidationError as e: raise ValidationError( - [ErrorWrapper(e, loc="offline_store")], model=RepoConfig, + [ErrorWrapper(e, loc="offline_store")], + model=RepoConfig, ) return values @@ -340,7 +342,8 @@ def _validate_feature_server_config(cls, values): feature_server_config_class(**values["feature_server"]) except ValidationError as e: raise ValidationError( - [ErrorWrapper(e, loc="feature_server")], model=RepoConfig, + [ErrorWrapper(e, loc="feature_server")], + model=RepoConfig, ) return values @@ -377,7 +380,12 @@ def write_to_path(self, repo_path: Path): config_path = repo_path / "feature_store.yaml" with open(config_path, mode="w") as f: yaml.dump( - yaml.safe_load(self.json(exclude={"repo_path"}, exclude_unset=True,)), + yaml.safe_load( + self.json( + exclude={"repo_path"}, + exclude_unset=True, + ) + ), f, sort_keys=False, ) diff --git a/sdk/python/feast/stream_feature_view.py b/sdk/python/feast/stream_feature_view.py index f19b1fcff7e..29e8abb7da2 100644 --- a/sdk/python/feast/stream_feature_view.py +++ b/sdk/python/feast/stream_feature_view.py @@ -179,7 +179,8 @@ def to_proto(self): udf_proto = None if self.udf: udf_proto = UserDefinedFunctionProto( - name=self.udf.__name__, body=dill.dumps(self.udf, recurse=True), + name=self.udf.__name__, + body=dill.dumps(self.udf, recurse=True), ) spec = StreamFeatureViewSpecProto( name=self.name, diff --git a/sdk/python/feast/templates/aws/test.py b/sdk/python/feast/templates/aws/test.py index 07410954f7b..3d223e8f266 100644 --- a/sdk/python/feast/templates/aws/test.py +++ b/sdk/python/feast/templates/aws/test.py @@ -54,7 +54,8 @@ def main(): # Retrieve features from the online store (Firestore) online_features = fs.get_online_features( - features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], + features=features, + entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], ).to_dict() print() diff --git a/sdk/python/feast/templates/gcp/test.py b/sdk/python/feast/templates/gcp/test.py index 538334044bf..8ff11bda5c7 100644 --- a/sdk/python/feast/templates/gcp/test.py +++ b/sdk/python/feast/templates/gcp/test.py @@ -54,7 
+54,8 @@ def main(): # Retrieve features from the online store (Firestore) online_features = fs.get_online_features( - features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], + features=features, + entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], ).to_dict() print() diff --git a/sdk/python/feast/templates/postgres/driver_repo.py b/sdk/python/feast/templates/postgres/driver_repo.py index 4096943bb73..61e32eb58ef 100644 --- a/sdk/python/feast/templates/postgres/driver_repo.py +++ b/sdk/python/feast/templates/postgres/driver_repo.py @@ -6,7 +6,10 @@ ) from feast.types import Float32, Int64 -driver = Entity(name="driver_id", join_keys=["driver_id"],) +driver = Entity( + name="driver_id", + join_keys=["driver_id"], +) driver_stats_source = PostgreSQLSource( diff --git a/sdk/python/feast/templates/postgres/test.py b/sdk/python/feast/templates/postgres/test.py index 81ac2996985..d547bc8c649 100644 --- a/sdk/python/feast/templates/postgres/test.py +++ b/sdk/python/feast/templates/postgres/test.py @@ -52,7 +52,8 @@ def main(): # Retrieve features from the online store online_features = fs.get_online_features( - features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], + features=features, + entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], ).to_dict() print() diff --git a/sdk/python/feast/templates/snowflake/test.py b/sdk/python/feast/templates/snowflake/test.py index 32aa6380d51..3c33f6aefda 100644 --- a/sdk/python/feast/templates/snowflake/test.py +++ b/sdk/python/feast/templates/snowflake/test.py @@ -54,7 +54,8 @@ def main(): # Retrieve features from the online store online_features = fs.get_online_features( - features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], + features=features, + entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], ).to_dict() print() diff --git a/sdk/python/feast/templates/spark/example.py b/sdk/python/feast/templates/spark/example.py index d006353118b..8ad48f53fc4 100644 --- a/sdk/python/feast/templates/spark/example.py +++ b/sdk/python/feast/templates/spark/example.py @@ -16,8 +16,14 @@ # Entity definitions -driver = Entity(name="driver", description="driver id",) -customer = Entity(name="customer", description="customer id",) +driver = Entity( + name="driver", + description="driver id", +) +customer = Entity( + name="customer", + description="customer id", +) # Sources driver_hourly_stats = SparkSource( diff --git a/sdk/python/feast/types.py b/sdk/python/feast/types.py index 40c1d62e7d2..0ba1725f173 100644 --- a/sdk/python/feast/types.py +++ b/sdk/python/feast/types.py @@ -178,7 +178,9 @@ def __str__(self): } -def from_value_type(value_type: ValueType,) -> FeastType: +def from_value_type( + value_type: ValueType, +) -> FeastType: """ Converts a ValueType enum to a Feast type. 
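# A hedged usage sketch of the reflowed helper above; it is not part of the
# original patch. `from_value_type` maps the ValueType enum onto the
# SDK-level FeastType. The import paths below are assumptions based on the
# file layout shown in this diff (feast/types.py, feast/value_type.py).
from feast.types import from_value_type
from feast.value_type import ValueType

feast_type = from_value_type(ValueType.INT64)
print(feast_type)  # expected: the SDK's Int64 type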
diff --git a/sdk/python/feast/ui_server.py b/sdk/python/feast/ui_server.py index cb275c8f912..4d1fd67dc1e 100644 --- a/sdk/python/feast/ui_server.py +++ b/sdk/python/feast/ui_server.py @@ -83,7 +83,9 @@ def catch_all(): return Response(content, media_type="text/html") app.mount( - "/", StaticFiles(directory=ui_dir, html=True), name="site", + "/", + StaticFiles(directory=ui_dir, html=True), + name="site", ) return app diff --git a/sdk/python/feast/utils.py b/sdk/python/feast/utils.py index af22fbca3f4..1b999341597 100644 --- a/sdk/python/feast/utils.py +++ b/sdk/python/feast/utils.py @@ -151,7 +151,8 @@ def _get_column_names( def _run_pyarrow_field_mapping( - table: pyarrow.Table, field_mapping: Dict[str, str], + table: pyarrow.Table, + field_mapping: Dict[str, str], ) -> pyarrow.Table: # run field mapping in the forward direction cols = table.column_names @@ -163,7 +164,8 @@ def _run_pyarrow_field_mapping( def _run_dask_field_mapping( - table: dd.DataFrame, field_mapping: Dict[str, str], + table: dd.DataFrame, + field_mapping: Dict[str, str], ): if field_mapping: # run field mapping in the forward direction diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 9e47356000c..010c9c2f40f 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -115,7 +115,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.0.1 +click==8.1.3 # via # black # bowler @@ -177,7 +177,7 @@ executing==0.8.3 # via stack-data fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.2 +fastavro==1.5.1 # via # feast (setup.py) # pandavro @@ -264,6 +264,8 @@ googleapis-common-protos==1.56.4 # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) +greenlet==1.1.2 + # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -417,7 +419,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.4.3 +pandas==1.2.5 # via # altair # db-dtypes @@ -426,7 +428,7 @@ pandas==1.4.3 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.5.2 +pandavro==1.7.0 # via feast (setup.py) parso==0.8.3 # via jedi @@ -646,7 +648,6 @@ six==1.16.0 # happybase # mock # msrestazure - # pandavro # python-dateutil # virtualenv sniffio==1.2.0 diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 0440a171ed1..22e4d44e1a9 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -24,7 +24,7 @@ certifi==2022.6.15 # via requests charset-normalizer==2.1.0 # via requests -click==8.0.1 +click==8.1.3 # via # bowler # feast (setup.py) @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.2 +fastavro==1.5.1 # via # feast (setup.py) # pandavro @@ -57,6 +57,8 @@ googleapis-common-protos==1.56.4 # feast (setup.py) # google-api-core # tensorflow-metadata +greenlet==1.1.2 + # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -95,11 +97,11 @@ numpy==1.23.1 # pyarrow packaging==21.3 # via dask -pandas==1.4.3 +pandas==1.2.5 # via # feast (setup.py) # pandavro -pandavro==1.5.2 +pandavro==1.7.0 # via feast (setup.py) partd==1.2.0 # via dask @@ -113,7 +115,7 @@ protobuf==3.20.1 # grpcio-reflection # proto-plus # tensorflow-metadata -pyarrow==6.0.1 +pyarrow==8.0.0 # via feast (setup.py) pyasn1==0.4.8 # via @@ -150,7 +152,6 @@ six==1.16.0 # via # google-auth # grpcio - # pandavro # 
python-dateutil sniffio==1.2.0 # via anyio diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 67db3a978f4..a943eb3c329 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -33,9 +33,7 @@ anyio==3.6.1 # starlette # watchfiles appdirs==1.4.4 - # via - # black - # fissix + # via fissix appnope==0.1.3 # via ipython asn1crypto==1.5.1 @@ -53,7 +51,6 @@ async-timeout==4.0.2 attrs==21.4.0 # via # aiohttp - # black # bowler # jsonschema # pytest @@ -79,7 +76,7 @@ backports-zoneinfo==0.2.1 # via # pytz-deprecation-shim # tzlocal -black==19.10b0 +black==22.6.0 # via feast (setup.py) boto3==1.20.23 # via @@ -119,7 +116,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.0.1 +click==8.1.3 # via # black # bowler @@ -181,7 +178,7 @@ executing==0.8.3 # via stack-data fastapi==0.78.0 # via feast (setup.py) -fastavro==1.5.2 +fastavro==1.5.1 # via # feast (setup.py) # pandavro @@ -268,6 +265,8 @@ googleapis-common-protos==1.56.3 # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) +greenlet==1.1.2 + # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -389,7 +388,9 @@ mypy==0.961 # feast (setup.py) # sqlalchemy mypy-extensions==0.4.3 - # via mypy + # via + # black + # mypy mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 @@ -423,7 +424,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.4.3 +pandas==1.2.5 # via # altair # db-dtypes @@ -432,7 +433,7 @@ pandas==1.4.3 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.5.2 +pandavro==1.7.0 # via feast (setup.py) parso==0.8.3 # via jedi @@ -451,7 +452,9 @@ pickleshare==0.7.5 pip-tools==6.8.0 # via feast (setup.py) platformdirs==2.5.2 - # via virtualenv + # via + # black + # virtualenv pluggy==1.0.0 # via pytest ply==3.11 @@ -654,7 +657,6 @@ six==1.16.0 # happybase # mock # msrestazure - # pandavro # python-dateutil # virtualenv sniffio==1.2.0 @@ -703,11 +705,11 @@ thriftpy2==0.4.14 # via happybase toml==0.10.2 # via - # black # feast (setup.py) # pre-commit tomli==2.0.1 # via + # black # build # coverage # mypy @@ -730,8 +732,6 @@ traitlets==5.3.0 # nbformat trino==0.314.0 # via feast (setup.py) -typed-ast==1.5.4 - # via black typeguard==2.13.3 # via feast (setup.py) types-protobuf==3.19.22 @@ -758,6 +758,7 @@ typing-extensions==4.3.0 # via # aioitertools # azure-core + # black # great-expectations # mypy # pydantic diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index a09289f662d..9d192d08191 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -24,7 +24,7 @@ certifi==2022.6.15 # via requests charset-normalizer==2.1.0 # via requests -click==8.0.1 +click==8.1.3 # via # bowler # feast (setup.py) @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.78.0 # via feast (setup.py) -fastavro==1.5.2 +fastavro==1.5.1 # via # feast (setup.py) # pandavro @@ -57,6 +57,8 @@ googleapis-common-protos==1.56.3 # feast (setup.py) # google-api-core # tensorflow-metadata +greenlet==1.1.2 + # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -97,11 +99,11 @@ numpy==1.23.1 # pyarrow packaging==21.3 # via dask -pandas==1.4.3 +pandas==1.2.5 # via # feast (setup.py) # pandavro -pandavro==1.5.2 +pandavro==1.7.0 # via feast (setup.py) partd==1.2.0 # via dask @@ -115,7 +117,7 @@ protobuf==3.20.1 # 
grpcio-reflection # proto-plus # tensorflow-metadata -pyarrow==6.0.1 +pyarrow==8.0.0 # via feast (setup.py) pyasn1==0.4.8 # via @@ -152,7 +154,6 @@ six==1.16.0 # via # google-auth # grpcio - # pandavro # python-dateutil sniffio==1.2.0 # via anyio diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 8be0c360524..222660bd258 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -115,7 +115,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.0.1 +click==8.1.3 # via # black # bowler @@ -177,7 +177,7 @@ executing==0.8.3 # via stack-data fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.2 +fastavro==1.5.1 # via # feast (setup.py) # pandavro @@ -264,6 +264,8 @@ googleapis-common-protos==1.56.4 # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) +greenlet==1.1.2 + # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -417,7 +419,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.4.3 +pandas==1.2.5 # via # altair # db-dtypes @@ -426,7 +428,7 @@ pandas==1.4.3 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.5.2 +pandavro==1.7.0 # via feast (setup.py) parso==0.8.3 # via jedi @@ -648,7 +650,6 @@ six==1.16.0 # happybase # mock # msrestazure - # pandavro # python-dateutil # virtualenv sniffio==1.2.0 diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 91e66020833..fe11b7348b4 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -24,7 +24,7 @@ certifi==2022.6.15 # via requests charset-normalizer==2.1.0 # via requests -click==8.0.1 +click==8.1.3 # via # bowler # feast (setup.py) @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.2 +fastavro==1.5.1 # via # feast (setup.py) # pandavro @@ -57,6 +57,8 @@ googleapis-common-protos==1.56.4 # feast (setup.py) # google-api-core # tensorflow-metadata +greenlet==1.1.2 + # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -95,11 +97,11 @@ numpy==1.23.1 # pyarrow packaging==21.3 # via dask -pandas==1.4.3 +pandas==1.2.5 # via # feast (setup.py) # pandavro -pandavro==1.5.2 +pandavro==1.7.0 # via feast (setup.py) partd==1.2.0 # via dask @@ -113,7 +115,7 @@ protobuf==3.20.1 # grpcio-reflection # proto-plus # tensorflow-metadata -pyarrow==6.0.1 +pyarrow==8.0.0 # via feast (setup.py) pyasn1==0.4.8 # via @@ -150,7 +152,6 @@ six==1.16.0 # via # google-auth # grpcio - # pandavro # python-dateutil sniffio==1.2.0 # via anyio diff --git a/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py b/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py index 6e22c93e5f6..03070887c46 100644 --- a/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py +++ b/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py @@ -60,5 +60,7 @@ def test_online_retrieval(environment, universal_data_sources, benchmark): unprefixed_feature_refs.remove("conv_rate_plus_val_to_add") benchmark( - fs.get_online_features, features=feature_refs, entity_rows=entity_rows, + fs.get_online_features, + features=feature_refs, + entity_rows=entity_rows, ) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index ac30149cfad..e296aeedbd4 100644 --- a/sdk/python/tests/conftest.py +++ 
b/sdk/python/tests/conftest.py @@ -92,7 +92,10 @@ def pytest_addoption(parser): help="Run tests with external dependencies", ) parser.addoption( - "--benchmark", action="store_true", default=False, help="Run benchmark tests", + "--benchmark", + action="store_true", + default=False, + help="Run benchmark tests", ) parser.addoption( "--goserver", @@ -334,7 +337,8 @@ def feature_server_endpoint(environment): proc.start() # Wait for server to start wait_retry_backoff( - lambda: (None, _check_port_open("localhost", port)), timeout_secs=10, + lambda: (None, _check_port_open("localhost", port)), + timeout_secs=10, ) yield f"http://localhost:{port}" @@ -372,7 +376,9 @@ def universal_data_sources(environment) -> TestData: def e2e_data_sources(environment: Environment): df = create_basic_driver_dataset() data_source = environment.data_source_creator.create_data_source( - df, environment.feature_store.project, field_mapping={"ts_1": "ts"}, + df, + environment.feature_store.project, + field_mapping={"ts_1": "ts"}, ) return df, data_source diff --git a/sdk/python/tests/doctest/test_all.py b/sdk/python/tests/doctest/test_all.py index 31f181ad535..0412e34c36c 100644 --- a/sdk/python/tests/doctest/test_all.py +++ b/sdk/python/tests/doctest/test_all.py @@ -17,7 +17,10 @@ def setup_feature_store(): init_repo("feature_repo", "local") fs = FeatureStore(repo_path="feature_repo") - driver = Entity(name="driver_id", description="driver id",) + driver = Entity( + name="driver_id", + description="driver id", + ) driver_hourly_stats = FileSource( path="feature_repo/data/driver_stats.parquet", timestamp_field="event_timestamp", @@ -88,7 +91,8 @@ def test_docstrings(): setup_function() test_suite = doctest.DocTestSuite( - temp_module, optionflags=doctest.ELLIPSIS, + temp_module, + optionflags=doctest.ELLIPSIS, ) if test_suite.countTestCases() > 0: result = unittest.TextTestRunner(sys.stdout).run(test_suite) diff --git a/sdk/python/tests/example_repos/example_feature_repo_1.py b/sdk/python/tests/example_repos/example_feature_repo_1.py index 8d6d96d9ef1..5abd9fb18ad 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_1.py +++ b/sdk/python/tests/example_repos/example_feature_repo_1.py @@ -28,11 +28,13 @@ ) customer_driver_combined_source = BigQuerySource( - table="feast-oss.public.customer_driver", timestamp_field="event_timestamp", + table="feast-oss.public.customer_driver", + timestamp_field="event_timestamp", ) driver_locations_push_source = PushSource( - name="driver_locations_push", batch_source=driver_locations_source, + name="driver_locations_push", + batch_source=driver_locations_source, ) driver = Entity( diff --git a/sdk/python/tests/example_repos/example_feature_repo_2.py b/sdk/python/tests/example_repos/example_feature_repo_2.py index 073c48c1c1e..21476e3779f 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_2.py +++ b/sdk/python/tests/example_repos/example_feature_repo_2.py @@ -9,7 +9,10 @@ created_timestamp_column="created", ) -driver = Entity(name="driver_id", description="driver id",) +driver = Entity( + name="driver_id", + description="driver id", +) driver_hourly_stats_view = FeatureView( diff --git a/sdk/python/tests/example_repos/example_feature_repo_version_0_19.py b/sdk/python/tests/example_repos/example_feature_repo_version_0_19.py index a65c031cea4..68681794f9b 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_version_0_19.py +++ b/sdk/python/tests/example_repos/example_feature_repo_version_0_19.py @@ -57,7 +57,8 @@ request_source = RequestDataSource( - 
name="conv_rate_input", schema={"val_to_add": ValueType.INT64}, + name="conv_rate_input", + schema={"val_to_add": ValueType.INT64}, ) diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py index 4b079999ed7..77b435ecc99 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py @@ -6,7 +6,11 @@ path="driver_stats.parquet", # this parquet is not real and will not be read ) -driver = Entity(name="driver_id", description="driver id", join_keys=["driver"],) +driver = Entity( + name="driver_id", + description="driver id", + join_keys=["driver"], +) driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", # Intentionally use the same FeatureView name diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py index 06631505312..c30b933eafc 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py @@ -11,7 +11,11 @@ # The join key here is deliberately different from the parquet file to test the failure path. -driver = Entity(name="driver_id", description="driver id", join_keys=["driver"],) +driver = Entity( + name="driver_id", + description="driver id", + join_keys=["driver"], +) driver_hourly_stats_view = FeatureView( diff --git a/sdk/python/tests/example_repos/on_demand_feature_view_repo.py b/sdk/python/tests/example_repos/on_demand_feature_view_repo.py index ac572d5747a..5df0ee1c6ff 100644 --- a/sdk/python/tests/example_repos/on_demand_feature_view_repo.py +++ b/sdk/python/tests/example_repos/on_demand_feature_view_repo.py @@ -15,7 +15,10 @@ owner="test2@gmail.com", ) -driver = Entity(name="driver_id", description="driver id",) +driver = Entity( + name="driver_id", + description="driver id", +) driver_daily_features_view = FeatureView( name="driver_daily_features", diff --git a/sdk/python/tests/foo_provider.py b/sdk/python/tests/foo_provider.py index bd6f9811e84..7866465b919 100644 --- a/sdk/python/tests/foo_provider.py +++ b/sdk/python/tests/foo_provider.py @@ -31,7 +31,10 @@ def update_infra( pass def teardown_infra( - self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], + self, + project: str, + tables: Sequence[FeatureView], + entities: Sequence[Entity], ): pass diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index 465fa41769e..1430d9cdf97 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -65,7 +65,9 @@ def server_port(environment, server_type: str): fs = environment.feature_store embedded = EmbeddedOnlineFeatureServer( - repo_path=str(fs.repo_path.absolute()), repo_config=fs.config, feature_store=fs, + repo_path=str(fs.repo_path.absolute()), + repo_config=fs.config, + feature_store=fs, ) port = free_port() if server_type == "grpc": diff --git a/sdk/python/tests/integration/e2e/test_python_feature_server.py b/sdk/python/tests/integration/e2e/test_python_feature_server.py index ac098d3f290..97b96933914 100644 --- a/sdk/python/tests/integration/e2e/test_python_feature_server.py +++ 
b/sdk/python/tests/integration/e2e/test_python_feature_server.py @@ -70,7 +70,10 @@ def test_push(python_fs_client): }, } ) - response = python_fs_client.post("/push", data=json_data,) + response = python_fs_client.post( + "/push", + data=json_data, + ) # Check new pushed temperature is fetched assert response.status_code == 200 diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index 0d65640dcaf..b8908663b3e 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -118,9 +118,13 @@ def test_historical_retrieval_with_validation(environment, universal_data_source columns=["order_id", "origin_id", "destination_id"] ) reference_job = store.get_historical_features( - entity_df=entity_df, features=_features, + entity_df=entity_df, + features=_features, + ) + job = store.get_historical_features( + entity_df=entity_df, + features=_features, ) - job = store.get_historical_features(entity_df=entity_df, features=_features,) # Save dataset using reference job and retrieve it store.create_saved_dataset( @@ -149,7 +153,8 @@ def test_historical_retrieval_fails_on_validation(environment, universal_data_so ) reference_job = store.get_historical_features( - entity_df=entity_df, features=_features, + entity_df=entity_df, + features=_features, ) store.create_saved_dataset( @@ -158,7 +163,10 @@ def test_historical_retrieval_fails_on_validation(environment, universal_data_so storage=environment.data_source_creator.create_saved_dataset_destination(), ) - job = store.get_historical_features(entity_df=entity_df, features=_features,) + job = store.get_historical_features( + entity_df=entity_df, + features=_features, + ) with pytest.raises(ValidationFailed) as exc_info: job.to_df( diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 63425444179..022d3ec3f12 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -307,11 +307,12 @@ def values(self): def construct_universal_feature_views( - data_sources: UniversalDataSources, with_odfv: bool = True, + data_sources: UniversalDataSources, + with_odfv: bool = True, ) -> UniversalFeatureViews: driver_hourly_stats = create_driver_hourly_stats_feature_view(data_sources.driver) - driver_hourly_stats_base_feature_view = create_driver_hourly_stats_batch_feature_view( - data_sources.driver + driver_hourly_stats_base_feature_view = ( + create_driver_hourly_stats_batch_feature_view(data_sources.driver) ) return UniversalFeatureViews( customer=create_customer_daily_profile_feature_view(data_sources.customer), @@ -406,7 +407,8 @@ def construct_test_environment( ) else: registry = RegistryConfig( - path=str(Path(repo_dir_name) / "registry.db"), cache_ttl_seconds=1, + path=str(Path(repo_dir_name) / "registry.db"), + cache_ttl_seconds=1, ) config = RepoConfig( diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index ccc1544bb8f..7b8e5e80e67 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -124,7 +124,9 @@ def _upload_parquet_file(self, df, file_name, minio_endpoint): if not client.bucket_exists(self.bucket): 
client.make_bucket(self.bucket) client.fput_object( - self.bucket, file_name, self.f.name, + self.bucket, + file_name, + self.f.name, ) def create_data_source( diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 3fee0b70019..b6e9aa8fc06 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -130,7 +130,8 @@ def similarity_feature_view( def create_conv_rate_request_source(): return RequestSource( - name="conv_rate_input", schema=[Field(name="val_to_add", dtype=Int32)], + name="conv_rate_input", + schema=[Field(name="val_to_add", dtype=Int32)], ) @@ -296,7 +297,8 @@ def create_field_mapping_feature_view(source): def create_pushable_feature_view(batch_source: DataSource): push_source = PushSource( - name="location_stats_push_source", batch_source=batch_source, + name="location_stats_push_source", + batch_source=batch_source, ) return StreamFeatureView( name="pushable_location_stats", diff --git a/sdk/python/tests/integration/offline_store/test_feature_logging.py b/sdk/python/tests/integration/offline_store/test_feature_logging.py index a6f8e56de7d..5d74ee284cf 100644 --- a/sdk/python/tests/integration/offline_store/test_feature_logging.py +++ b/sdk/python/tests/integration/offline_store/test_feature_logging.py @@ -65,12 +65,14 @@ def test_feature_service_logging(environment, universal_data_sources, pass_as_pa with to_logs_dataset(first_batch, pass_as_path) as logs: store.write_logged_features( - source=feature_service, logs=logs, + source=feature_service, + logs=logs, ) with to_logs_dataset(second_batch, pass_as_path) as logs: store.write_logged_features( - source=feature_service, logs=logs, + source=feature_service, + logs=logs, ) expected_columns = list(set(logs_df.columns) - {LOG_DATE_FIELD}) diff --git a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py index dfe14d73f96..645e0f7331f 100644 --- a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py +++ b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py @@ -17,7 +17,9 @@ @pytest.mark.skip( reason="No way to run this test today. 
Credentials conflict with real AWS credentials in CI" ) -def test_registration_and_retrieval_from_custom_s3_endpoint(universal_data_sources,): +def test_registration_and_retrieval_from_custom_s3_endpoint( + universal_data_sources, +): config = IntegrationTestRepoConfig( offline_store_creator="tests.integration.feature_repos.universal.data_sources.file.S3FileDataSourceCreator" ) diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index abaf1622c08..32e6e52d185 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -209,7 +209,10 @@ def get_expected_training_df( (f"global_stats__{k}" if full_feature_names else k): global_record.get( k, None ) - for k in ("num_rides", "avg_ride_length",) + for k in ( + "num_rides", + "avg_ride_length", + ) } ) @@ -882,5 +885,7 @@ def assert_frame_equal(expected_df, actual_df, keys): ) pd_assert_frame_equal( - expected_df, actual_df, check_dtype=False, + expected_df, + actual_df, + check_dtype=False, ) diff --git a/sdk/python/tests/integration/online_store/test_e2e_local.py b/sdk/python/tests/integration/online_store/test_e2e_local.py index c5b66e7ddce..46198a40624 100644 --- a/sdk/python/tests/integration/online_store/test_e2e_local.py +++ b/sdk/python/tests/integration/online_store/test_e2e_local.py @@ -135,7 +135,8 @@ def _test_materialize_and_online_retrieval( # Test `feast materialize-incremental` and online retrieval. r = runner.run( - ["materialize-incremental", end_date.isoformat()], cwd=Path(store.repo_path), + ["materialize-incremental", end_date.isoformat()], + cwd=Path(store.repo_path), ) assert r.returncode == 0, f"stdout: {r.stdout}\n stderr: {r.stderr}" diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index 228069c13ca..6521c9ed2f7 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -799,7 +799,10 @@ def get_latest_feature_values_from_dataframes( entity_row, customer_df, "customer_id", "customer_id" ) latest_location_row = get_latest_row( - entity_row, location_df, "location_id", "location_id", + entity_row, + location_df, + "location_id", + "location_id", ) # Since the event timestamp columns may contain timestamps of different timezones, diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/integration/registration/test_cli.py index ecc17fc06c5..15e5cf09eef 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/integration/registration/test_cli.py @@ -86,7 +86,8 @@ def test_universal_cli(environment: Environment): assertpy.assert_that(result.returncode).is_equal_to(0) assertpy.assert_that(fs.list_feature_views()).is_length(4) result = runner.run( - ["data-sources", "describe", "customer_profile_source"], cwd=repo_path, + ["data-sources", "describe", "customer_profile_source"], + cwd=repo_path, ) assertpy.assert_that(result.returncode).is_equal_to(0) assertpy.assert_that(fs.list_data_sources()).is_length(4) diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index 88a4b9f2491..6243e27fcad 100644 --- 
a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -87,11 +87,14 @@ def feature_store_with_s3_registry(): @pytest.mark.parametrize( - "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], ) def test_apply_entity_success(test_feature_store): entity = Entity( - name="driver_car_id", description="Car driver id", tags={"team": "matchmaking"}, + name="driver_car_id", + description="Car driver id", + tags={"team": "matchmaking"}, ) # Register Entity @@ -121,7 +124,9 @@ def test_apply_entity_success(test_feature_store): ) def test_apply_entity_integration(test_feature_store): entity = Entity( - name="driver_car_id", description="Car driver id", tags={"team": "matchmaking"}, + name="driver_car_id", + description="Car driver id", + tags={"team": "matchmaking"}, ) # Register Entity @@ -150,7 +155,8 @@ def test_apply_entity_integration(test_feature_store): @pytest.mark.parametrize( - "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], ) def test_apply_feature_view_success(test_feature_store): # Create Feature Views @@ -204,7 +210,8 @@ def test_apply_feature_view_success(test_feature_store): @pytest.mark.integration @pytest.mark.parametrize( - "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], ) @pytest.mark.parametrize("dataframe_source", [lazy_fixture("simple_dataset_1")]) def test_feature_view_inference_success(test_feature_store, dataframe_source): @@ -345,7 +352,8 @@ def test_apply_feature_view_integration(test_feature_store): @pytest.mark.parametrize( - "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], ) def test_apply_object_and_read(test_feature_store): assert isinstance(test_feature_store, FeatureStore) @@ -418,7 +426,8 @@ def test_apply_remote_repo(): @pytest.mark.parametrize( - "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], ) @pytest.mark.parametrize("dataframe_source", [lazy_fixture("simple_dataset_1")]) def test_reapply_feature_view_success(test_feature_store, dataframe_source): diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 935aa2d1a66..f660c46b15a 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -113,7 +113,8 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so data_source.event_timestamp_column = None update_data_sources_with_inferred_event_timestamp_col( - data_sources_copy.values(), RepoConfig(provider="local", project="test"), + data_sources_copy.values(), + RepoConfig(provider="local", project="test"), ) actual_event_timestamp_cols = [ source.timestamp_field for source in data_sources_copy.values() @@ -127,7 +128,8 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so def test_on_demand_features_type_inference(): # Create Feature Views date_request = RequestSource( - name="date_request", schema=[Field(name="some_date", dtype=UnixTimestamp)], + 
name="date_request", + schema=[Field(name="some_date", dtype=UnixTimestamp)], ) @on_demand_feature_view( @@ -193,7 +195,10 @@ def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: ) def test_datasource_inference(request_source_schema): # Create Feature Views - date_request = RequestSource(name="date_request", schema=request_source_schema,) + date_request = RequestSource( + name="date_request", + schema=request_source_schema, + ) @on_demand_feature_view( # Note: we deliberately use positional arguments here to test that they work correctly, @@ -405,10 +410,14 @@ def test_update_feature_services_with_inferred_features(simple_dataset_1): with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: entity1 = Entity(name="test1", join_keys=["id_join_key"]) feature_view_1 = FeatureView( - name="test1", entities=[entity1], source=file_source, + name="test1", + entities=[entity1], + source=file_source, ) feature_view_2 = FeatureView( - name="test2", entities=[entity1], source=file_source, + name="test2", + entities=[entity1], + source=file_source, ) feature_service = FeatureService( diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index 27bbbbd2bb8..a9fecc2f04e 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -71,11 +71,14 @@ def s3_registry() -> Registry: @pytest.mark.parametrize( - "test_registry", [lazy_fixture("local_registry")], + "test_registry", + [lazy_fixture("local_registry")], ) def test_apply_entity_success(test_registry): entity = Entity( - name="driver_car_id", description="Car driver id", tags={"team": "matchmaking"}, + name="driver_car_id", + description="Car driver id", + tags={"team": "matchmaking"}, ) project = "project" @@ -115,11 +118,14 @@ def test_apply_entity_success(test_registry): @pytest.mark.integration @pytest.mark.parametrize( - "test_registry", [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")], + "test_registry", + [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")], ) def test_apply_entity_integration(test_registry): entity = Entity( - name="driver_car_id", description="Car driver id", tags={"team": "matchmaking"}, + name="driver_car_id", + description="Car driver id", + tags={"team": "matchmaking"}, ) project = "project" @@ -154,7 +160,8 @@ def test_apply_entity_integration(test_registry): @pytest.mark.parametrize( - "test_registry", [lazy_fixture("local_registry")], + "test_registry", + [lazy_fixture("local_registry")], ) def test_apply_feature_view_success(test_registry): # Create Feature Views @@ -229,7 +236,8 @@ def test_apply_feature_view_success(test_registry): @pytest.mark.parametrize( - "test_registry", [lazy_fixture("local_registry")], + "test_registry", + [lazy_fixture("local_registry")], ) def test_apply_on_demand_feature_view_success(test_registry): # Create Feature Views @@ -303,7 +311,8 @@ def location_features_from_push(inputs: pd.DataFrame) -> pd.DataFrame: @pytest.mark.parametrize( - "test_registry", [lazy_fixture("local_registry")], + "test_registry", + [lazy_fixture("local_registry")], ) def test_apply_stream_feature_view_success(test_registry): # Create Feature Views @@ -332,10 +341,14 @@ def simple_udf(x: int): description="desc", aggregations=[ Aggregation( - column="dummy_field", function="max", time_window=timedelta(days=1), + column="dummy_field", + function="max", + time_window=timedelta(days=1), ), 
Aggregation( - column="dummy_field2", function="count", time_window=timedelta(days=24), + column="dummy_field2", + function="count", + time_window=timedelta(days=24), ), ], timestamp_field="event_timestamp", @@ -368,7 +381,8 @@ def simple_udf(x: int): @pytest.mark.parametrize( - "test_registry", [lazy_fixture("local_registry")], + "test_registry", + [lazy_fixture("local_registry")], ) # TODO(kevjumba): remove this in feast 0.24 when deprecating @pytest.mark.parametrize( @@ -384,7 +398,10 @@ def test_modify_feature_views_success(test_registry, request_source_schema): created_timestamp_column="timestamp", ) - request_source = RequestSource(name="request_source", schema=request_source_schema,) + request_source = RequestSource( + name="request_source", + schema=request_source_schema, + ) entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) @@ -493,7 +510,8 @@ def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: @pytest.mark.integration @pytest.mark.parametrize( - "test_registry", [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")], + "test_registry", + [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")], ) def test_apply_feature_view_integration(test_registry): # Create Feature Views @@ -569,7 +587,8 @@ def test_apply_feature_view_integration(test_registry): @pytest.mark.integration @pytest.mark.parametrize( - "test_registry", [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")], + "test_registry", + [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")], ) def test_apply_data_source_integration(test_registry: Registry): run_test_data_source_apply(test_registry) @@ -649,7 +668,9 @@ def test_commit(): test_registry = Registry(registry_config, None) entity = Entity( - name="driver_car_id", description="Car driver id", tags={"team": "matchmaking"}, + name="driver_car_id", + description="Car driver id", + tags={"team": "matchmaking"}, ) project = "project" diff --git a/sdk/python/tests/integration/registration/test_sql_registry.py b/sdk/python/tests/integration/registration/test_sql_registry.py index 56aff8c6d19..23a19f664ad 100644 --- a/sdk/python/tests/integration/registration/test_sql_registry.py +++ b/sdk/python/tests/integration/registration/test_sql_registry.py @@ -56,7 +56,10 @@ def pg_registry(): log_string_to_wait_for = "database system is ready to accept connections" waited = wait_for_logs( - container=container, predicate=log_string_to_wait_for, timeout=30, interval=10, + container=container, + predicate=log_string_to_wait_for, + timeout=30, + interval=10, ) logger.info("Waited for %s seconds until postgres container was up", waited) container_port = container.get_exposed_port(5432) @@ -86,7 +89,10 @@ def mysql_registry(): log_string_to_wait_for = "/usr/sbin/mysqld: ready for connections. 
Version: '8.0.29' socket: '/var/run/mysqld/mysqld.sock' port: 3306" waited = wait_for_logs( - container=container, predicate=log_string_to_wait_for, timeout=60, interval=10, + container=container, + predicate=log_string_to_wait_for, + timeout=60, + interval=10, ) logger.info("Waited for %s seconds until mysql container was up", waited) container_port = container.get_exposed_port(3306) @@ -106,11 +112,14 @@ def mysql_registry(): reason="does not run on mac github actions", ) @pytest.mark.parametrize( - "sql_registry", [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], + "sql_registry", + [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], ) def test_apply_entity_success(sql_registry): entity = Entity( - name="driver_car_id", description="Car driver id", tags={"team": "matchmaking"}, + name="driver_car_id", + description="Car driver id", + tags={"team": "matchmaking"}, ) project = "project" @@ -163,7 +172,8 @@ def assert_project_uuid(project, project_uuid, sql_registry): reason="does not run on mac github actions", ) @pytest.mark.parametrize( - "sql_registry", [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], + "sql_registry", + [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], ) def test_apply_feature_view_success(sql_registry): # Create Feature Views @@ -238,7 +248,8 @@ def test_apply_feature_view_success(sql_registry): reason="does not run on mac github actions", ) @pytest.mark.parametrize( - "sql_registry", [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], + "sql_registry", + [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], ) def test_apply_on_demand_feature_view_success(sql_registry): # Create Feature Views @@ -321,7 +332,8 @@ def location_features_from_push(inputs: pd.DataFrame) -> pd.DataFrame: reason="does not run on mac github actions", ) @pytest.mark.parametrize( - "sql_registry", [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], + "sql_registry", + [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], ) @pytest.mark.parametrize( "request_source_schema", @@ -336,7 +348,10 @@ def test_modify_feature_views_success(sql_registry, request_source_schema): created_timestamp_column="timestamp", ) - request_source = RequestSource(name="request_source", schema=request_source_schema,) + request_source = RequestSource( + name="request_source", + schema=request_source_schema, + ) entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) @@ -445,7 +460,8 @@ def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: ) @pytest.mark.integration @pytest.mark.parametrize( - "sql_registry", [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], + "sql_registry", + [lazy_fixture("mysql_registry"), lazy_fixture("pg_registry")], ) def test_apply_data_source(sql_registry): # Create Feature Views diff --git a/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py b/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py index 8e2af031c5f..22a8d8a6991 100644 --- a/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py +++ b/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py @@ -45,7 +45,9 @@ def test_apply_stream_feature_view(simple_dataset_1) -> None: description="desc", aggregations=[ Aggregation( - column="dummy_field", function="max", time_window=timedelta(days=1), + column="dummy_field", + function="max", + time_window=timedelta(days=1), ), Aggregation( column="dummy_field2", @@ -68,7 +70,8 @@ def simple_sfv(df): 
assert stream_feature_views[0] == simple_sfv features = fs.get_online_features( - features=["simple_sfv:dummy_field"], entity_rows=[{"test_key": 1001}], + features=["simple_sfv:dummy_field"], + entity_rows=[{"test_key": 1001}], ).to_dict(include_event_timestamps=True) assert "test_key" in features @@ -109,7 +112,9 @@ def test_stream_feature_view_udf(simple_dataset_1) -> None: description="desc", aggregations=[ Aggregation( - column="dummy_field", function="max", time_window=timedelta(days=1), + column="dummy_field", + function="max", + time_window=timedelta(days=1), ), Aggregation( column="dummy_field2", diff --git a/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py b/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py index b7a9a571af6..ce960b9c358 100644 --- a/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py +++ b/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py @@ -31,7 +31,8 @@ def test_infer_odfv_features(environment, universal_data_sources, infer_features ) request_source = create_conv_rate_request_source() driver_odfv = conv_rate_plus_100_feature_view( - [driver_hourly_stats, request_source], infer_features=infer_features, + [driver_hourly_stats, request_source], + infer_features=infer_features, ) feast_objects = [driver_hourly_stats, driver_odfv, driver(), customer()] @@ -83,7 +84,8 @@ def test_infer_odfv_features_with_error(environment, universal_data_sources): ) request_source = create_conv_rate_request_source() driver_odfv = conv_rate_plus_100_feature_view( - [driver_hourly_stats, request_source], features=features, + [driver_hourly_stats, request_source], + features=features, ) feast_objects = [driver_hourly_stats, driver_odfv, driver(), customer()] diff --git a/sdk/python/tests/integration/registration/test_universal_types.py b/sdk/python/tests/integration/registration/test_universal_types.py index 938773b7b77..ad29531e11c 100644 --- a/sdk/python/tests/integration/registration/test_universal_types.py +++ b/sdk/python/tests/integration/registration/test_universal_types.py @@ -64,7 +64,8 @@ class TypeTestConfig: @pytest.fixture( - params=OFFLINE_TYPE_TEST_CONFIGS, ids=[str(c) for c in OFFLINE_TYPE_TEST_CONFIGS], + params=OFFLINE_TYPE_TEST_CONFIGS, + ids=[str(c) for c in OFFLINE_TYPE_TEST_CONFIGS], ) def offline_types_test_fixtures(request, environment): config: TypeTestConfig = request.param @@ -78,7 +79,8 @@ def offline_types_test_fixtures(request, environment): @pytest.fixture( - params=ONLINE_TYPE_TEST_CONFIGS, ids=[str(c) for c in ONLINE_TYPE_TEST_CONFIGS], + params=ONLINE_TYPE_TEST_CONFIGS, + ids=[str(c) for c in ONLINE_TYPE_TEST_CONFIGS], ) def online_types_test_fixtures(request, environment): return get_fixtures(request, environment) @@ -87,15 +89,22 @@ def online_types_test_fixtures(request, environment): def get_fixtures(request, environment): config: TypeTestConfig = request.param # Lower case needed because Redshift lower-cases all table names - destination_name = f"feature_type_{config.feature_dtype}{config.feature_is_list}".replace( - ".", "" - ).lower() + destination_name = ( + f"feature_type_{config.feature_dtype}{config.feature_is_list}".replace( + ".", "" + ).lower() + ) config = request.param df = create_basic_driver_dataset( - Int64, config.feature_dtype, config.feature_is_list, config.has_empty_list, + Int64, + config.feature_dtype, + config.feature_is_list, + config.has_empty_list, ) data_source = 
environment.data_source_creator.create_data_source( - df, destination_name=destination_name, field_mapping={"ts_1": "ts"}, + df, + destination_name=destination_name, + field_mapping={"ts_1": "ts"}, ) fv = create_feature_view( destination_name, @@ -115,7 +124,10 @@ def test_entity_inference_types_match(environment, entity_type): fs = environment.feature_store # Don't specify value type in entity to force inference - df = create_basic_driver_dataset(entity_type, feature_dtype="int32",) + df = create_basic_driver_dataset( + entity_type, + feature_dtype="int32", + ) data_source = environment.data_source_creator.create_data_source( df, destination_name=f"entity_type_{entity_type.name.lower()}", @@ -186,7 +198,8 @@ def test_feature_get_historical_features_types_match( features = [f"{fv.name}:value"] historical_features = fs.get_historical_features( - entity_df=entity_df, features=features, + entity_df=entity_df, + features=features, ) # Note: Pandas doesn't play well with nan values in ints. BQ will also coerce to floats if there are NaNs historical_features_df = historical_features.to_df() @@ -235,7 +248,8 @@ def test_feature_get_online_features_types_match( ) online_features = fs.get_online_features( - features=features, entity_rows=[{"driver_id": 1}], + features=features, + entity_rows=[{"driver_id": 1}], ).to_dict() feature_list_dtype_to_expected_online_response_value_type = { @@ -340,7 +354,10 @@ def assert_feature_list_types( bool, np.bool_, ), # Can be `np.bool_` if from `np.array` rather that `list` - "datetime": (np.datetime64, datetime,), # datetime.datetime + "datetime": ( + np.datetime64, + datetime, + ), # datetime.datetime } expected_dtype = feature_list_dtype_to_expected_historical_feature_list_dtype[ feature_dtype diff --git a/sdk/python/tests/unit/diff/test_registry_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py index ae10c834c83..d12fc717f0c 100644 --- a/sdk/python/tests/unit/diff/test_registry_diff.py +++ b/sdk/python/tests/unit/diff/test_registry_diff.py @@ -11,10 +11,16 @@ def test_tag_objects_for_keep_delete_update_add(simple_dataset_1): with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: entity = Entity(name="id", join_keys=["id"]) to_delete = FeatureView( - name="to_delete", entities=[entity], batch_source=file_source, ttl=None, + name="to_delete", + entities=[entity], + batch_source=file_source, + ttl=None, ) unchanged_fv = FeatureView( - name="fv1", entities=[entity], batch_source=file_source, ttl=None, + name="fv1", + entities=[entity], + batch_source=file_source, + ttl=None, ) pre_changed = FeatureView( name="fv2", @@ -31,7 +37,10 @@ def test_tag_objects_for_keep_delete_update_add(simple_dataset_1): tags={"when": "after"}, ) to_add = FeatureView( - name="to_add", entities=[entity], batch_source=file_source, ttl=None, + name="to_add", + entities=[entity], + batch_source=file_source, + ttl=None, ) keep, delete, update, add = tag_objects_for_keep_delete_update_add( diff --git a/sdk/python/tests/unit/test_data_sources.py b/sdk/python/tests/unit/test_data_sources.py index 0208a715031..61891ccf1a4 100644 --- a/sdk/python/tests/unit/test_data_sources.py +++ b/sdk/python/tests/unit/test_data_sources.py @@ -20,7 +20,8 @@ def test_push_with_batch(): push_source = PushSource( - name="test", batch_source=BigQuerySource(table="test.test"), + name="test", + batch_source=BigQuerySource(table="test.test"), ) push_source_proto = push_source.to_proto() assert push_source_proto.HasField("batch_source") @@ -48,7 +49,11 @@ def 
test_request_source_primitive_type_to_proto(): Field(name="f2", dtype=Bool), ] request_source = RequestSource( - name="source", schema=schema, description="desc", tags={}, owner="feast", + name="source", + schema=schema, + description="desc", + tags={}, + owner="feast", ) request_proto = request_source.to_proto() deserialized_request_source = RequestSource.from_proto(request_proto) @@ -57,13 +62,16 @@ def test_request_source_primitive_type_to_proto(): def test_hash(): push_source_1 = PushSource( - name="test", batch_source=BigQuerySource(table="test.test"), + name="test", + batch_source=BigQuerySource(table="test.test"), ) push_source_2 = PushSource( - name="test", batch_source=BigQuerySource(table="test.test"), + name="test", + batch_source=BigQuerySource(table="test.test"), ) push_source_3 = PushSource( - name="test", batch_source=BigQuerySource(table="test.test2"), + name="test", + batch_source=BigQuerySource(table="test.test2"), ) push_source_4 = PushSource( name="test", diff --git a/sdk/python/tests/unit/test_entity.py b/sdk/python/tests/unit/test_entity.py index 04a857ddefe..66ed02a71c3 100644 --- a/sdk/python/tests/unit/test_entity.py +++ b/sdk/python/tests/unit/test_entity.py @@ -27,7 +27,9 @@ def test_join_key_default(): def test_entity_class_contains_tags(): with pytest.deprecated_call(): entity = Entity( - "my-entity", description="My entity", tags={"key1": "val1", "key2": "val2"}, + "my-entity", + description="My entity", + tags={"key1": "val1", "key2": "val2"}, ) assert "key1" in entity.tags.keys() and entity.tags["key1"] == "val1" assert "key2" in entity.tags.keys() and entity.tags["key2"] == "val2" diff --git a/sdk/python/tests/unit/test_feature_views.py b/sdk/python/tests/unit/test_feature_views.py index d6be8e03411..7b608b621d9 100644 --- a/sdk/python/tests/unit/test_feature_views.py +++ b/sdk/python/tests/unit/test_feature_views.py @@ -117,7 +117,9 @@ def test_stream_feature_view_serialization(): description="desc", aggregations=[ Aggregation( - column="dummy_field", function="max", time_window=timedelta(days=1), + column="dummy_field", + function="max", + time_window=timedelta(days=1), ) ], timestamp_field="event_timestamp", @@ -153,7 +155,9 @@ def test_stream_feature_view_udfs(): description="desc", aggregations=[ Aggregation( - column="dummy_field", function="max", time_window=timedelta(days=1), + column="dummy_field", + function="max", + time_window=timedelta(days=1), ) ], timestamp_field="event_timestamp", diff --git a/sdk/python/tests/unit/test_on_demand_feature_view.py b/sdk/python/tests/unit/test_on_demand_feature_view.py index 33435b8557e..5a0f5c98d8e 100644 --- a/sdk/python/tests/unit/test_on_demand_feature_view.py +++ b/sdk/python/tests/unit/test_on_demand_feature_view.py @@ -107,7 +107,8 @@ def test_hash(): def test_inputs_parameter_deprecation_in_odfv(): date_request = RequestSource( - name="date_request", schema=[Field(name="some_date", dtype=UnixTimestamp)], + name="date_request", + schema=[Field(name="some_date", dtype=UnixTimestamp)], ) with pytest.warns(DeprecationWarning): diff --git a/sdk/python/tests/unit/test_usage.py b/sdk/python/tests/unit/test_usage.py index 13988d32642..ca842474307 100644 --- a/sdk/python/tests/unit/test_usage.py +++ b/sdk/python/tests/unit/test_usage.py @@ -234,4 +234,4 @@ def call_length_ms(call): return ( datetime.datetime.fromisoformat(call["end"]) - datetime.datetime.fromisoformat(call["start"]) - ).total_seconds() * 10 ** 3 + ).total_seconds() * 10**3 diff --git a/sdk/python/tests/utils/data_source_utils.py 
b/sdk/python/tests/utils/data_source_utils.py index d5f45964ca7..3f10371734d 100644 --- a/sdk/python/tests/utils/data_source_utils.py +++ b/sdk/python/tests/utils/data_source_utils.py @@ -16,7 +16,9 @@ def prep_file_source(df, timestamp_field=None) -> Iterator[FileSource]: f.close() df.to_parquet(f.name) file_source = FileSource( - file_format=ParquetFormat(), path=f.name, timestamp_field=timestamp_field, + file_format=ParquetFormat(), + path=f.name, + timestamp_field=timestamp_field, ) yield file_source @@ -38,7 +40,10 @@ def simple_bq_source_using_table_arg(df, timestamp_field=None) -> BigQuerySource job = client.load_table_from_dataframe(df, table) job.result() - return BigQuerySource(table=table, timestamp_field=timestamp_field,) + return BigQuerySource( + table=table, + timestamp_field=timestamp_field, + ) def simple_bq_source_using_query_arg(df, timestamp_field=None) -> BigQuerySource: diff --git a/setup.py b/setup.py index bbcd6ad3eca..96898aa1cf1 100644 --- a/setup.py +++ b/setup.py @@ -25,18 +25,18 @@ from pathlib import Path from subprocess import CalledProcessError -from setuptools import find_packages, Extension +from setuptools import Extension, find_packages try: from setuptools import setup - from setuptools.command.build_py import build_py from setuptools.command.build_ext import build_ext as _build_ext + from setuptools.command.build_py import build_py from setuptools.command.develop import develop from setuptools.command.install import install except ImportError: - from distutils.command.build_py import build_py from distutils.command.build_ext import build_ext as _build_ext + from distutils.command.build_py import build_py from distutils.core import setup NAME = "feast" @@ -46,7 +46,7 @@ REQUIRES_PYTHON = ">=3.7.0" REQUIRED = [ - "click>=7.0.0,<8.0.2", + "click>=7.0.0,<9.0.0", "colorama>=0.3.9,<1", "dill==0.3.*", "fastavro>=1.1.0,<2", @@ -59,18 +59,18 @@ "mmh3", "numpy>=1.22,<2", "pandas>=1,<2", - "pandavro==1.5.*", + "pandavro>=1.5.0,<2", "protobuf>3.20,<4", - "proto-plus==1.20.*", - "pyarrow>=4,<7", + "proto-plus>=1.20.0,<2", + "pyarrow>=4,<9", "pydantic>=1,<2", - "pygments==2.12.0", + "pygments>=2.12.0,<3", "PyYAML>=5.4.*,<7", "SQLAlchemy[mypy]>1,<2", - "tabulate==0.8.*", + "tabulate>=0.8.0,<1", "tenacity>=7,<9", - "toml==0.10.*", - "tqdm==4.*", + "toml>=0.10.0,<1", + "tqdm>=4,<5", "typeguard", "fastapi>=0.68.0,<1", "uvicorn[standard]>=0.14.0,<1", @@ -126,9 +126,9 @@ CI_REQUIRED = ( [ "build", - "cryptography==35.0", + "cryptography>=35.0,<36", "flake8", - "black==19.10b0", + "black>=22.6.0,<23", "isort>=5,<6", "grpcio-tools==1.47.0", "grpcio-testing==1.47.0", @@ -535,4 +535,4 @@ def copy_extensions_to_source(self): ["github.com/feast-dev/feast/go/embedded"], ) ], -) \ No newline at end of file +) From 6520f813d672966489b662834eff55deab722fc5 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Sat, 9 Jul 2022 23:35:07 -0400 Subject: [PATCH 02/11] Exclude embedded_go/lib from black Signed-off-by: Abhin Chhabra Signed-off-by: Kevin Zhang --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8ba72544404..1fef4c27c8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ exclude = ''' | pb2.py | \.pyi | protos + | sdk/python/feast/embedded_go/lib )/ ) ''' From ca7028cb5ca57f48feb8d8abb1234de7dcf78d46 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Sun, 10 Jul 2022 00:08:51 -0400 Subject: [PATCH 03/11] Generate lockfiles for Python 3.9 and Python 3.10 Signed-off-by: Abhin Chhabra Signed-off-by: Kevin Zhang
--- .../requirements/py3.10-ci-requirements.txt | 19 +++++++++--------- .../requirements/py3.9-ci-requirements.txt | 20 +++++++++---------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 010c9c2f40f..2dac0553490 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -33,9 +33,7 @@ anyio==3.6.1 # starlette # watchfiles appdirs==1.4.4 - # via - # black - # fissix + # via fissix appnope==0.1.3 # via ipython asn1crypto==1.5.1 @@ -53,7 +51,6 @@ async-timeout==4.0.2 attrs==21.4.0 # via # aiohttp - # black # bowler # jsonschema # pytest @@ -75,7 +72,7 @@ babel==2.10.3 # via sphinx backcall==0.2.0 # via ipython -black==19.10b0 +black==22.6.0 # via feast (setup.py) boto3==1.20.23 # via @@ -385,7 +382,9 @@ mypy==0.961 # feast (setup.py) # sqlalchemy mypy-extensions==0.4.3 - # via mypy + # via + # black + # mypy mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 @@ -447,7 +446,9 @@ pickleshare==0.7.5 pip-tools==6.8.0 # via feast (setup.py) platformdirs==2.5.2 - # via virtualenv + # via + # black + # virtualenv pluggy==1.0.0 # via pytest ply==3.11 @@ -696,11 +697,11 @@ thriftpy2==0.4.14 # via happybase toml==0.10.2 # via - # black # feast (setup.py) # pre-commit tomli==2.0.1 # via + # black # build # coverage # mypy @@ -723,8 +724,6 @@ traitlets==5.3.0 # nbformat trino==0.314.0 # via feast (setup.py) -typed-ast==1.5.4 - # via black typeguard==2.13.3 # via feast (setup.py) types-protobuf==3.19.22 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 222660bd258..6cc15267da9 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -33,9 +33,7 @@ anyio==3.6.1 # starlette # watchfiles appdirs==1.4.4 - # via - # black - # fissix + # via fissix appnope==0.1.3 # via ipython asn1crypto==1.5.1 @@ -53,7 +51,6 @@ async-timeout==4.0.2 attrs==21.4.0 # via # aiohttp - # black # bowler # jsonschema # pytest @@ -75,7 +72,7 @@ babel==2.10.3 # via sphinx backcall==0.2.0 # via ipython -black==19.10b0 +black==22.6.0 # via feast (setup.py) boto3==1.20.23 # via @@ -385,7 +382,9 @@ mypy==0.961 # feast (setup.py) # sqlalchemy mypy-extensions==0.4.3 - # via mypy + # via + # black + # mypy mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 @@ -447,7 +446,9 @@ pickleshare==0.7.5 pip-tools==6.8.0 # via feast (setup.py) platformdirs==2.5.2 - # via virtualenv + # via + # black + # virtualenv pluggy==1.0.0 # via pytest ply==3.11 @@ -698,11 +699,11 @@ thriftpy2==0.4.14 # via happybase toml==0.10.2 # via - # black # feast (setup.py) # pre-commit tomli==2.0.1 # via + # black # build # coverage # mypy @@ -725,8 +726,6 @@ traitlets==5.3.0 # nbformat trino==0.314.0 # via feast (setup.py) -typed-ast==1.5.4 - # via black typeguard==2.13.3 # via feast (setup.py) types-protobuf==3.19.22 @@ -753,6 +752,7 @@ typing-extensions==4.3.0 # via # aioitertools # azure-core + # black # great-expectations # mypy # pydantic From 9a38e743aaefaab07fb19e6e91d44ab9373978b6 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Sun, 10 Jul 2022 16:51:20 -0400 Subject: [PATCH 04/11] Switch to `google-cloud-bigquery[pandas]`. This was covered in https://github.com/feast-dev/feast/issues/2537. I've only generated lockfiles for Python 3.8 to test this in CI. I'll do the same for the rest of the Python versions later. 
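A quick way to confirm the extra resolved as intended is a sketch like the one below; it is my own addition, not part of the patch, and assumes this branch is installed in the current environment (the module list is inferred from the lockfile change, which shows db-dtypes and pyarrow arriving via the extra):

    import importlib

    # Hypothetical smoke check: the "pandas" extra of google-cloud-bigquery
    # should make these optional companions importable.
    for mod in ("google.cloud.bigquery", "pandas", "pyarrow", "db_dtypes"):
        importlib.import_module(mod)
        print(f"import {mod}: ok")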
Signed-off-by: Abhin Chhabra Signed-off-by: Kevin Zhang --- .../requirements/py3.8-ci-requirements.txt | 2 -- setup.py | 26 +++++++++++++------ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index a943eb3c329..0b481c7d144 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -265,8 +265,6 @@ googleapis-common-protos==1.56.3 # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) -greenlet==1.1.2 - # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) diff --git a/setup.py b/setup.py index 96898aa1cf1..1af2d31afd5 100644 --- a/setup.py +++ b/setup.py @@ -427,12 +427,18 @@ def build_extension(self, ext: Extension): print(f"CWD: {os.getcwd()}") destination = os.path.dirname(os.path.abspath(self.get_ext_fullpath(ext.name))) - subprocess.check_call(["go", "install", "golang.org/x/tools/cmd/goimports"], - env={"PATH": bin_path, **go_env}) - subprocess.check_call(["go", "get", "github.com/go-python/gopy@v0.4.4"], - env={"PATH": bin_path, **go_env}) - subprocess.check_call(["go", "install", "github.com/go-python/gopy"], - env={"PATH": bin_path, **go_env}) + subprocess.check_call( + ["go", "install", "golang.org/x/tools/cmd/goimports"], + env={"PATH": bin_path, **go_env}, + ) + subprocess.check_call( + ["go", "get", "github.com/go-python/gopy@v0.4.4"], + env={"PATH": bin_path, **go_env}, + ) + subprocess.check_call( + ["go", "install", "github.com/go-python/gopy"], + env={"PATH": bin_path, **go_env}, + ) subprocess.check_call( [ "gopy", @@ -442,12 +448,16 @@ def build_extension(self, ext: Extension): "-vm", sys.executable, "--build-tags", - 'cgo,ccalloc', + "cgo,ccalloc", "--dynamic-link=True", "-no-make", *ext.sources, ], - env={"PATH": bin_path, "CGO_LDFLAGS_ALLOW": ".*", **go_env,}, + env={ + "PATH": bin_path, + "CGO_LDFLAGS_ALLOW": ".*", + **go_env, + }, ) def copy_extensions_to_source(self): From b94bf07a1ecd6830112eb739fb8fcc649938750d Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Mon, 18 Jul 2022 19:42:28 -0700 Subject: [PATCH 05/11] Fix Signed-off-by: Kevin Zhang --- .../requirements/py3.8-ci-requirements.txt | 40 +++++++++---------- .../requirements/py3.8-requirements.txt | 14 +++---- setup.py | 2 +- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 0b481c7d144..44a5b8a489a 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.8-ci-requirements.txt # -absl-py==1.1.0 +absl-py==1.2.0 # via tensorflow-metadata adal==1.2.7 # via @@ -131,7 +131,7 @@ colorama==0.4.5 # via # feast (setup.py) # great-expectations -coverage[toml]==6.4.1 +coverage[toml]==6.4.2 # via pytest-cov cryptography==35.0.0 # via @@ -160,7 +160,7 @@ deprecation==2.1.0 # via testcontainers dill==0.3.5.1 # via feast (setup.py) -distlib==0.3.4 +distlib==0.3.5 # via virtualenv docker==5.0.3 # via @@ -176,13 +176,13 @@ execnet==1.9.0 # via pytest-xdist executing==0.8.3 # via stack-data -fastapi==0.78.0 +fastapi==0.79.0 # via feast (setup.py) fastavro==1.5.1 # via # feast (setup.py) # pandavro -fastjsonschema==2.15.3 +fastjsonschema==2.16.1 # via nbformat filelock==3.7.1 # via virtualenv @@ -215,9 +215,9 @@ google-api-core[grpc]==2.8.2 # 
google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.52.0 +google-api-python-client==2.53.0 # via firebase-admin -google-auth==2.9.0 +google-auth==2.9.1 # via # gcsfs # google-api-core @@ -232,19 +232,19 @@ google-auth-oauthlib==0.5.2 # via gcsfs google-cloud-bigquery[pandas]==3.2.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.14.0 +google-cloud-bigquery-storage==2.14.1 # via # feast (setup.py) # google-cloud-bigquery -google-cloud-core==2.3.1 +google-cloud-core==2.3.2 # via # google-cloud-bigquery # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.7.2 +google-cloud-datastore==2.8.0 # via feast (setup.py) -google-cloud-firestore==2.5.3 +google-cloud-firestore==2.6.0 # via firebase-admin google-cloud-storage==2.4.0 # via @@ -257,7 +257,7 @@ google-resumable-media==2.3.3 # via # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos==1.56.3 +googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core @@ -333,7 +333,7 @@ jsonpatch==1.32 # via great-expectations jsonpointer==2.3 # via jsonpatch -jsonschema==4.7.1 +jsonschema==4.7.2 # via # altair # feast (setup.py) @@ -353,7 +353,7 @@ mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) -mistune==2.0.3 +mistune==2.0.4 # via great-expectations mmh3==3.0.0 # via feast (setup.py) @@ -604,8 +604,6 @@ pyyaml==6.0 # uvicorn redis==4.2.2 # via feast (setup.py) -regex==2022.7.9 - # via black requests==2.28.1 # via # adal @@ -740,13 +738,13 @@ types-python-dateutil==2.8.18 # via feast (setup.py) types-pytz==2022.1.1 # via feast (setup.py) -types-pyyaml==6.0.9 +types-pyyaml==6.0.10 # via feast (setup.py) -types-redis==4.3.3 +types-redis==4.3.4 # via feast (setup.py) -types-requests==2.28.0 +types-requests==2.28.2 # via feast (setup.py) -types-setuptools==62.6.1 +types-setuptools==63.2.0 # via feast (setup.py) types-tabulate==0.8.11 # via feast (setup.py) @@ -805,7 +803,7 @@ xmltodict==0.13.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.8.0 +zipp==3.8.1 # via # importlib-metadata # importlib-resources diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 9d192d08191..9e741fa39ac 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --output-file=sdk/python/requirements/py3.8-requirements.txt # -absl-py==1.1.0 +absl-py==1.2.0 # via tensorflow-metadata anyio==3.6.1 # via @@ -38,7 +38,7 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.5.1 # via feast (setup.py) -fastapi==0.78.0 +fastapi==0.79.0 # via feast (setup.py) fastavro==1.5.1 # via @@ -50,15 +50,13 @@ fsspec==2022.5.0 # via dask google-api-core==2.8.2 # via feast (setup.py) -google-auth==2.9.0 +google-auth==2.9.1 # via google-api-core -googleapis-common-protos==1.56.3 +googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core # tensorflow-metadata -greenlet==1.1.2 - # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -77,7 +75,7 @@ importlib-resources==5.8.0 # via jsonschema jinja2==3.1.2 # via feast (setup.py) -jsonschema==4.7.1 +jsonschema==4.7.2 # via feast (setup.py) locket==1.0.0 # via partd @@ -199,5 +197,5 @@ watchfiles==0.15.0 # via uvicorn websockets==10.3 # via uvicorn -zipp==3.8.0 +zipp==3.8.1 # via importlib-resources diff --git a/setup.py b/setup.py index 1af2d31afd5..dceb2354bd5 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "Jinja2>=2,<4", "jsonschema", 
"mmh3", - "numpy>=1.22,<2", + "numpy>=1.22,<3", "pandas>=1,<2", "pandavro>=1.5.0,<2", "protobuf>3.20,<4", From d32d4a3777c1b864e51e21a93262a8970207f51b Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Mon, 18 Jul 2022 19:45:42 -0700 Subject: [PATCH 06/11] Update 3.9 reqs Signed-off-by: Kevin Zhang --- sdk/python/requirements/py3.9-ci-requirements.txt | 4 ---- sdk/python/requirements/py3.9-requirements.txt | 2 -- 2 files changed, 6 deletions(-) diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 6cc15267da9..68b0f552b71 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -261,8 +261,6 @@ googleapis-common-protos==1.56.4 # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) -greenlet==1.1.2 - # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -600,8 +598,6 @@ pyyaml==6.0 # uvicorn redis==4.2.2 # via feast (setup.py) -regex==2022.7.9 - # via black requests==2.28.1 # via # adal diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index fe11b7348b4..5a8ff0bc04c 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -57,8 +57,6 @@ googleapis-common-protos==1.56.4 # feast (setup.py) # google-api-core # tensorflow-metadata -greenlet==1.1.2 - # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) From 3ac271e6b3025e0f6e0c406085fc149ca13c5ae3 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Mon, 18 Jul 2022 19:51:11 -0700 Subject: [PATCH 07/11] UPdate 3.10 Signed-off-by: Kevin Zhang --- sdk/python/requirements/py3.10-ci-requirements.txt | 4 ---- sdk/python/requirements/py3.10-requirements.txt | 2 -- 2 files changed, 6 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 2dac0553490..bab42c70b3f 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -261,8 +261,6 @@ googleapis-common-protos==1.56.4 # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) -greenlet==1.1.2 - # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) @@ -600,8 +598,6 @@ pyyaml==6.0 # uvicorn redis==4.2.2 # via feast (setup.py) -regex==2022.7.9 - # via black requests==2.28.1 # via # adal diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 22e4d44e1a9..9356b351dfd 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -57,8 +57,6 @@ googleapis-common-protos==1.56.4 # feast (setup.py) # google-api-core # tensorflow-metadata -greenlet==1.1.2 - # via sqlalchemy grpcio==1.47.0 # via # feast (setup.py) From 08176cbe8a8d51ccc1c29c7f245973a655c622fd Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Mon, 18 Jul 2022 22:25:04 -0700 Subject: [PATCH 08/11] Fix Signed-off-by: Kevin Zhang --- .../requirements/py3.10-ci-requirements.txt | 7 ++++--- .../requirements/py3.10-requirements.txt | 7 ++++--- .../requirements/py3.8-ci-requirements.txt | 19 ++++++------------- .../requirements/py3.8-requirements.txt | 13 +++++-------- .../requirements/py3.9-ci-requirements.txt | 7 ++++--- .../requirements/py3.9-requirements.txt | 7 ++++--- setup.py | 4 ++-- 7 files changed, 29 insertions(+), 35 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt 
b/sdk/python/requirements/py3.10-ci-requirements.txt index bab42c70b3f..03bc8b9a28a 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -174,7 +174,7 @@ executing==0.8.3 # via stack-data fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.1 +fastavro==1.5.2 # via # feast (setup.py) # pandavro @@ -416,7 +416,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.2.5 +pandas==1.4.3 # via # altair # db-dtypes @@ -425,7 +425,7 @@ pandas==1.2.5 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.7.0 +pandavro==1.5.2 # via feast (setup.py) parso==0.8.3 # via jedi @@ -645,6 +645,7 @@ six==1.16.0 # happybase # mock # msrestazure + # pandavro # python-dateutil # virtualenv sniffio==1.2.0 diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 9356b351dfd..115a6273411 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.1 +fastavro==1.5.2 # via # feast (setup.py) # pandavro @@ -95,11 +95,11 @@ numpy==1.23.1 # pyarrow packaging==21.3 # via dask -pandas==1.2.5 +pandas==1.4.3 # via # feast (setup.py) # pandavro -pandavro==1.7.0 +pandavro==1.5.2 # via feast (setup.py) partd==1.2.0 # via dask @@ -150,6 +150,7 @@ six==1.16.0 # via # google-auth # grpcio + # pandavro # python-dateutil sniffio==1.2.0 # via anyio diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 44a5b8a489a..4261a3862c9 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with python 3.8 +# This file is autogenerated by pip-compile with python 3.9 # To update, run: # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.8-ci-requirements.txt @@ -72,10 +72,6 @@ babel==2.10.3 # via sphinx backcall==0.2.0 # via ipython -backports-zoneinfo==0.2.1 - # via - # pytz-deprecation-shim - # tzlocal black==22.6.0 # via feast (setup.py) boto3==1.20.23 @@ -178,7 +174,7 @@ executing==0.8.3 # via stack-data fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.1 +fastavro==1.5.2 # via # feast (setup.py) # pandavro @@ -306,8 +302,6 @@ imagesize==1.4.1 # via sphinx importlib-metadata==4.12.0 # via great-expectations -importlib-resources==5.8.0 - # via jsonschema iniconfig==1.1.1 # via pytest ipython==8.4.0 @@ -422,7 +416,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.2.5 +pandas==1.4.3 # via # altair # db-dtypes @@ -431,7 +425,7 @@ pandas==1.2.5 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.7.0 +pandavro==1.5.2 # via feast (setup.py) parso==0.8.3 # via jedi @@ -653,6 +647,7 @@ six==1.16.0 # happybase # mock # msrestazure + # pandavro # python-dateutil # virtualenv sniffio==1.2.0 @@ -804,9 +799,7 @@ xmltodict==0.13.0 yarl==1.7.2 # via aiohttp zipp==3.8.1 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 9e741fa39ac..91cccd6edb4 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -1,5 +1,5 @@ # -# 
This file is autogenerated by pip-compile with python 3.8 +# This file is autogenerated by pip-compile with python 3.9 # To update, run: # # pip-compile --output-file=sdk/python/requirements/py3.8-requirements.txt @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.1 +fastavro==1.5.2 # via # feast (setup.py) # pandavro @@ -71,8 +71,6 @@ idna==3.3 # via # anyio # requests -importlib-resources==5.8.0 - # via jsonschema jinja2==3.1.2 # via feast (setup.py) jsonschema==4.7.2 @@ -97,11 +95,11 @@ numpy==1.23.1 # pyarrow packaging==21.3 # via dask -pandas==1.2.5 +pandas==1.4.3 # via # feast (setup.py) # pandavro -pandavro==1.7.0 +pandavro==1.5.2 # via feast (setup.py) partd==1.2.0 # via dask @@ -152,6 +150,7 @@ six==1.16.0 # via # google-auth # grpcio + # pandavro # python-dateutil sniffio==1.2.0 # via anyio @@ -197,5 +196,3 @@ watchfiles==0.15.0 # via uvicorn websockets==10.3 # via uvicorn -zipp==3.8.1 - # via importlib-resources diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 68b0f552b71..2706348d41f 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -174,7 +174,7 @@ executing==0.8.3 # via stack-data fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.1 +fastavro==1.5.2 # via # feast (setup.py) # pandavro @@ -416,7 +416,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.2.5 +pandas==1.4.3 # via # altair # db-dtypes @@ -425,7 +425,7 @@ pandas==1.2.5 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.7.0 +pandavro==1.5.2 # via feast (setup.py) parso==0.8.3 # via jedi @@ -647,6 +647,7 @@ six==1.16.0 # happybase # mock # msrestazure + # pandavro # python-dateutil # virtualenv sniffio==1.2.0 diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 5a8ff0bc04c..d015b3aa1c2 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.1 +fastavro==1.5.2 # via # feast (setup.py) # pandavro @@ -95,11 +95,11 @@ numpy==1.23.1 # pyarrow packaging==21.3 # via dask -pandas==1.2.5 +pandas==1.4.3 # via # feast (setup.py) # pandavro -pandavro==1.7.0 +pandavro==1.5.2 # via feast (setup.py) partd==1.2.0 # via dask @@ -150,6 +150,7 @@ six==1.16.0 # via # google-auth # grpcio + # pandavro # python-dateutil sniffio==1.2.0 # via anyio diff --git a/setup.py b/setup.py index dceb2354bd5..c6d4aa30b74 100644 --- a/setup.py +++ b/setup.py @@ -58,8 +58,8 @@ "jsonschema", "mmh3", "numpy>=1.22,<3", - "pandas>=1,<2", - "pandavro>=1.5.0,<2", + "pandas>=1.4.3,<2", + "pandavro==1.5.*", "protobuf>3.20,<4", "proto-plus>=1.20.0,<2", "pyarrow>=4,<9", From f8759c163007ec0a24aba10a0a0a81f05a79637d Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Mon, 18 Jul 2022 22:37:12 -0700 Subject: [PATCH 09/11] Fix Signed-off-by: Kevin Zhang --- sdk/python/feast/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index ce8046a42ee..9413d0e1d21 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -1709,7 +1709,7 @@ def get_validation_reference( Returns either the specified ValidationReference, or raises an exception if none is found """ - registry_proto = 
self._get_registry_proto(allow_cache=allow_cache) + registry_proto = self._get_registry_proto(project=project, allow_cache=allow_cache) for validation_reference in registry_proto.validation_references: if ( validation_reference.name == name From 407915fa9b8c2e1023d75cfc7af5cbab0a9e584e Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Mon, 18 Jul 2022 22:38:59 -0700 Subject: [PATCH 10/11] Fix lint Signed-off-by: Kevin Zhang --- sdk/python/feast/infra/materialization/lambda/app.py | 11 +++++++---- sdk/python/feast/infra/registry_stores/sql.py | 4 +++- sdk/python/feast/registry.py | 4 +++- .../integration/feature_repos/repo_configuration.py | 6 +++--- .../tests/integration/materialization/test_lambda.py | 10 ++++++++-- .../tests/integration/registration/test_registry.py | 3 ++- 6 files changed, 26 insertions(+), 12 deletions(-) diff --git a/sdk/python/feast/infra/materialization/lambda/app.py b/sdk/python/feast/infra/materialization/lambda/app.py index ebed4c96e06..375674adaa7 100644 --- a/sdk/python/feast/infra/materialization/lambda/app.py +++ b/sdk/python/feast/infra/materialization/lambda/app.py @@ -16,9 +16,9 @@ def handler(event, context): """Provide an event that contains the following keys: - - operation: one of the operations in the operations dict below - - tableName: required for operations that interact with DynamoDB - - payload: a parameter to pass to the operation being performed + - operation: one of the operations in the operations dict below + - tableName: required for operations that interact with DynamoDB + - payload: a parameter to pass to the operation being performed """ print("Received event: " + json.dumps(event, indent=2), flush=True) @@ -71,7 +71,10 @@ def handler(event, context): batch, feature_view, join_key_to_value_type ) store._provider.online_write_batch( - store.config, feature_view, rows_to_write, lambda x: None, + store.config, + feature_view, + rows_to_write, + lambda x: None, ) written_rows += len(rows_to_write) return {"written_rows": written_rows} diff --git a/sdk/python/feast/infra/registry_stores/sql.py b/sdk/python/feast/infra/registry_stores/sql.py index 3daf0489811..9c6b47a714c 100644 --- a/sdk/python/feast/infra/registry_stores/sql.py +++ b/sdk/python/feast/infra/registry_stores/sql.py @@ -473,7 +473,9 @@ def list_project_metadata( self, project: str, allow_cache: bool = False ) -> List[ProjectMetadata]: with self.engine.connect() as conn: - stmt = select(feast_metadata).where(feast_metadata.c.project_id == project,) + stmt = select(feast_metadata).where( + feast_metadata.c.project_id == project, + ) rows = conn.execute(stmt).all() if rows: project_metadata = ProjectMetadata(project_name=project) diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index 9413d0e1d21..c501a42c186 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -1709,7 +1709,9 @@ def get_validation_reference( Returns either the specified ValidationReference, or raises an exception if none is found """ - registry_proto = self._get_registry_proto(project=project, allow_cache=allow_cache) + registry_proto = self._get_registry_proto( + project=project, allow_cache=allow_cache + ) for validation_reference in registry_proto.validation_references: if ( validation_reference.name == name diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 022d3ec3f12..672a5bdcfa0 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ 
b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -402,9 +402,9 @@ def construct_test_environment( if ( test_repo_config.python_feature_server and test_repo_config.provider == "aws" ) or test_repo_config.registry_location == RegistryLocation.S3: - registry: Union[str, RegistryConfig] = ( - f"s3://feast-integration-tests/registries/{project}/registry.db" - ) + registry: Union[ + str, RegistryConfig + ] = f"s3://feast-integration-tests/registries/{project}/registry.db" else: registry = RegistryConfig( path=str(Path(repo_dir_name) / "registry.db"), diff --git a/sdk/python/tests/integration/materialization/test_lambda.py b/sdk/python/tests/integration/materialization/test_lambda.py index 66cd2c5eb97..4a259fd3654 100644 --- a/sdk/python/tests/integration/materialization/test_lambda.py +++ b/sdk/python/tests/integration/materialization/test_lambda.py @@ -38,11 +38,17 @@ def test_lambda_materialization(): df = create_basic_driver_dataset() ds = lambda_environment.data_source_creator.create_data_source( - df, lambda_environment.feature_store.project, field_mapping={"ts_1": "ts"}, + df, + lambda_environment.feature_store.project, + field_mapping={"ts_1": "ts"}, ) fs = lambda_environment.feature_store - driver = Entity(name="driver_id", join_key="driver_id", value_type=ValueType.INT64,) + driver = Entity( + name="driver_id", + join_key="driver_id", + value_type=ValueType.INT64, + ) driver_stats_fv = FeatureView( name="driver_hourly_stats", diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index a9fecc2f04e..e6309779f91 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -595,7 +595,8 @@ def test_apply_data_source_integration(test_registry: Registry): @pytest.mark.parametrize( - "test_registry", [lazy_fixture("local_registry")], + "test_registry", + [lazy_fixture("local_registry")], ) def test_apply_data_source(test_registry: Registry): run_test_data_source_apply(test_registry) From f23a1fe4b6527152118df092aa64489fc502d897 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Mon, 18 Jul 2022 23:14:08 -0700 Subject: [PATCH 11/11] Fix dependencies Signed-off-by: Kevin Zhang --- sdk/python/requirements/py3.8-ci-requirements.txt | 12 ++++++++++-- sdk/python/requirements/py3.8-requirements.txt | 6 +++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 4261a3862c9..d25c433bac4 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with python 3.9 +# This file is autogenerated by pip-compile with python 3.8 # To update, run: # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.8-ci-requirements.txt @@ -72,6 +72,10 @@ babel==2.10.3 # via sphinx backcall==0.2.0 # via ipython +backports-zoneinfo==0.2.1 + # via + # pytz-deprecation-shim + # tzlocal black==22.6.0 # via feast (setup.py) boto3==1.20.23 @@ -302,6 +306,8 @@ imagesize==1.4.1 # via sphinx importlib-metadata==4.12.0 # via great-expectations +importlib-resources==5.8.0 + # via jsonschema iniconfig==1.1.1 # via pytest ipython==8.4.0 @@ -799,7 +805,9 @@ xmltodict==0.13.0 yarl==1.7.2 # via aiohttp zipp==3.8.1 - # via importlib-metadata + # via + # importlib-metadata + # importlib-resources # The following 
packages are considered to be unsafe in a requirements file: # pip diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 91cccd6edb4..3de6ae7e9eb 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with python 3.9 +# This file is autogenerated by pip-compile with python 3.8 # To update, run: # # pip-compile --output-file=sdk/python/requirements/py3.8-requirements.txt @@ -71,6 +71,8 @@ idna==3.3 # via # anyio # requests +importlib-resources==5.8.0 + # via jsonschema jinja2==3.1.2 # via feast (setup.py) jsonschema==4.7.2 @@ -196,3 +198,5 @@ watchfiles==0.15.0 # via uvicorn websockets==10.3 # via uvicorn +zipp==3.8.1 + # via importlib-resources
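As a closing check on the widened pins, a smoke-test sketch (my own, not part of the patches; it assumes Python 3.8+ and an editable install of this branch, e.g. pip install -e ".[ci]"):

    from importlib.metadata import version  # stdlib in Python 3.8+

    import feast  # the import itself fails loudly if the resolved set is broken

    # Print what the widened pins actually resolved to in this environment.
    for dist in ("click", "pandas", "pandavro", "pyarrow", "pygments", "tabulate", "toml", "tqdm"):
        print(dist, version(dist))
    print("feast", getattr(feast, "__version__", "unknown"))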