diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
index 4a5165ca8e7e99..7e85d080c601c2 100644
--- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
+++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -1898,6 +1898,9 @@ definitions:
         type: string
         examples:
           - segment_id
+        interpolation_context:
+          - config
+          - parameters
       inject_into:
         title: Inject Into
         description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
@@ -2154,6 +2157,13 @@ interpolation:
     examples:
       - start_date: 2010-01-01
         api_key: "*****"
+  - title: parameters
+    description: Additional runtime parameters, to be used for string interpolation. Parameters can be passed down from a parent component to its subcomponents using the $parameters key. This can be used to avoid repetitions.
+    type: object
+    examples:
+      - path: "automations"
+        data_export_path: "automations"
+        cursor_field: "updated_at"
   - title: headers
     description: The HTTP headers from the last response received from the API. The object's keys are the header names from the response.
     type: object
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
index 685f0b7e6876d8..e2a5f27d1ef360 100644
--- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
+++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
@@ -247,9 +247,11 @@ def request_kwargs(self) -> Mapping[str, Any]:
     def _get_request_options(self, option_type: RequestOptionType, stream_slice: StreamSlice):
         options = {}
         if self.start_time_option and self.start_time_option.inject_into == option_type:
-            options[self.start_time_option.field_name] = stream_slice.get(self.partition_field_start.eval(self.config))
+            options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get(
+                self.partition_field_start.eval(self.config)
+            )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            options[self.end_time_option.field_name] = stream_slice.get(self.partition_field_end.eval(self.config))
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(self.partition_field_end.eval(self.config))
         return options

     def should_be_synced(self, record: Record) -> bool:
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
index 1ddc137356aaf4..182c0d5e369016 100644
--- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
+++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -11,616 +11,610 @@ class AuthFlowType(Enum):
-    oauth2_0 = 'oauth2.0'
-    oauth1_0 = 'oauth1.0'
+    oauth2_0 = "oauth2.0"
+    oauth1_0 = "oauth1.0"


 class BasicHttpAuthenticator(BaseModel):
-    type: Literal['BasicHttpAuthenticator']
+    type: Literal["BasicHttpAuthenticator"]
     username: str = Field(
         ...,
-        description='The username that will be combined with the password, base64 encoded and used to make requests.
Fill it in the user inputs.', + description="The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.", examples=["{{ config['username'] }}", "{{ config['api_key'] }}"], - title='Username', + title="Username", ) password: Optional[str] = Field( - '', - description='The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.', - examples=["{{ config['password'] }}", ''], - title='Password', + "", + description="The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.", + examples=["{{ config['password'] }}", ""], + title="Password", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class BearerAuthenticator(BaseModel): - type: Literal['BearerAuthenticator'] + type: Literal["BearerAuthenticator"] api_token: str = Field( ..., - description='Token to inject as request header for authenticating with the API.', + description="Token to inject as request header for authenticating with the API.", examples=["{{ config['api_key'] }}", "{{ config['token'] }}"], - title='Bearer Token', + title="Bearer Token", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CheckStream(BaseModel): - type: Literal['CheckStream'] + type: Literal["CheckStream"] stream_names: List[str] = Field( ..., - description='Names of the streams to try reading from when running a check operation.', - examples=[['users'], ['users', 'contacts']], - title='Stream Names', + description="Names of the streams to try reading from when running a check operation.", + examples=[["users"], ["users", "contacts"]], + title="Stream Names", ) class ConstantBackoffStrategy(BaseModel): - type: Literal['ConstantBackoffStrategy'] + type: Literal["ConstantBackoffStrategy"] backoff_time_in_seconds: Union[float, str] = Field( ..., - description='Backoff time in seconds.', + description="Backoff time in seconds.", examples=[30, 30.5, "{{ config['backoff_time'] }}"], - title='Backoff Time', + title="Backoff Time", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomAuthenticator'] + type: Literal["CustomAuthenticator"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.', - examples=['source_railz.components.ShortLivedTokenAuthenticator'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. 
The format is `source_..`.", + examples=["source_railz.components.ShortLivedTokenAuthenticator"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomBackoffStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomBackoffStrategy'] + type: Literal["CustomBackoffStrategy"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomBackoffStrategy'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomBackoffStrategy"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomErrorHandler(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomErrorHandler'] + type: Literal["CustomErrorHandler"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.', - examples=['source_railz.components.MyCustomErrorHandler'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.", + examples=["source_railz.components.MyCustomErrorHandler"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomIncrementalSync(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomIncrementalSync'] + type: Literal["CustomIncrementalSync"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.', - examples=['source_railz.components.MyCustomIncrementalSync'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.", + examples=["source_railz.components.MyCustomIncrementalSync"], + title="Class Name", ) cursor_field: str = Field( ..., - description='The location of the value on a record that will be used as a bookmark during sync.', + description="The location of the value on a record that will be used as a bookmark during sync.", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomPaginationStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomPaginationStrategy'] + type: Literal["CustomPaginationStrategy"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomPaginationStrategy'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom pagination strategy. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomPaginationStrategy"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRecordExtractor(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRecordExtractor'] + type: Literal["CustomRecordExtractor"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRecordExtractor'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomRecordExtractor"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRequester(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRequester'] + type: Literal["CustomRequester"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRecordExtractor'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomRecordExtractor"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRetriever(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRetriever'] + type: Literal["CustomRetriever"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRetriever'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomRetriever"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomPartitionRouter(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomPartitionRouter'] + type: Literal["CustomPartitionRouter"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom partition router. The format is `source_..`.', - examples=['source_railz.components.MyCustomPartitionRouter'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom partition router. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomPartitionRouter"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomTransformation(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomTransformation'] + type: Literal["CustomTransformation"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.', - examples=['source_railz.components.MyCustomTransformation'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.", + examples=["source_railz.components.MyCustomTransformation"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RefreshTokenUpdater(BaseModel): refresh_token_name: Optional[str] = Field( - 'refresh_token', - description='The name of the property which contains the updated refresh token in the response from the token refresh endpoint.', - examples=['refresh_token'], - title='Refresh Token Property Name', + "refresh_token", + description="The name of the property which contains the updated refresh token in the response from the token refresh endpoint.", + examples=["refresh_token"], + title="Refresh Token Property Name", ) access_token_config_path: Optional[List[str]] = Field( - ['credentials', 'access_token'], - description='Config path to the access token. Make sure the field actually exists in the config.', - examples=[['credentials', 'access_token'], ['access_token']], - title='Config Path To Access Token', + ["credentials", "access_token"], + description="Config path to the access token. Make sure the field actually exists in the config.", + examples=[["credentials", "access_token"], ["access_token"]], + title="Config Path To Access Token", ) refresh_token_config_path: Optional[List[str]] = Field( - ['credentials', 'refresh_token'], - description='Config path to the access token. Make sure the field actually exists in the config.', - examples=[['credentials', 'refresh_token'], ['refresh_token']], - title='Config Path To Refresh Token', + ["credentials", "refresh_token"], + description="Config path to the access token. Make sure the field actually exists in the config.", + examples=[["credentials", "refresh_token"], ["refresh_token"]], + title="Config Path To Refresh Token", ) token_expiry_date_config_path: Optional[List[str]] = Field( - ['credentials', 'token_expiry_date'], - description='Config path to the expiry date. Make sure actually exists in the config.', - examples=[['credentials', 'token_expiry_date']], - title='Config Path To Expiry Date', + ["credentials", "token_expiry_date"], + description="Config path to the expiry date. Make sure actually exists in the config.", + examples=[["credentials", "token_expiry_date"]], + title="Config Path To Expiry Date", ) class OAuthAuthenticator(BaseModel): - type: Literal['OAuthAuthenticator'] + type: Literal["OAuthAuthenticator"] client_id: str = Field( ..., - description='The OAuth client ID. Fill it in the user inputs.', + description="The OAuth client ID. 
Fill it in the user inputs.", examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"], - title='Client ID', + title="Client ID", ) client_secret: str = Field( ..., - description='The OAuth client secret. Fill it in the user inputs.', + description="The OAuth client secret. Fill it in the user inputs.", examples=[ "{{ config['client_secret }}", "{{ config['credentials']['client_secret }}", ], - title='Client Secret', + title="Client Secret", ) refresh_token: Optional[str] = Field( None, - description='Credential artifact used to get a new access token.', + description="Credential artifact used to get a new access token.", examples=[ "{{ config['refresh_token'] }}", "{{ config['credentials]['refresh_token'] }}", ], - title='Refresh Token', + title="Refresh Token", ) token_refresh_endpoint: str = Field( ..., - description='The full URL to call to obtain a new access token.', - examples=['https://connect.squareup.com/oauth2/token'], - title='Token Refresh Endpoint', + description="The full URL to call to obtain a new access token.", + examples=["https://connect.squareup.com/oauth2/token"], + title="Token Refresh Endpoint", ) access_token_name: Optional[str] = Field( - 'access_token', - description='The name of the property which contains the access token in the response from the token refresh endpoint.', - examples=['access_token'], - title='Access Token Property Name', + "access_token", + description="The name of the property which contains the access token in the response from the token refresh endpoint.", + examples=["access_token"], + title="Access Token Property Name", ) expires_in_name: Optional[str] = Field( - 'expires_in', - description='The name of the property which contains the expiry date in the response from the token refresh endpoint.', - examples=['expires_in'], - title='Token Expiry Property Name', + "expires_in", + description="The name of the property which contains the expiry date in the response from the token refresh endpoint.", + examples=["expires_in"], + title="Token Expiry Property Name", ) grant_type: Optional[str] = Field( - 'refresh_token', - description='Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.', - examples=['refresh_token', 'client_credentials'], - title='Grant Type', + "refresh_token", + description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. 
Other grant types are not officially supported.", + examples=["refresh_token", "client_credentials"], + title="Grant Type", ) refresh_request_body: Optional[Dict[str, Any]] = Field( None, - description='Body of the request sent to get a new access token.', + description="Body of the request sent to get a new access token.", examples=[ { - 'applicationId': "{{ config['application_id'] }}", - 'applicationSecret': "{{ config['application_secret'] }}", - 'token': "{{ config['token'] }}", + "applicationId": "{{ config['application_id'] }}", + "applicationSecret": "{{ config['application_secret'] }}", + "token": "{{ config['token'] }}", } ], - title='Refresh Request Body', + title="Refresh Request Body", ) scopes: Optional[List[str]] = Field( None, - description='List of scopes that should be granted to the access token.', - examples=[ - ['crm.list.read', 'crm.objects.contacts.read', 'crm.schema.contacts.read'] - ], - title='Scopes', + description="List of scopes that should be granted to the access token.", + examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], + title="Scopes", ) token_expiry_date: Optional[str] = Field( None, - description='The access token expiry date.', - examples=['2023-04-06T07:12:10.421833+00:00', 1680842386], - title='Token Expiry Date', + description="The access token expiry date.", + examples=["2023-04-06T07:12:10.421833+00:00", 1680842386], + title="Token Expiry Date", ) token_expiry_date_format: Optional[str] = Field( None, - description='The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.', - examples=['%Y-%m-%d %H:%M:%S.%f+00:00'], - title='Token Expiry Date Format', + description="The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.", + examples=["%Y-%m-%d %H:%M:%S.%f+00:00"], + title="Token Expiry Date Format", ) refresh_token_updater: Optional[RefreshTokenUpdater] = Field( None, - description='When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.', - title='Token Updater', + description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. 
This is important if the refresh token can only used once.", + title="Token Updater", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ExponentialBackoffStrategy(BaseModel): - type: Literal['ExponentialBackoffStrategy'] + type: Literal["ExponentialBackoffStrategy"] factor: Optional[Union[float, str]] = Field( 5, - description='Multiplicative constant applied on each retry.', - examples=[5, 5.5, '10'], - title='Factor', + description="Multiplicative constant applied on each retry.", + examples=[5, 5.5, "10"], + title="Factor", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenRequestBearerAuthenticator(BaseModel): - type: Literal['Bearer'] + type: Literal["Bearer"] class HttpMethod(Enum): - GET = 'GET' - POST = 'POST' + GET = "GET" + POST = "POST" class Action(Enum): - SUCCESS = 'SUCCESS' - FAIL = 'FAIL' - RETRY = 'RETRY' - IGNORE = 'IGNORE' + SUCCESS = "SUCCESS" + FAIL = "FAIL" + RETRY = "RETRY" + IGNORE = "IGNORE" class HttpResponseFilter(BaseModel): - type: Literal['HttpResponseFilter'] + type: Literal["HttpResponseFilter"] action: Action = Field( ..., - description='Action to execute if a response matches the filter.', - examples=['SUCCESS', 'FAIL', 'RETRY', 'IGNORE'], - title='Action', + description="Action to execute if a response matches the filter.", + examples=["SUCCESS", "FAIL", "RETRY", "IGNORE"], + title="Action", ) error_message: Optional[str] = Field( None, - description='Error Message to display if the response matches the filter.', - title='Error Message', + description="Error Message to display if the response matches the filter.", + title="Error Message", ) error_message_contains: Optional[str] = Field( None, - description='Match the response if its error message contains the substring.', - example=['This API operation is not enabled for this site'], - title='Error Message Substring', + description="Match the response if its error message contains the substring.", + example=["This API operation is not enabled for this site"], + title="Error Message Substring", ) http_codes: Optional[List[int]] = Field( None, - description='Match the response if its HTTP code is included in this list.', + description="Match the response if its HTTP code is included in this list.", examples=[[420, 429], [500]], - title='HTTP Codes', + title="HTTP Codes", ) predicate: Optional[str] = Field( None, - description='Match the response if the predicate evaluates to true.', + description="Match the response if the predicate evaluates to true.", examples=[ "{{ 'Too much requests' in response }}", "{{ 'error_code' in response and response['error_code'] == 'ComplexityException' }}", ], - title='Predicate', + title="Predicate", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class InlineSchemaLoader(BaseModel): - type: Literal['InlineSchemaLoader'] + type: Literal["InlineSchemaLoader"] schema_: Optional[Dict[str, Any]] = Field( None, - alias='schema', + alias="schema", description='Describes a streams\' schema. 
Refer to the Data Types documentation for more details on which types are valid.', - title='Schema', + title="Schema", ) class JsonFileSchemaLoader(BaseModel): - type: Literal['JsonFileSchemaLoader'] + type: Literal["JsonFileSchemaLoader"] file_path: Optional[str] = Field( None, description="Path to the JSON file defining the schema. The path is relative to the connector module's root.", - example=['./schemas/users.json'], - title='File Path', + example=["./schemas/users.json"], + title="File Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class JsonDecoder(BaseModel): - type: Literal['JsonDecoder'] + type: Literal["JsonDecoder"] class MinMaxDatetime(BaseModel): - type: Literal['MinMaxDatetime'] + type: Literal["MinMaxDatetime"] datetime: str = Field( ..., - description='Datetime value.', - examples=['2021-01-01', '2021-01-01T00:00:00Z', "{{ config['start_time'] }}"], - title='Datetime', + description="Datetime value.", + examples=["2021-01-01", "2021-01-01T00:00:00Z", "{{ config['start_time'] }}"], + title="Datetime", ) datetime_format: Optional[str] = Field( - '', + "", description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s'], - title='Datetime Format', + examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], + title="Datetime Format", ) max_datetime: Optional[str] = Field( None, - description='Ceiling applied on the datetime value. 
Must be formatted with the datetime_format field.', - examples=['2021-01-01T00:00:00Z', '2021-01-01'], - title='Max Datetime', + description="Ceiling applied on the datetime value. Must be formatted with the datetime_format field.", + examples=["2021-01-01T00:00:00Z", "2021-01-01"], + title="Max Datetime", ) min_datetime: Optional[str] = Field( None, - description='Floor applied on the datetime value. Must be formatted with the datetime_format field.', - examples=['2010-01-01T00:00:00Z', '2010-01-01'], - title='Min Datetime', + description="Floor applied on the datetime value. Must be formatted with the datetime_format field.", + examples=["2010-01-01T00:00:00Z", "2010-01-01"], + title="Min Datetime", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class NoAuth(BaseModel): - type: Literal['NoAuth'] - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + type: Literal["NoAuth"] + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class NoPagination(BaseModel): - type: Literal['NoPagination'] + type: Literal["NoPagination"] class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[ - Dict[str, Any] - ] = Field( + oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", examples=[ - {'app_id': {'type': 'string', 'path_in_connector_config': ['app_id']}}, + {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, { - 'app_id': { - 'type': 'string', - 'path_in_connector_config': ['info', 'app_id'], + "app_id": { + "type": "string", + "path_in_connector_config": ["info", "app_id"], } }, ], - title='OAuth user input', + title="OAuth user input", ) complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }", examples=[ { - 'refresh_token': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'refresh_token'], + "refresh_token": { + "type": "string,", + "path_in_connector_config": ["credentials", "refresh_token"], } } ], - title='OAuth output specification', + title="OAuth output specification", ) complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( None, - description='OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }', - examples=[ - {'client_id': {'type': 'string'}, 'client_secret': {'type': 'string'}} - ], - title='OAuth input specification', + description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", + examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], + title="OAuth input specification", ) complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. 
(some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", examples=[ { - 'client_id': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'client_id'], + "client_id": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_id"], }, - 'client_secret': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'client_secret'], + "client_secret": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_secret"], }, } ], - title='OAuth server output specification', + title="OAuth server output specification", ) class OffsetIncrement(BaseModel): - type: Literal['OffsetIncrement'] + type: Literal["OffsetIncrement"] page_size: Optional[Union[int, str]] = Field( None, - description='The number of records to include in each pages.', + description="The number of records to include in each pages.", examples=[100, "{{ config['page_size'] }}"], - title='Limit', + title="Limit", ) inject_on_first_request: Optional[bool] = Field( False, - description='Using the `offset` with value `0` during the first request', - title='Inject Offset', + description="Using the `offset` with value `0` during the first request", + title="Inject Offset", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class PageIncrement(BaseModel): - type: Literal['PageIncrement'] + type: Literal["PageIncrement"] page_size: Optional[int] = Field( None, - description='The number of records to include in each pages.', - examples=[100, '100'], - title='Page Size', + description="The number of records to include in each pages.", + examples=[100, "100"], + title="Page Size", ) start_from_page: Optional[int] = Field( 0, - description='Index of the first page to request.', + description="Index of the first page to request.", examples=[0, 1], - title='Start From Page', + title="Start From Page", ) inject_on_first_request: Optional[bool] = Field( False, - description='Using the `page number` with value defined by `start_from_page` during the first request', - title='Inject Page Number', + description="Using the `page number` with value defined by `start_from_page` during the first request", + title="Inject Page Number", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class PrimaryKey(BaseModel): __root__: Union[str, List[str], List[List[str]]] = Field( ..., - description='The stream field to be used to distinguish unique records. 
Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.', - examples=['id', ['code', 'type']], - title='Primary Key', + description="The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.", + examples=["id", ["code", "type"]], + title="Primary Key", ) class RecordFilter(BaseModel): - type: Literal['RecordFilter'] + type: Literal["RecordFilter"] condition: Optional[str] = Field( - '', - description='The predicate to filter a record. Records will be removed if evaluated to False.', + "", + description="The predicate to filter a record. Records will be removed if evaluated to False.", examples=[ "{{ record['created_at'] >= stream_interval['start_time'] }}", "{{ record.status in ['active', 'expired'] }}", ], ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SchemaNormalization(Enum): - None_ = 'None' - Default = 'Default' + None_ = "None" + Default = "Default" class RemoveFields(BaseModel): - type: Literal['RemoveFields'] + type: Literal["RemoveFields"] condition: Optional[str] = Field( - '', + "", description="The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.", examples=[ "{{ property|string == '' }}", - '{{ property is integer }}', - '{{ property|length > 5 }}', + "{{ property is integer }}", + "{{ property|length > 5 }}", "{{ property == 'some_string_to_match' }}", ], ) field_pointers: List[List[str]] = Field( ..., - description='Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.', - examples=[['tags'], [['content', 'html'], ['content', 'plain_text']]], - title='Field Paths', + description="Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.", + examples=[["tags"], [["content", "html"], ["content", "plain_text"]]], + title="Field Paths", ) class RequestPath(BaseModel): - type: Literal['RequestPath'] + type: Literal["RequestPath"] class InjectInto(Enum): - request_parameter = 'request_parameter' - header = 'header' - body_data = 'body_data' - body_json = 'body_json' + request_parameter = "request_parameter" + header = "header" + body_data = "body_data" + body_json = "body_json" class RequestOption(BaseModel): - type: Literal['RequestOption'] + type: Literal["RequestOption"] field_name: str = Field( ..., - description='Configures which key should be used in the location that the descriptor is being injected into', - examples=['segment_id'], - title='Request Option', + description="Configures which key should be used in the location that the descriptor is being injected into", + examples=["segment_id"], + title="Request Option", ) inject_into: InjectInto = Field( ..., - description='Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.', - examples=['request_parameter', 'header', 'body_data', 'body_json'], - title='Inject Into', + description="Configures where the descriptor should be set on the HTTP requests. 
Note that request parameters that are already encoded in the URL path will not be duplicated.", + examples=["request_parameter", "header", "body_data", "body_json"], + title="Inject Into", ) @@ -632,106 +626,106 @@ class Config: class LegacySessionTokenAuthenticator(BaseModel): - type: Literal['LegacySessionTokenAuthenticator'] + type: Literal["LegacySessionTokenAuthenticator"] header: str = Field( ..., - description='The name of the session token header that will be injected in the request', - examples=['X-Session'], - title='Session Request Header', + description="The name of the session token header that will be injected in the request", + examples=["X-Session"], + title="Session Request Header", ) login_url: str = Field( ..., - description='Path of the login URL (do not include the base URL)', - examples=['session'], - title='Login Path', + description="Path of the login URL (do not include the base URL)", + examples=["session"], + title="Login Path", ) session_token: Optional[str] = Field( None, - description='Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair', + description="Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair", example=["{{ config['session_token'] }}"], - title='Session Token', + title="Session Token", ) session_token_response_key: str = Field( ..., - description='Name of the key of the session token to be extracted from the response', - examples=['id'], - title='Response Token Response Key', + description="Name of the key of the session token to be extracted from the response", + examples=["id"], + title="Response Token Response Key", ) username: Optional[str] = Field( None, - description='Username used to authenticate and obtain a session token', + description="Username used to authenticate and obtain a session token", examples=[" {{ config['username'] }}"], - title='Username', + title="Username", ) password: Optional[str] = Field( - '', - description='Password used to authenticate and obtain a session token', - examples=["{{ config['password'] }}", ''], - title='Password', + "", + description="Password used to authenticate and obtain a session token", + examples=["{{ config['password'] }}", ""], + title="Password", ) validate_session_url: str = Field( ..., - description='Path of the URL to use to validate that the session token is valid (do not include the base URL)', - examples=['user/current'], - title='Validate Session Path', + description="Path of the URL to use to validate that the session token is valid (do not include the base URL)", + examples=["user/current"], + title="Validate Session Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ValueType(Enum): - string = 'string' - number = 'number' - integer = 'integer' - boolean = 'boolean' + string = "string" + number = "number" + integer = "integer" + boolean = "boolean" class WaitTimeFromHeader(BaseModel): - type: Literal['WaitTimeFromHeader'] + type: Literal["WaitTimeFromHeader"] header: str = Field( ..., - description='The name of the response header defining how long to wait before retrying.', - examples=['Retry-After'], - title='Response Header Name', + description="The name of the response header defining how long to wait before retrying.", + examples=["Retry-After"], + title="Response Header Name", ) regex: Optional[str] = Field( None, - description='Optional regex to apply on 
the header to extract its value. The regex should define a capture group defining the wait time.', - examples=['([-+]?\\d+)'], - title='Extraction Regex', + description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + examples=["([-+]?\\d+)"], + title="Extraction Regex", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class WaitUntilTimeFromHeader(BaseModel): - type: Literal['WaitUntilTimeFromHeader'] + type: Literal["WaitUntilTimeFromHeader"] header: str = Field( ..., - description='The name of the response header defining how long to wait before retrying.', - examples=['wait_time'], - title='Response Header', + description="The name of the response header defining how long to wait before retrying.", + examples=["wait_time"], + title="Response Header", ) min_wait: Optional[Union[float, str]] = Field( None, - description='Minimum time to wait before retrying.', - examples=[10, '60'], - title='Minimum Wait Time', + description="Minimum time to wait before retrying.", + examples=[10, "60"], + title="Minimum Wait Time", ) regex: Optional[str] = Field( None, - description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', - examples=['([-+]?\\d+)'], - title='Extraction Regex', + description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + examples=["([-+]?\\d+)"], + title="Extraction Regex", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AddedFieldDefinition(BaseModel): - type: Literal['AddedFieldDefinition'] + type: Literal["AddedFieldDefinition"] path: List[str] = Field( ..., - description='List of strings defining the path where to add the value on the record.', - examples=[['segment_id'], ['metadata', 'segment_id']], - title='Path', + description="List of strings defining the path where to add the value on the record.", + examples=[["segment_id"], ["metadata", "segment_id"]], + title="Path", ) value: str = Field( ..., @@ -741,187 +735,185 @@ class AddedFieldDefinition(BaseModel): "{{ record['MetaData']['LastUpdatedTime'] }}", "{{ stream_partition['segment_id'] }}", ], - title='Value', + title="Value", ) value_type: Optional[ValueType] = Field( None, - description='Type of the value. If not specified, the type will be inferred from the value.', - title='Value Type', + description="Type of the value. 
If not specified, the type will be inferred from the value.", + title="Value Type", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AddFields(BaseModel): - type: Literal['AddFields'] + type: Literal["AddFields"] fields: List[AddedFieldDefinition] = Field( ..., - description='List of transformations (path and corresponding value) that will be added to the record.', - title='Fields', + description="List of transformations (path and corresponding value) that will be added to the record.", + title="Fields", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ApiKeyAuthenticator(BaseModel): - type: Literal['ApiKeyAuthenticator'] + type: Literal["ApiKeyAuthenticator"] api_token: Optional[str] = Field( None, - description='The API key to inject in the request. Fill it in the user inputs.', + description="The API key to inject in the request. Fill it in the user inputs.", examples=["{{ config['api_key'] }}", "Token token={{ config['api_key'] }}"], - title='API Key', + title="API Key", ) header: Optional[str] = Field( None, - description='The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.', - examples=['Authorization', 'Api-Token', 'X-Auth-Token'], - title='Header Name', + description="The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.", + examples=["Authorization", "Api-Token", "X-Auth-Token"], + title="Header Name", ) inject_into: Optional[RequestOption] = Field( None, - description='Configure how the API Key will be sent in requests to the source API. Either inject_into or header has to be defined.', + description="Configure how the API Key will be sent in requests to the source API. 
Either inject_into or header has to be defined.", examples=[ - {'inject_into': 'header', 'field_name': 'Authorization'}, - {'inject_into': 'request_parameter', 'field_name': 'authKey'}, + {"inject_into": "header", "field_name": "Authorization"}, + {"inject_into": "request_parameter", "field_name": "authKey"}, ], - title='Inject API Key Into Outgoing HTTP Request', + title="Inject API Key Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AuthFlow(BaseModel): - auth_flow_type: Optional[AuthFlowType] = Field( - None, description='The type of auth to use', title='Auth flow type' - ) + auth_flow_type: Optional[AuthFlowType] = Field(None, description="The type of auth to use", title="Auth flow type") predicate_key: Optional[List[str]] = Field( None, - description='JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.', - examples=[['credentials', 'auth_type']], - title='Predicate key', + description="JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.", + examples=[["credentials", "auth_type"]], + title="Predicate key", ) predicate_value: Optional[str] = Field( None, - description='Value of the predicate_key fields for the advanced auth to be applicable.', - examples=['Oauth'], - title='Predicate value', + description="Value of the predicate_key fields for the advanced auth to be applicable.", + examples=["Oauth"], + title="Predicate value", ) oauth_config_specification: Optional[OAuthConfigSpecification] = None class CursorPagination(BaseModel): - type: Literal['CursorPagination'] + type: Literal["CursorPagination"] cursor_value: str = Field( ..., - description='Value of the cursor defining the next page to fetch.', + description="Value of the cursor defining the next page to fetch.", examples=[ - '{{ headers.link.next.cursor }}', + "{{ headers.link.next.cursor }}", "{{ last_records[-1]['key'] }}", "{{ response['nextPage'] }}", ], - title='Cursor Value', + title="Cursor Value", ) page_size: Optional[int] = Field( None, - description='The number of records to include in each pages.', + description="The number of records to include in each pages.", examples=[100], - title='Page Size', + title="Page Size", ) stop_condition: Optional[str] = Field( None, - description='Template string evaluating when to stop paginating.', + description="Template string evaluating when to stop paginating.", examples=[ - '{{ response.data.has_more is false }}', + "{{ response.data.has_more is false }}", "{{ 'next' not in headers['link'] }}", ], - title='Stop Condition', + title="Stop Condition", ) decoder: Optional[JsonDecoder] = Field( None, - description='Component decoding the response so records can be extracted.', - title='Decoder', + description="Component decoding the response so records can be extracted.", + title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DatetimeBasedCursor(BaseModel): - type: Literal['DatetimeBasedCursor'] + type: Literal["DatetimeBasedCursor"] cursor_field: str = Field( ..., - description='The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. 
Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.', - examples=['created_at', "{{ config['record_cursor'] }}"], - title='Cursor Field', + description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.", + examples=["created_at", "{{ config['record_cursor'] }}"], + title="Cursor Field", ) datetime_format: str = Field( ..., - description='The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s', '%ms'], - title='Outgoing Datetime Format', + description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", + examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms"], + title="Outgoing Datetime Format", ) start_datetime: Union[str, MinMaxDatetime] = Field( ..., - description='The datetime that determines the earliest record that should be synced.', - examples=['2020-01-1T00:00:00Z', "{{ config['start_time'] }}"], - title='Start Datetime', + description="The datetime that determines the earliest record that should be synced.", + examples=["2020-01-1T00:00:00Z", "{{ config['start_time'] }}"], + title="Start Datetime", ) cursor_datetime_formats: Optional[List[str]] = Field( None, - description='The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.', - title='Cursor Datetime Formats', + description="The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.", + title="Cursor Datetime Formats", ) cursor_granularity: Optional[str] = Field( None, - description='Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.', - examples=['PT1S'], - title='Cursor Granularity', + description="Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. 
Given this field is provided, `step` needs to be provided as well.", + examples=["PT1S"], + title="Cursor Granularity", ) end_datetime: Optional[Union[str, MinMaxDatetime]] = Field( None, - description='The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.', - examples=['2021-01-1T00:00:00Z', '{{ now_utc() }}', '{{ day_delta(-1) }}'], - title='End Datetime', + description="The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.", + examples=["2021-01-1T00:00:00Z", "{{ now_utc() }}", "{{ day_delta(-1) }}"], + title="End Datetime", ) end_time_option: Optional[RequestOption] = Field( None, - description='Optionally configures how the end datetime will be sent in requests to the source API.', - title='Inject End Time Into Outgoing HTTP Request', + description="Optionally configures how the end datetime will be sent in requests to the source API.", + title="Inject End Time Into Outgoing HTTP Request", ) is_data_feed: Optional[bool] = Field( None, - description='A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.', - title='Whether the target API is formatted as a data feed', + description="A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.", + title="Whether the target API is formatted as a data feed", ) lookback_window: Optional[str] = Field( None, - description='Time interval before the start_datetime to read data for, e.g. P1M for looking back one month.', - examples=['P1D', "P{{ config['lookback_days'] }}D"], - title='Lookback Window', + description="Time interval before the start_datetime to read data for, e.g. P1M for looking back one month.", + examples=["P1D", "P{{ config['lookback_days'] }}D"], + title="Lookback Window", ) partition_field_end: Optional[str] = Field( None, - description='Name of the partition start time field.', - examples=['ending_time'], - title='Partition Field End', + description="Name of the partition start time field.", + examples=["ending_time"], + title="Partition Field End", ) partition_field_start: Optional[str] = Field( None, - description='Name of the partition end time field.', - examples=['starting_time'], - title='Partition Field Start', + description="Name of the partition end time field.", + examples=["starting_time"], + title="Partition Field Start", ) start_time_option: Optional[RequestOption] = Field( None, - description='Optionally configures how the start datetime will be sent in requests to the source API.', - title='Inject Start Time Into Outgoing HTTP Request', + description="Optionally configures how the start datetime will be sent in requests to the source API.", + title="Inject Start Time Into Outgoing HTTP Request", ) step: Optional[str] = Field( None, - description='The size of the time window (ISO8601 duration). Given this field is provided, `cursor_granularity` needs to be provided as well.', - examples=['P1W', "{{ config['step_increment'] }}"], - title='Step', + description="The size of the time window (ISO8601 duration). 
Given this field is provided, `cursor_granularity` needs to be provided as well.", + examples=["P1W", "{{ config['step_increment'] }}"], + title="Step", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DefaultErrorHandler(BaseModel): - type: Literal['DefaultErrorHandler'] + type: Literal["DefaultErrorHandler"] backoff_strategies: Optional[ List[ Union[ @@ -934,145 +926,143 @@ class DefaultErrorHandler(BaseModel): ] ] = Field( None, - description='List of backoff strategies to use to determine how long to wait before retrying a retryable request.', - title='Backoff Strategies', + description="List of backoff strategies to use to determine how long to wait before retrying a retryable request.", + title="Backoff Strategies", ) max_retries: Optional[int] = Field( 5, - description='The maximum number of time to retry a retryable request before giving up and failing.', + description="The maximum number of time to retry a retryable request before giving up and failing.", examples=[5, 0, 10], - title='Max Retry Count', + title="Max Retry Count", ) response_filters: Optional[List[HttpResponseFilter]] = Field( None, description="List of response filters to iterate on when deciding how to handle an error. When using an array of multiple filters, the filters will be applied sequentially and the response will be selected if it matches any of the filter's predicate.", - title='Response Filters', + title="Response Filters", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DefaultPaginator(BaseModel): - type: Literal['DefaultPaginator'] - pagination_strategy: Union[ - CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement - ] = Field( + type: Literal["DefaultPaginator"] + pagination_strategy: Union[CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement] = Field( ..., - description='Strategy defining how records are paginated.', - title='Pagination Strategy', + description="Strategy defining how records are paginated.", + title="Pagination Strategy", ) decoder: Optional[JsonDecoder] = Field( None, - description='Component decoding the response so records can be extracted.', - title='Decoder', + description="Component decoding the response so records can be extracted.", + title="Decoder", ) page_size_option: Optional[RequestOption] = None page_token_option: Optional[Union[RequestOption, RequestPath]] = None - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DpathExtractor(BaseModel): - type: Literal['DpathExtractor'] + type: Literal["DpathExtractor"] field_path: List[str] = Field( ..., description='List of potentially nested fields describing the full path of the field to extract. Use "*" to extract all values from an array. 
See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).', examples=[ - ['data'], - ['data', 'records'], - ['data', '{{ parameters.name }}'], - ['data', '*', 'record'], + ["data"], + ["data", "records"], + ["data", "{{ parameters.name }}"], + ["data", "*", "record"], ], - title='Field Path', + title="Field Path", ) decoder: Optional[JsonDecoder] = Field( None, - description='Component decoding the response so records can be extracted.', - title='Decoder', + description="Component decoding the response so records can be extracted.", + title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenRequestApiKeyAuthenticator(BaseModel): - type: Literal['ApiKey'] + type: Literal["ApiKey"] inject_into: RequestOption = Field( ..., - description='Configure how the API Key will be sent in requests to the source API.', + description="Configure how the API Key will be sent in requests to the source API.", examples=[ - {'inject_into': 'header', 'field_name': 'Authorization'}, - {'inject_into': 'request_parameter', 'field_name': 'authKey'}, + {"inject_into": "header", "field_name": "Authorization"}, + {"inject_into": "request_parameter", "field_name": "authKey"}, ], - title='Inject API Key Into Outgoing HTTP Request', + title="Inject API Key Into Outgoing HTTP Request", ) class ListPartitionRouter(BaseModel): - type: Literal['ListPartitionRouter'] + type: Literal["ListPartitionRouter"] cursor_field: str = Field( ..., description='While iterating over list values, the name of field used to reference a list value. The partition value can be accessed with string interpolation. e.g. 
"{{ stream_partition[\'my_key\'] }}" where "my_key" is the value of the cursor_field.', - examples=['section', "{{ config['section_key'] }}"], - title='Current Partition Value Identifier', + examples=["section", "{{ config['section_key'] }}"], + title="Current Partition Value Identifier", ) values: Union[str, List[str]] = Field( ..., - description='The list of attributes being iterated over and used as input for the requests made to the source API.', - examples=[['section_a', 'section_b', 'section_c'], "{{ config['sections'] }}"], - title='Partition Values', + description="The list of attributes being iterated over and used as input for the requests made to the source API.", + examples=[["section_a", "section_b", "section_c"], "{{ config['sections'] }}"], + title="Partition Values", ) request_option: Optional[RequestOption] = Field( None, - description='A request option describing where the list value should be injected into and under what field name if applicable.', - title='Inject Partition Value Into Outgoing HTTP Request', + description="A request option describing where the list value should be injected into and under what field name if applicable.", + title="Inject Partition Value Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RecordSelector(BaseModel): - type: Literal['RecordSelector'] + type: Literal["RecordSelector"] extractor: Union[CustomRecordExtractor, DpathExtractor] record_filter: Optional[RecordFilter] = Field( None, - description='Responsible for filtering records to be emitted by the Source.', - title='Record Filter', + description="Responsible for filtering records to be emitted by the Source.", + title="Record Filter", ) schema_normalization: Optional[SchemaNormalization] = SchemaNormalization.None_ - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class Spec(BaseModel): - type: Literal['Spec'] + type: Literal["Spec"] connection_specification: Dict[str, Any] = Field( ..., - description='A connection specification describing how a the connector can be configured.', - title='Connection Specification', + description="A connection specification describing how a the connector can be configured.", + title="Connection Specification", ) documentation_url: Optional[str] = Field( None, description="URL of the connector's documentation page.", - examples=['https://docs.airbyte.com/integrations/sources/dremio'], - title='Documentation URL', + examples=["https://docs.airbyte.com/integrations/sources/dremio"], + title="Documentation URL", ) advanced_auth: Optional[AuthFlow] = Field( None, - description='Advanced specification for configuring the authentication flow.', - title='Advanced Auth', + description="Advanced specification for configuring the authentication flow.", + title="Advanced Auth", ) class CompositeErrorHandler(BaseModel): - type: Literal['CompositeErrorHandler'] + type: Literal["CompositeErrorHandler"] error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler]] = Field( ..., - description='List of error handlers to iterate on to determine how to handle a failed response.', - title='Error Handlers', + description="List of error handlers to iterate on to determine how to handle a failed response.", + title="Error Handlers", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: 
Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DeclarativeSource(BaseModel): class Config: extra = Extra.forbid - type: Literal['DeclarativeSource'] + type: Literal["DeclarativeSource"] check: CheckStream streams: List[DeclarativeStream] version: str @@ -1081,7 +1071,7 @@ class Config: spec: Optional[Spec] = None metadata: Optional[Dict[str, Any]] = Field( None, - description='For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.', + description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", ) @@ -1089,12 +1079,12 @@ class SelectiveAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal['SelectiveAuthenticator'] + type: Literal["SelectiveAuthenticator"] authenticator_selection_path: List[str] = Field( ..., - description='Path of the field in config with selected authenticator name', - examples=[['auth'], ['auth', 'type']], - title='Authenticator Selection Path', + description="Path of the field in config with selected authenticator name", + examples=[["auth"], ["auth", "type"]], + title="Authenticator Selection Path", ) authenticators: Dict[ str, @@ -1110,119 +1100,109 @@ class Config: ], ] = Field( ..., - description='Authenticators to select from.', + description="Authenticators to select from.", examples=[ { - 'authenticators': { - 'token': '#/definitions/ApiKeyAuthenticator', - 'oauth': '#/definitions/OAuthAuthenticator', + "authenticators": { + "token": "#/definitions/ApiKeyAuthenticator", + "oauth": "#/definitions/OAuthAuthenticator", } } ], - title='Authenticators', + title="Authenticators", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DeclarativeStream(BaseModel): class Config: extra = Extra.allow - type: Literal['DeclarativeStream'] + type: Literal["DeclarativeStream"] retriever: Union[CustomRetriever, SimpleRetriever] = Field( ..., - description='Component used to coordinate how records are extracted across stream slices and request pages.', - title='Retriever', + description="Component used to coordinate how records are extracted across stream slices and request pages.", + title="Retriever", ) - incremental_sync: Optional[ - Union[CustomIncrementalSync, DatetimeBasedCursor] - ] = Field( + incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field( None, - description='Component used to fetch data incrementally based on a time field in the data.', - title='Incremental Sync', - ) - name: Optional[str] = Field( - '', description='The stream name.', example=['Users'], title='Name' - ) - primary_key: Optional[PrimaryKey] = Field( - '', description='The primary key of the stream.', title='Primary Key' + description="Component used to fetch data incrementally based on a time field in the data.", + title="Incremental Sync", ) + name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") + primary_key: Optional[PrimaryKey] = Field("", description="The primary key of the stream.", title="Primary Key") schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader]] = Field( None, - description='Component used to retrieve the schema for the current stream.', - title='Schema Loader', + description="Component used to retrieve the schema for the current stream.", + title="Schema Loader", ) - transformations: Optional[ 
- List[Union[AddFields, CustomTransformation, RemoveFields]] - ] = Field( + transformations: Optional[List[Union[AddFields, CustomTransformation, RemoveFields]]] = Field( None, - description='A list of transformations to be applied to each output record.', - title='Transformations', + description="A list of transformations to be applied to each output record.", + title="Transformations", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenAuthenticator(BaseModel): - type: Literal['SessionTokenAuthenticator'] + type: Literal["SessionTokenAuthenticator"] login_requester: HttpRequester = Field( ..., - description='Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.', + description="Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.", examples=[ { - 'type': 'HttpRequester', - 'url_base': 'https://my_api.com', - 'path': '/login', - 'authenticator': { - 'type': 'BasicHttpAuthenticator', - 'username': '{{ config.username }}', - 'password': '{{ config.password }}', + "type": "HttpRequester", + "url_base": "https://my_api.com", + "path": "/login", + "authenticator": { + "type": "BasicHttpAuthenticator", + "username": "{{ config.username }}", + "password": "{{ config.password }}", }, } ], - title='Login Requester', + title="Login Requester", ) session_token_path: List[str] = Field( ..., - description='The path in the response body returned from the login requester to the session token.', - examples=[['access_token'], ['result', 'token']], - title='Session Token Path', + description="The path in the response body returned from the login requester to the session token.", + examples=[["access_token"], ["result", "token"]], + title="Session Token Path", ) expiration_duration: Optional[str] = Field( None, - description='The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.', - examples=['PT1H', 'P1D'], - title='Expiration Duration', + description="The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.", + examples=["PT1H", "P1D"], + title="Expiration Duration", ) - request_authentication: Union[ - SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator - ] = Field( + request_authentication: Union[SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator] = Field( ..., - description='Authentication method to use for requests sent to the API, specifying how to inject the session token.', - title='Data Request Authentication', + description="Authentication method to use for requests sent to the API, specifying how to inject the session token.", + title="Data Request Authentication", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class HttpRequester(BaseModel): - type: Literal['HttpRequester'] + type: Literal["HttpRequester"] url_base: str = Field( ..., - description='Base URL of the API source. 
Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', + description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", examples=[ - 'https://connect.squareup.com/v2', + "https://connect.squareup.com/v2", "{{ config['base_url'] or 'https://app.posthog.com'}}/api/", ], - title='API Base URL', + title="API Base URL", ) path: str = Field( ..., - description='Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', + description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", examples=[ - '/products', + "/products", "/quotes/{{ stream_partition['id'] }}/quote_line_groups", "/trades/{{ config['symbol_id'] }}/history", ], - title='URL Path', + title="URL Path", ) authenticator: Optional[ Union[ @@ -1238,101 +1218,97 @@ class HttpRequester(BaseModel): ] ] = Field( None, - description='Authentication method to use for requests sent to the API.', - title='Authenticator', + description="Authentication method to use for requests sent to the API.", + title="Authenticator", ) - error_handler: Optional[ - Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler] - ] = Field( + error_handler: Optional[Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler]] = Field( None, - description='Error handler component that defines how to handle errors.', - title='Error Handler', + description="Error handler component that defines how to handle errors.", + title="Error Handler", ) http_method: Optional[HttpMethod] = Field( HttpMethod.GET, - description='The HTTP method used to fetch data from the source (can be GET or POST).', - examples=['GET', 'POST'], - title='HTTP Method', + description="The HTTP method used to fetch data from the source (can be GET or POST).", + examples=["GET", "POST"], + title="HTTP Method", ) request_body_data: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.', + description="Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.", examples=[ '[{"clause": {"type": "timestamp", "operator": 10, "parameters":\n [{"value": {{ stream_interval[\'start_time\'] | int * 1000 }} }]\n }, "orderBy": 1, "columnName": "Timestamp"}]/\n' ], - title='Request Body Payload (Non-JSON)', + title="Request Body Payload (Non-JSON)", ) request_body_json: Optional[Union[str, Dict[str, Any]]] = Field( None, - description='Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.', + description="Specifies how to populate the body of the request with a JSON payload. 
Can contain nested objects.", examples=[ - {'sort_order': 'ASC', 'sort_field': 'CREATED_AT'}, - {'key': "{{ config['value'] }}"}, - {'sort': {'field': 'updated_at', 'order': 'ascending'}}, + {"sort_order": "ASC", "sort_field": "CREATED_AT"}, + {"key": "{{ config['value'] }}"}, + {"sort": {"field": "updated_at", "order": "ascending"}}, ], - title='Request Body JSON Payload', + title="Request Body JSON Payload", ) request_headers: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.', - examples=[{'Output-Format': 'JSON'}, {'Version': "{{ config['version'] }}"}], - title='Request Headers', + description="Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.", + examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}], + title="Request Headers", ) request_parameters: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.', + description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.", examples=[ - {'unit': 'day'}, + {"unit": "day"}, { - 'query': 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' + "query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' }, - {'searchIn': "{{ ','.join(config.get('search_in', [])) }}"}, - {'sort_by[asc]': 'updated_at'}, + {"searchIn": "{{ ','.join(config.get('search_in', [])) }}"}, + {"sort_by[asc]": "updated_at"}, ], - title='Query Parameters', + title="Query Parameters", ) use_cache: Optional[bool] = Field( False, - description='Enables stream requests caching. This field is automatically set by the CDK.', - title='Use Cache', + description="Enables stream requests caching. This field is automatically set by the CDK.", + title="Use Cache", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ParentStreamConfig(BaseModel): - type: Literal['ParentStreamConfig'] + type: Literal["ParentStreamConfig"] parent_key: str = Field( ..., - description='The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.', - examples=['id', "{{ config['parent_record_id'] }}"], - title='Parent Key', - ) - stream: DeclarativeStream = Field( - ..., description='Reference to the parent stream.', title='Parent Stream' + description="The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. 
This parent identifier field is typically a characteristic of the child records being extracted from the source API.", + examples=["id", "{{ config['parent_record_id'] }}"], + title="Parent Key", ) + stream: DeclarativeStream = Field(..., description="Reference to the parent stream.", title="Parent Stream") partition_field: str = Field( ..., - description='While iterating over parent records during a sync, the parent_key value can be referenced by using this field.', - examples=['parent_id', "{{ config['parent_partition_field'] }}"], - title='Current Parent Key Value Identifier', + description="While iterating over parent records during a sync, the parent_key value can be referenced by using this field.", + examples=["parent_id", "{{ config['parent_partition_field'] }}"], + title="Current Parent Key Value Identifier", ) request_option: Optional[RequestOption] = Field( None, - description='A request option describing where the parent key value should be injected into and under what field name if applicable.', - title='Request Option', + description="A request option describing where the parent key value should be injected into and under what field name if applicable.", + title="Request Option", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SimpleRetriever(BaseModel): - type: Literal['SimpleRetriever'] + type: Literal["SimpleRetriever"] record_selector: RecordSelector = Field( ..., - description='Component that describes how to extract records from a HTTP response.', + description="Component that describes how to extract records from a HTTP response.", ) requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API.', + description="Requester component that describes how to prepare HTTP requests to send to the source API.", ) paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1347,28 +1323,24 @@ class SimpleRetriever(BaseModel): CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[ - Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] - ], + List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], ] ] = Field( [], - description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', - title='Partition Router', + description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", + title="Partition Router", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SubstreamPartitionRouter(BaseModel): - type: Literal['SubstreamPartitionRouter'] + type: Literal["SubstreamPartitionRouter"] parent_stream_configs: List[ParentStreamConfig] = Field( ..., - description='Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.', - title='Parent Stream Configs', + description="Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.", + title="Parent Stream Configs", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") 
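Note (illustrative sketch, not part of the diff): the central change in this PR is that `RequestOption.field_name` may now be an interpolated string, wrapped into an `InterpolatedString` in `__post_init__`, so request builders evaluate it against the connector config before injecting the value. The `cursor_field` config key and the `since_` prefix below are assumptions chosen to mirror the tests added later in this diff.

# Hypothetical usage of the interpolatable field_name introduced by this PR.
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType

start_time_option = RequestOption(
    inject_into=RequestOptionType.request_parameter,
    field_name="since_{{ config['cursor_field'] }}",  # interpolation is now allowed here
    parameters={},
)
# Components such as DatetimeBasedCursor, DefaultPaginator and the partition routers
# now call .eval(config=...) on field_name before building the outgoing request.
assert start_time_option.field_name.eval(config={"cursor_field": "updated_at"}) == "since_updated_at"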
CompositeErrorHandler.update_forward_refs() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py index 9841bbd51dbaaf..5413709d9615a6 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py @@ -81,7 +81,7 @@ def _get_request_option(self, request_option_type: RequestOptionType, stream_sli if self.request_option and self.request_option.inject_into == request_option_type and stream_slice: slice_value = stream_slice.get(self.cursor_field.eval(self.config)) if slice_value: - return {self.request_option.field_name: slice_value} + return {self.request_option.field_name.eval(self.config): slice_value} else: return {} else: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index c080e56a49ce07..3e915168c059fb 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -100,7 +100,7 @@ def _get_request_option(self, option_type: RequestOptionType, stream_slice: Stre key = parent_config.partition_field.eval(self.config) value = stream_slice.get(key) if value: - params.update({parent_config.request_option.field_name: value}) + params.update({parent_config.request_option.field_name.eval(config=self.config): value}) return params def stream_slices(self) -> Iterable[StreamSlice]: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py index e23b948859fcfd..824efe9aed395a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py @@ -164,9 +164,9 @@ def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping and isinstance(self.page_token_option, RequestOption) and self.page_token_option.inject_into == option_type ): - options[self.page_token_option.field_name] = self._token + options[self.page_token_option.field_name.eval(config=self.config)] = self._token if self.page_size_option and self.pagination_strategy.get_page_size() and self.page_size_option.inject_into == option_type: - options[self.page_size_option.field_name] = self.pagination_strategy.get_page_size() + options[self.page_size_option.field_name.eval(config=self.config)] = self.pagination_strategy.get_page_size() return options diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_option.py index 47de679c8944ca..d13d2056681d15 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -4,7 +4,9 @@ from dataclasses import InitVar, dataclass from enum import Enum -from typing import Any, Mapping +from typing import Any, Mapping, Union + +from 
airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString class RequestOptionType(Enum): @@ -28,6 +30,9 @@ class RequestOption: inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter """ - field_name: str + field_name: Union[InterpolatedString, str] inject_into: RequestOptionType parameters: InitVar[Mapping[str, Any]] + + def __post_init__(self, parameters: Mapping[str, Any]) -> None: + self.field_name = InterpolatedString.create(self.field_name, parameters=parameters) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/availability_strategy/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/availability_strategy/__init__.py index 56204e9b74e6d8..a05e5421000a25 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/availability_strategy/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/availability_strategy/__init__.py @@ -1,7 +1,4 @@ -from .abstract_file_based_availability_strategy import ( - AbstractFileBasedAvailabilityStrategy, - AbstractFileBasedAvailabilityStrategyWrapper, -) +from .abstract_file_based_availability_strategy import AbstractFileBasedAvailabilityStrategy, AbstractFileBasedAvailabilityStrategyWrapper from .default_file_based_availability_strategy import DefaultFileBasedAvailabilityStrategy __all__ = ["AbstractFileBasedAvailabilityStrategy", "AbstractFileBasedAvailabilityStrategyWrapper", "DefaultFileBasedAvailabilityStrategy"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py index 646c595ebc374c..6ab66bb398882c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py @@ -1,5 +1,5 @@ from .abstract_concurrent_file_based_cursor import AbstractConcurrentFileBasedCursor -from .file_based_noop_cursor import FileBasedNoopCursor from .file_based_concurrent_cursor import FileBasedConcurrentCursor +from .file_based_noop_cursor import FileBasedNoopCursor __all__ = ["AbstractConcurrentFileBasedCursor", "FileBasedConcurrentCursor", "FileBasedNoopCursor"] diff --git a/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py b/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py index 0e8760b73cc7c2..41da6491636823 100644 --- a/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py +++ b/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py @@ -285,7 +285,7 @@ def test_process_multiple_chunks_with_relevant_fields(): "text: Special tokens like", "<|endoftext|> are treated like regular", "text", - ] + ], ), ( "Custom separator", diff --git a/airbyte-cdk/python/unit_tests/destinations/vector_db_based/embedder_test.py b/airbyte-cdk/python/unit_tests/destinations/vector_db_based/embedder_test.py index 3cf8e4114e5bfa..a5f22b752ed26d 100644 --- a/airbyte-cdk/python/unit_tests/destinations/vector_db_based/embedder_test.py +++ b/airbyte-cdk/python/unit_tests/destinations/vector_db_based/embedder_test.py @@ -118,8 +118,6 @@ def test_openai_chunking(): mock_embedding_instance.embed_documents.side_effect = lambda texts: [[0] * OPEN_AI_VECTOR_SIZE] * len(texts) - chunks = [ - Document(page_content="a", record=AirbyteRecordMessage(stream="mystream", data={}, emitted_at=0)) for _ in 
range(1005) - ] + chunks = [Document(page_content="a", record=AirbyteRecordMessage(stream="mystream", data={}, emitted_at=0)) for _ in range(1005)] assert embedder.embed_documents(chunks) == [[0] * OPEN_AI_VECTOR_SIZE] * 1005 mock_embedding_instance.embed_documents.assert_has_calls([call(["a"] * 1000), call(["a"] * 5)]) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_token_auth.py b/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_token_auth.py index 4db4a1ea0b0a5e..599667c42f9b81 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_token_auth.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_token_auth.py @@ -83,7 +83,7 @@ def test_api_key_authenticator(test_name, header, token, expected_header, expect """ token_provider = InterpolatedStringTokenProvider(config=config, api_token=token, parameters=parameters) token_auth = ApiKeyAuthenticator( - request_option=RequestOption(inject_into=RequestOptionType.header, field_name=header, parameters={}), + request_option=RequestOption(inject_into=RequestOptionType.header, field_name=header, parameters=parameters), token_provider=token_provider, config=config, parameters=parameters, @@ -192,7 +192,7 @@ def test_api_key_authenticator_inject(test_name, field_name, token, expected_fie """ token_provider = InterpolatedStringTokenProvider(config=config, api_token=token, parameters=parameters) token_auth = ApiKeyAuthenticator( - request_option=RequestOption(inject_into=inject_type, field_name=field_name, parameters={}), + request_option=RequestOption(inject_into=inject_type, field_name=field_name, parameters=parameters), token_provider=token_provider, config=config, parameters=parameters, diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index 08cea962086ea4..c96c198505788a 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -232,7 +232,7 @@ def test_full_config_stream(): assert isinstance(stream.retriever.paginator, DefaultPaginator) assert isinstance(stream.retriever.paginator.decoder, JsonDecoder) - assert stream.retriever.paginator.page_size_option.field_name == "page_size" + assert stream.retriever.paginator.page_size_option.field_name.eval(input_config) == "page_size" assert stream.retriever.paginator.page_size_option.inject_into == RequestOptionType.request_parameter assert isinstance(stream.retriever.paginator.page_token_option, RequestPath) assert stream.retriever.paginator.url_base.string == "https://api.sendgrid.com/v3/" @@ -422,7 +422,7 @@ def test_list_based_stream_slicer_with_values_defined_in_config(): assert isinstance(partition_router, ListPartitionRouter) assert partition_router.values == ["airbyte", "airbyte-cloud"] assert partition_router.request_option.inject_into == RequestOptionType.header - assert partition_router.request_option.field_name == "repository" + assert partition_router.request_option.field_name.eval(config=input_config) == "repository" def test_create_substream_partition_router(): @@ -484,7 +484,7 @@ def test_create_substream_partition_router(): assert partition_router.parent_stream_configs[0].parent_key.eval({}) == "id" assert partition_router.parent_stream_configs[0].partition_field.eval({}) == "repository_id" assert 
partition_router.parent_stream_configs[0].request_option.inject_into == RequestOptionType.request_parameter - assert partition_router.parent_stream_configs[0].request_option.field_name == "repository_id" + assert partition_router.parent_stream_configs[0].request_option.field_name.eval(config=input_config) == "repository_id" assert partition_router.parent_stream_configs[1].parent_key.eval({}) == "someid" assert partition_router.parent_stream_configs[1].partition_field.eval({}) == "word_id" @@ -509,17 +509,17 @@ def test_datetime_based_cursor(): start_time_option: type: RequestOption inject_into: request_parameter - field_name: created[gte] + field_name: "since_{{ config['cursor_field'] }}" end_time_option: type: RequestOption inject_into: body_json - field_name: end_time + field_name: "before_{{ parameters['cursor_field'] }}" partition_field_start: star partition_field_end: en """ parsed_manifest = YamlDeclarativeSource._parse(content) resolved_manifest = resolver.preprocess_manifest(parsed_manifest) - slicer_manifest = transformer.propagate_types_and_parameters("", resolved_manifest["incremental"], {}) + slicer_manifest = transformer.propagate_types_and_parameters("", resolved_manifest["incremental"], {"cursor_field": "created_at"}) stream_slicer = factory.create_component(model_type=DatetimeBasedCursorModel, component_definition=slicer_manifest, config=input_config) @@ -529,9 +529,9 @@ def test_datetime_based_cursor(): assert stream_slicer.cursor_granularity == "PT0.000001S" assert stream_slicer.lookback_window.string == "P5D" assert stream_slicer.start_time_option.inject_into == RequestOptionType.request_parameter - assert stream_slicer.start_time_option.field_name == "created[gte]" + assert stream_slicer.start_time_option.field_name.eval(config=input_config | {"cursor_field": "updated_at"}) == "since_updated_at" assert stream_slicer.end_time_option.inject_into == RequestOptionType.body_json - assert stream_slicer.end_time_option.field_name == "end_time" + assert stream_slicer.end_time_option.field_name.eval({}) == "before_created_at" assert stream_slicer.partition_field_start.eval({}) == "star" assert stream_slicer.partition_field_end.eval({}) == "en" @@ -937,18 +937,20 @@ def test_create_request_with_session_authenticator(): } -@pytest.mark.parametrize("input_config, expected_authenticator_class", [ - pytest.param( - {"auth": {"type": "token"}, "credentials": {"api_key": "some_key"}}, - ApiKeyAuthenticator, - id="test_create_requester_with_selective_authenticator_and_token_selected", - ), - pytest.param( - {"auth": {"type": "oauth"}, "credentials": {"client_id": "ABC"}}, - DeclarativeOauth2Authenticator, - id="test_create_requester_with_selective_authenticator_and_oauth_selected", - ), -] +@pytest.mark.parametrize( + "input_config, expected_authenticator_class", + [ + pytest.param( + {"auth": {"type": "token"}, "credentials": {"api_key": "some_key"}}, + ApiKeyAuthenticator, + id="test_create_requester_with_selective_authenticator_and_token_selected", + ), + pytest.param( + {"auth": {"type": "oauth"}, "credentials": {"client_id": "ABC"}}, + DeclarativeOauth2Authenticator, + id="test_create_requester_with_selective_authenticator_and_oauth_selected", + ), + ], ) def test_create_requester_with_selective_authenticator(input_config, expected_authenticator_class): content = """ @@ -1121,7 +1123,7 @@ def test_create_default_paginator(): assert isinstance(paginator.page_size_option, RequestOption) assert paginator.page_size_option.inject_into == RequestOptionType.request_parameter - assert 
paginator.page_size_option.field_name == "page_size" + assert paginator.page_size_option.field_name.eval(config=input_config) == "page_size" assert isinstance(paginator.page_token_option, RequestPath) @@ -1294,7 +1296,7 @@ def test_custom_components_do_not_contain_extra_fields(): assert custom_substream_partition_router.parent_stream_configs[0].parent_key.eval({}) == "id" assert custom_substream_partition_router.parent_stream_configs[0].partition_field.eval({}) == "repository_id" assert custom_substream_partition_router.parent_stream_configs[0].request_option.inject_into == RequestOptionType.request_parameter - assert custom_substream_partition_router.parent_stream_configs[0].request_option.field_name == "repository_id" + assert custom_substream_partition_router.parent_stream_configs[0].request_option.field_name.eval(config=input_config) == "repository_id" assert isinstance(custom_substream_partition_router.custom_pagination_strategy, PageIncrement) assert custom_substream_partition_router.custom_pagination_strategy.page_size == 100 @@ -1343,7 +1345,7 @@ def test_parse_custom_component_fields_if_subcomponent(): assert custom_substream_partition_router.parent_stream_configs[0].parent_key.eval({}) == "id" assert custom_substream_partition_router.parent_stream_configs[0].partition_field.eval({}) == "repository_id" assert custom_substream_partition_router.parent_stream_configs[0].request_option.inject_into == RequestOptionType.request_parameter - assert custom_substream_partition_router.parent_stream_configs[0].request_option.field_name == "repository_id" + assert custom_substream_partition_router.parent_stream_configs[0].request_option.field_name.eval(config=input_config) == "repository_id" assert isinstance(custom_substream_partition_router.custom_pagination_strategy, PageIncrement) assert custom_substream_partition_router.custom_pagination_strategy.page_size == 100 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/partition_routers/test_list_partition_router.py b/airbyte-cdk/python/unit_tests/sources/declarative/partition_routers/test_list_partition_router.py index ce1b93b9d75be9..3a83af1eb71495 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/partition_routers/test_list_partition_router.py @@ -12,39 +12,40 @@ @pytest.mark.parametrize( - "test_name, partition_values, cursor_field, expected_slices", + "partition_values, cursor_field, expected_slices", [ ( - "test_single_element", ["customer", "store", "subscription"], "owner_resource", [{"owner_resource": "customer"}, {"owner_resource": "store"}, {"owner_resource": "subscription"}], ), ( - "test_input_list_is_string", '["customer", "store", "subscription"]', "owner_resource", [{"owner_resource": "customer"}, {"owner_resource": "store"}, {"owner_resource": "subscription"}], ), ( - "test_using_cursor_from_parameters", '["customer", "store", "subscription"]', "{{ parameters['cursor_field'] }}", [{"owner_resource": "customer"}, {"owner_resource": "store"}, {"owner_resource": "subscription"}], ), ], + ids=[ + "test_single_element", + "test_input_list_is_string", + "test_using_cursor_from_parameters", + ], ) -def test_list_partition_router(test_name, partition_values, cursor_field, expected_slices): +def test_list_partition_router(partition_values, cursor_field, expected_slices): slicer = ListPartitionRouter(values=partition_values, cursor_field=cursor_field, config={}, parameters=parameters) slices = [s for s in 
slicer.stream_slices()] assert slices == expected_slices @pytest.mark.parametrize( - "test_name, request_option, expected_req_params, expected_headers, expected_body_json, expected_body_data", + "request_option, expected_req_params, expected_headers, expected_body_json, expected_body_data", [ ( - "test_inject_into_req_param", RequestOption(inject_into=RequestOptionType.request_parameter, parameters={}, field_name="owner_resource"), {"owner_resource": "customer"}, {}, @@ -52,7 +53,6 @@ def test_list_partition_router(test_name, partition_values, cursor_field, expect {}, ), ( - "test_pass_by_header", RequestOption(inject_into=RequestOptionType.header, parameters={}, field_name="owner_resource"), {}, {"owner_resource": "customer"}, @@ -60,7 +60,6 @@ def test_list_partition_router(test_name, partition_values, cursor_field, expect {}, ), ( - "test_inject_into_body_json", RequestOption(inject_into=RequestOptionType.body_json, parameters={}, field_name="owner_resource"), {}, {}, @@ -68,7 +67,6 @@ def test_list_partition_router(test_name, partition_values, cursor_field, expect {}, ), ( - "test_inject_into_body_data", RequestOption(inject_into=RequestOptionType.body_data, parameters={}, field_name="owner_resource"), {}, {}, @@ -76,8 +74,14 @@ def test_list_partition_router(test_name, partition_values, cursor_field, expect {"owner_resource": "customer"}, ), ], + ids=[ + "test_inject_into_req_param", + "test_pass_by_header", + "test_inject_into_body_json", + "test_inject_into_body_data", + ], ) -def test_request_option(test_name, request_option, expected_req_params, expected_headers, expected_body_json, expected_body_data): +def test_request_option(request_option, expected_req_params, expected_headers, expected_body_json, expected_body_data): partition_router = ListPartitionRouter( values=partition_values, cursor_field=cursor_field, config={}, request_option=request_option, parameters={} ) @@ -89,6 +93,31 @@ def test_request_option(test_name, request_option, expected_req_params, expected assert expected_body_data == partition_router.get_request_body_data(stream_slice=stream_slice) +@pytest.mark.parametrize( + "field_name_interpolation, expected_request_params", + [ + ("{{parameters['partition_name']}}", {"parameters_partition": "customer"}), + ("{{config['partition_name']}}", {"config_partition": "customer"}), + ], + ids=[ + "parameters_interpolation", + "config_interpolation", + ], +) +def test_request_options_interpolation(field_name_interpolation: str, expected_request_params: dict): + config = {"partition_name": "config_partition"} + parameters = {"partition_name": "parameters_partition"} + request_option = RequestOption( + inject_into=RequestOptionType.request_parameter, parameters=parameters, field_name=field_name_interpolation + ) + partition_router = ListPartitionRouter( + values=partition_values, cursor_field=cursor_field, config=config, request_option=request_option, parameters=parameters + ) + stream_slice = {cursor_field: "customer"} + + assert expected_request_params == partition_router.get_request_params(stream_slice=stream_slice) + + def test_request_option_before_updating_cursor(): request_option = RequestOption(inject_into=RequestOptionType.request_parameter, parameters={}, field_name="owner_resource") partition_router = ListPartitionRouter( diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py b/airbyte-cdk/python/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py index 
e677666f46ebaa..618a0fdb23e903 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py @@ -57,11 +57,10 @@ def read_records( @pytest.mark.parametrize( - "test_name, parent_stream_configs, expected_slices", + "parent_stream_configs, expected_slices", [ - ("test_no_parents", [], None), + ([], None), ( - "test_single_parent_slices_no_records", [ ParentStreamConfig( stream=MockStream([{}], [], "first_stream"), @@ -74,7 +73,6 @@ def read_records( [], ), ( - "test_single_parent_slices_with_records", [ ParentStreamConfig( stream=MockStream([{}], parent_records, "first_stream"), @@ -87,7 +85,6 @@ def read_records( [{"first_stream_id": 1, "parent_slice": {}}, {"first_stream_id": 2, "parent_slice": {}}], ), ( - "test_with_parent_slices_and_records", [ ParentStreamConfig( stream=MockStream(parent_slices, all_parent_data, "first_stream"), @@ -104,7 +101,6 @@ def read_records( ], ), ( - "test_multiple_parent_streams", [ ParentStreamConfig( stream=MockStream(parent_slices, data_first_parent_slice + data_second_parent_slice, "first_stream"), @@ -130,7 +126,6 @@ def read_records( ], ), ( - "test_missed_parent_key", [ ParentStreamConfig( stream=MockStream([{}], [{"id": 0}, {"id": 1}, {"_id": 2}, {"id": 3}], "first_stream"), @@ -147,7 +142,6 @@ def read_records( ], ), ( - "test_dpath_extraction", [ ParentStreamConfig( stream=MockStream([{}], [{"a": {"b": 0}}, {"a": {"b": 1}}, {"a": {"c": 2}}, {"a": {"b": 3}}], "first_stream"), @@ -164,8 +158,17 @@ def read_records( ], ), ], + ids=[ + "test_no_parents", + "test_single_parent_slices_no_records", + "test_single_parent_slices_with_records", + "test_with_parent_slices_and_records", + "test_multiple_parent_streams", + "test_missed_parent_key", + "test_dpath_extraction", + ], ) -def test_substream_slicer(test_name, parent_stream_configs, expected_slices): +def test_substream_slicer(parent_stream_configs, expected_slices): if expected_slices is None: try: SubstreamPartitionRouter(parent_stream_configs=parent_stream_configs, parameters={}, config={}) @@ -178,10 +181,9 @@ def test_substream_slicer(test_name, parent_stream_configs, expected_slices): @pytest.mark.parametrize( - "test_name, parent_stream_request_parameters, expected_req_params, expected_headers, expected_body_json, expected_body_data", + "parent_stream_request_parameters, expected_req_params, expected_headers, expected_body_json, expected_body_data", [ ( - "test_request_option_in_request_param", [ RequestOption(inject_into=RequestOptionType.request_parameter, parameters={}, field_name="first_stream"), RequestOption(inject_into=RequestOptionType.request_parameter, parameters={}, field_name="second_stream"), @@ -192,7 +194,6 @@ def test_substream_slicer(test_name, parent_stream_configs, expected_slices): {}, ), ( - "test_request_option_in_header", [ RequestOption(inject_into=RequestOptionType.header, parameters={}, field_name="first_stream"), RequestOption(inject_into=RequestOptionType.header, parameters={}, field_name="second_stream"), @@ -203,7 +204,6 @@ def test_substream_slicer(test_name, parent_stream_configs, expected_slices): {}, ), ( - "test_request_option_in_param_and_header", [ RequestOption(inject_into=RequestOptionType.request_parameter, parameters={}, field_name="first_stream"), RequestOption(inject_into=RequestOptionType.header, parameters={}, field_name="second_stream"), @@ -214,7 +214,6 @@ def 
test_substream_slicer(test_name, parent_stream_configs, expected_slices): {}, ), ( - "test_request_option_in_body_json", [ RequestOption(inject_into=RequestOptionType.body_json, parameters={}, field_name="first_stream"), RequestOption(inject_into=RequestOptionType.body_json, parameters={}, field_name="second_stream"), @@ -225,7 +224,6 @@ def test_substream_slicer(test_name, parent_stream_configs, expected_slices): {}, ), ( - "test_request_option_in_body_data", [ RequestOption(inject_into=RequestOptionType.body_data, parameters={}, field_name="first_stream"), RequestOption(inject_into=RequestOptionType.body_data, parameters={}, field_name="second_stream"), @@ -236,9 +234,15 @@ def test_substream_slicer(test_name, parent_stream_configs, expected_slices): {"first_stream": "1234", "second_stream": "4567"}, ), ], + ids=[ + "test_request_option_in_request_param", + "test_request_option_in_header", + "test_request_option_in_param_and_header", + "test_request_option_in_body_json", + "test_request_option_in_body_data", + ], ) def test_request_option( - test_name, parent_stream_request_parameters, expected_req_params, expected_headers, @@ -275,6 +279,61 @@ def test_request_option( assert expected_body_data == partition_router.get_request_body_data(stream_slice=stream_slice) +@pytest.mark.parametrize( + "field_name_first_stream, field_name_second_stream, expected_request_params", + [ + ( + "{{parameters['field_name_first_stream']}}", + "{{parameters['field_name_second_stream']}}", + {"parameter_first_stream_id": "1234", "parameter_second_stream_id": "4567"}, + ), + ( + "{{config['field_name_first_stream']}}", + "{{config['field_name_second_stream']}}", + {"config_first_stream_id": "1234", "config_second_stream_id": "4567"}, + ), + ], + ids=[ + "parameters_interpolation", + "config_interpolation", + ], +) +def test_request_params_interpolation_for_parent_stream( + field_name_first_stream: str, field_name_second_stream: str, expected_request_params: dict +): + config = {"field_name_first_stream": "config_first_stream_id", "field_name_second_stream": "config_second_stream_id"} + parameters = {"field_name_first_stream": "parameter_first_stream_id", "field_name_second_stream": "parameter_second_stream_id"} + partition_router = SubstreamPartitionRouter( + parent_stream_configs=[ + ParentStreamConfig( + stream=MockStream(parent_slices, data_first_parent_slice + data_second_parent_slice, "first_stream"), + parent_key="id", + partition_field="first_stream_id", + parameters=parameters, + config=config, + request_option=RequestOption( + inject_into=RequestOptionType.request_parameter, parameters=parameters, field_name=field_name_first_stream + ), + ), + ParentStreamConfig( + stream=MockStream(second_parent_stream_slice, more_records, "second_stream"), + parent_key="id", + partition_field="second_stream_id", + parameters=parameters, + config=config, + request_option=RequestOption( + inject_into=RequestOptionType.request_parameter, parameters=parameters, field_name=field_name_second_stream + ), + ), + ], + parameters=parameters, + config=config, + ) + stream_slice = {"first_stream_id": "1234", "second_stream_id": "4567"} + + assert expected_request_params == partition_router.get_request_params(stream_slice=stream_slice) + + def test_given_record_is_airbyte_message_when_stream_slices_then_use_record_data(): parent_slice = {} partition_router = SubstreamPartitionRouter( diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py 
b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py index 17ef223171c379..d8326d5227ecbd 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py @@ -21,10 +21,9 @@ @pytest.mark.parametrize( - "test_name, page_token_request_option, stop_condition, expected_updated_path, expected_request_params, expected_headers, expected_body_data, expected_body_json, last_records, expected_next_page_token, limit", + "page_token_request_option, stop_condition, expected_updated_path, expected_request_params, expected_headers, expected_body_data, expected_body_json, last_records, expected_next_page_token, limit", [ ( - "test_default_paginator_path", RequestPath(parameters={}), None, "/next_url", @@ -37,7 +36,6 @@ 2, ), ( - "test_default_paginator_request_param", RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from", parameters={}), None, None, @@ -50,7 +48,6 @@ 2, ), ( - "test_default_paginator_no_token", RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from", parameters={}), InterpolatedBoolean(condition="{{True}}", parameters={}), None, @@ -63,7 +60,6 @@ 2, ), ( - "test_default_paginator_cursor_header", RequestOption(inject_into=RequestOptionType.header, field_name="from", parameters={}), None, None, @@ -76,7 +72,6 @@ 2, ), ( - "test_default_paginator_cursor_body_data", RequestOption(inject_into=RequestOptionType.body_data, field_name="from", parameters={}), None, None, @@ -89,7 +84,6 @@ 2, ), ( - "test_default_paginator_cursor_body_json", RequestOption(inject_into=RequestOptionType.body_json, field_name="from", parameters={}), None, None, @@ -102,9 +96,16 @@ 2, ), ], + ids=[ + "test_default_paginator_path", + "test_default_paginator_request_param", + "test_default_paginator_no_token", + "test_default_paginator_cursor_header", + "test_default_paginator_cursor_body_data", + "test_default_paginator_cursor_body_json", + ], ) def test_default_paginator_with_cursor( - test_name, page_token_request_option, stop_condition, expected_updated_path, @@ -116,7 +117,9 @@ def test_default_paginator_with_cursor( expected_next_page_token, limit, ): - page_size_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="limit", parameters={}) + page_size_request_option = RequestOption( + inject_into=RequestOptionType.request_parameter, field_name="{{parameters['page_limit']}}", parameters={"page_limit": "limit"} + ) cursor_value = "{{ response.next }}" url_base = "https://airbyte.io" config = {} @@ -157,6 +160,62 @@ def test_default_paginator_with_cursor( assert actual_body_json == expected_body_json +@pytest.mark.parametrize( + "field_name_page_size_interpolation, field_name_page_token_interpolation, expected_request_params", + [ + ( + "{{parameters['page_limit']}}", + "{{parameters['page_token']}}", + {"parameters_limit": 50, "parameters_token": "https://airbyte.io/next_url"}, + ), + ("{{config['page_limit']}}", "{{config['page_token']}}", {"config_limit": 50, "config_token": "https://airbyte.io/next_url"}), + ], + ids=[ + "parameters_interpolation", + "config_interpolation", + ], +) +def test_paginator_request_param_interpolation( + field_name_page_size_interpolation: str, field_name_page_token_interpolation: str, expected_request_params: dict +): + config = {"page_limit": "config_limit", "page_token": "config_token"} + parameters = 
{"page_limit": "parameters_limit", "page_token": "parameters_token"} + page_size_request_option = RequestOption( + inject_into=RequestOptionType.request_parameter, + field_name=field_name_page_size_interpolation, + parameters=parameters, + ) + cursor_value = "{{ response.next }}" + url_base = "https://airbyte.io" + limit = 50 + strategy = CursorPaginationStrategy( + page_size=limit, + cursor_value=cursor_value, + stop_condition=None, + decoder=JsonDecoder(parameters={}), + config=config, + parameters=parameters, + ) + paginator = DefaultPaginator( + page_size_option=page_size_request_option, + page_token_option=RequestOption( + inject_into=RequestOptionType.request_parameter, field_name=field_name_page_token_interpolation, parameters=parameters + ), + pagination_strategy=strategy, + config=config, + url_base=url_base, + parameters=parameters, + ) + response = requests.Response() + response.headers = {"A_HEADER": "HEADER_VALUE"} + response_body = {"next": "https://airbyte.io/next_url"} + response._content = json.dumps(response_body).encode("utf-8") + last_records = [{"id": 0}, {"id": 1}] + paginator.next_page_token(response, last_records) + actual_request_params = paginator.get_request_params() + assert actual_request_params == expected_request_params + + def test_page_size_option_cannot_be_set_if_strategy_has_no_limit(): page_size_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="page_size", parameters={}) page_token_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="offset", parameters={}) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_request_option.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_request_option.py index 1dc2b070fa65a3..5caa11f57f1653 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_request_option.py @@ -7,15 +7,37 @@ @pytest.mark.parametrize( - "test_name, option_type, field_name", + "option_type, field_name, expected_field_name", [ - ("test_limit_param_with_field_name", RequestOptionType.request_parameter, "field"), - ("test_limit_header_with_field_name", RequestOptionType.header, "field"), - ("test_limit_data_with_field_name", RequestOptionType.body_data, "field"), - ("test_limit_json_with_field_name", RequestOptionType.body_json, "field"), + (RequestOptionType.request_parameter, "field", "field"), + (RequestOptionType.header, "field", "field"), + (RequestOptionType.body_data, "field", "field"), + (RequestOptionType.body_json, "field", "field"), + (RequestOptionType.request_parameter, "since_{{ parameters['cursor_field'] }}", "since_updated_at"), + (RequestOptionType.header, "since_{{ parameters['cursor_field'] }}", "since_updated_at"), + (RequestOptionType.body_data, "since_{{ parameters['cursor_field'] }}", "since_updated_at"), + (RequestOptionType.body_json, "since_{{ parameters['cursor_field'] }}", "since_updated_at"), + (RequestOptionType.request_parameter, "since_{{ config['cursor_field'] }}", "since_created_at"), + (RequestOptionType.header, "since_{{ config['cursor_field'] }}", "since_created_at"), + (RequestOptionType.body_data, "since_{{ config['cursor_field'] }}", "since_created_at"), + (RequestOptionType.body_json, "since_{{ config['cursor_field'] }}", "since_created_at"), + ], + ids=[ + "test_limit_param_with_field_name", + "test_limit_header_with_field_name", 
+ "test_limit_data_with_field_name", + "test_limit_json_with_field_name", + "test_limit_param_with_parameters_interpolation", + "test_limit_header_with_parameters_interpolation", + "test_limit_data_with_parameters_interpolation", + "test_limit_json_with_parameters_interpolation", + "test_limit_param_with_config_interpolation", + "test_limit_header_with_config_interpolation", + "test_limit_data_with_config_interpolation", + "test_limit_json_with_config_interpolation", ], ) -def test_request_option(test_name, option_type, field_name): - request_option = RequestOption(inject_into=option_type, field_name=field_name, parameters={}) - assert request_option.field_name == field_name +def test_request_option(option_type: RequestOptionType, field_name: str, expected_field_name: str): + request_option = RequestOption(inject_into=option_type, field_name=field_name, parameters={"cursor_field": "updated_at"}) + assert request_option.field_name.eval({"cursor_field": "created_at"}) == expected_field_name assert request_option.inject_into == option_type diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/file_types/test_unstructured_parser.py b/airbyte-cdk/python/unit_tests/sources/file_based/file_types/test_unstructured_parser.py index 311d4a0ad158a6..9bc096c5136e20 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/file_types/test_unstructured_parser.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/file_types/test_unstructured_parser.py @@ -86,7 +86,10 @@ def test_infer_schema(mock_detect_filetype, filetype, format_config, raises): assert schema == { "content": {"type": "string", "description": "Content of the file as markdown. Might be null if the file could not be parsed"}, "document_key": {"type": "string", "description": "Unique identifier of the document, e.g. the file path"}, - "_ab_source_file_parse_error": {"type": "string", "description": "Error message if the file could not be parsed even though the file is supported"}, + "_ab_source_file_parse_error": { + "type": "string", + "description": "Error message if the file could not be parsed even though the file is supported", + }, } loop.close() asyncio.set_event_loop(main_loop) @@ -201,7 +204,7 @@ def test_infer_schema(mock_detect_filetype, filetype, format_config, raises): { "content": None, "document_key": FILE_URI, - "_ab_source_file_parse_error": "Error parsing record. This could be due to a mismatch between the config's file type and the actual file type, or because the file or record is not parseable. Contact Support if you need assistance.\nfilename=path/to/file.xyz message=weird parsing error" + "_ab_source_file_parse_error": "Error parsing record. This could be due to a mismatch between the config's file type and the actual file type, or because the file or record is not parseable. 
Contact Support if you need assistance.\nfilename=path/to/file.xyz message=weird parsing error", } ], True, @@ -323,33 +326,48 @@ def test_check_config(requests_mock, format_config, raises_for_status, json_resp None, "test", [{"type": "Text", "text": "test"}], - [call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")})], - False, [ - { - "content": "test", - "document_key": FILE_URI, - "_ab_source_file_parse_error": None - } + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ) ], + False, + [{"content": "test", "document_key": FILE_URI, "_ab_source_file_parse_error": None}], 200, id="basic_request", ), pytest.param( FileType.PDF, - UnstructuredFormat(skip_unprocessable_file_types=False, strategy="hi_res", processing=APIProcessingConfigModel(mode="api", api_key="test", api_url="http://localhost:8000", parameters=[APIParameterConfigModel(name="include_page_breaks", value="true"), APIParameterConfigModel(name="ocr_languages", value="eng"), APIParameterConfigModel(name="ocr_languages", value="kor")])), + UnstructuredFormat( + skip_unprocessable_file_types=False, + strategy="hi_res", + processing=APIProcessingConfigModel( + mode="api", + api_key="test", + api_url="http://localhost:8000", + parameters=[ + APIParameterConfigModel(name="include_page_breaks", value="true"), + APIParameterConfigModel(name="ocr_languages", value="eng"), + APIParameterConfigModel(name="ocr_languages", value="kor"), + ], + ), + ), None, "test", [{"type": "Text", "text": "test"}], - [call("http://localhost:8000/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "hi_res", "include_page_breaks": "true", "ocr_languages": ["eng", "kor"]}, files={"files": ("filename", mock.ANY, "application/pdf")})], - False, [ - { - "content": "test", - "document_key": FILE_URI, - "_ab_source_file_parse_error": None - } + call( + "http://localhost:8000/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "hi_res", "include_page_breaks": "true", "ocr_languages": ["eng", "kor"]}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ) ], + False, + [{"content": "test", "document_key": FILE_URI, "_ab_source_file_parse_error": None}], 200, id="request_with_params", ), @@ -361,13 +379,7 @@ def test_check_config(requests_mock, format_config, raises_for_status, json_resp None, None, False, - [ - { - "content": "# Mymarkdown", - "document_key": FILE_URI, - "_ab_source_file_parse_error": None - } - ], + [{"content": "# Mymarkdown", "document_key": FILE_URI, "_ab_source_file_parse_error": None}], 200, id="handle_markdown_locally", ), @@ -384,15 +396,40 @@ def test_check_config(requests_mock, format_config, raises_for_status, json_resp "test", None, [ - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), 
call().raise_for_status(), - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), ], True, @@ -411,21 +448,30 @@ def test_check_config(requests_mock, format_config, raises_for_status, json_resp "test", [{"type": "Text", "text": "test"}], [ - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), ], False, - [ - { - "content": "test", - 
"document_key": FILE_URI, - "_ab_source_file_parse_error": None - } - ], + [{"content": "test", "document_key": FILE_URI, "_ab_source_file_parse_error": None}], 200, id="retry_and_recover", ), @@ -438,7 +484,12 @@ def test_check_config(requests_mock, format_config, raises_for_status, json_resp "test", [{"type": "Text", "text": "test"}], [ - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), ], True, @@ -455,7 +506,12 @@ def test_check_config(requests_mock, format_config, raises_for_status, json_resp "test", [{"type": "Text", "text": "test"}], [ - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), call().raise_for_status(), ], True, @@ -470,7 +526,12 @@ def test_check_config(requests_mock, format_config, raises_for_status, json_resp "test", [{"detail": "Something went wrong"}], [ - call("https://api.unstructured.io/general/v0/general", headers={"accept": "application/json", "unstructured-api-key": "test"}, data={"strategy": "auto"}, files={"files": ("filename", mock.ANY, "application/pdf")}), + call( + "https://api.unstructured.io/general/v0/general", + headers={"accept": "application/json", "unstructured-api-key": "test"}, + data={"strategy": "auto"}, + files={"files": ("filename", mock.ANY, "application/pdf")}, + ), ], False, [ @@ -487,7 +548,7 @@ def test_check_config(requests_mock, format_config, raises_for_status, json_resp ) @patch("airbyte_cdk.sources.file_based.file_types.unstructured_parser.requests") @patch("airbyte_cdk.sources.file_based.file_types.unstructured_parser.detect_filetype") -@patch('time.sleep', side_effect=lambda _: None) +@patch("time.sleep", side_effect=lambda _: None) def test_parse_records_remotely( time_mock, mock_detect_filetype, @@ -500,7 +561,7 @@ def test_parse_records_remotely( expected_requests, raises, expected_records, - http_status_code + http_status_code, ): stream_reader = MagicMock() mock_open(stream_reader.open_file, read_data=bytes(str(file_content), "utf-8")) diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py index 0629d683fec8d0..ccbcc1c7116a92 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py @@ -42,10 +42,15 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z"}, - "_ab_source_file_last_modified": "2023-06-01T03:54:07.000000Z_some_old_file.csv" - }).build(), + input_state=StateBuilder() + 
.with_stream_state( + "stream1", + { + "history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z"}, + "_ab_source_file_last_modified": "2023-06-01T03:54:07.000000Z_some_old_file.csv", + }, + ) + .build(), ) ) .set_expected_records( @@ -137,10 +142,15 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, - "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv", - }).build(), + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, + "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv", + }, + ) + .build(), ) ) .set_expected_records( @@ -214,10 +224,15 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"a.csv": "2023-06-01T03:54:07.000000Z"}, - "_ab_source_file_last_modified": "2023-06-01T03:54:07.000000Z_a.csv", - }).build(), + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": {"a.csv": "2023-06-01T03:54:07.000000Z"}, + "_ab_source_file_last_modified": "2023-06-01T03:54:07.000000Z_a.csv", + }, + ) + .build(), ) ) .set_expected_records( @@ -580,12 +595,15 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( - "stream1", { + input_state=StateBuilder() + .with_stream_state( + "stream1", + { "history": {"recent_file.csv": "2023-07-15T23:59:59.000000Z"}, "_ab_source_file_last_modified": "2023-07-15T23:59:59.000000Z_recent_file.csv", }, - ).build(), + ) + .build(), ) ) ).build() @@ -1022,10 +1040,12 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", - {"history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, - "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv"}).build(), + {"history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv"}, + ) + .build(), ) ) ).build() @@ -1146,13 +1166,15 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "c.csv": "2023-06-06T03:54:07.000000Z"}, "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() @@ -1273,13 +1295,15 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "c.csv": "2023-06-06T03:54:07.000000Z"}, "_ab_source_file_last_modified": "2023-06-03T03:54:07.000000Z_x.csv", - } - ).build() + }, + ) + .build() ) ) ).build() @@ -1454,7 +1478,8 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": { @@ -1463,8 +1488,9 @@ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z", }, "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() @@ -1639,7 +1665,8 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - 
input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": { @@ -1648,8 +1675,9 @@ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z", }, "_ab_source_file_last_modified": "2023-05-01T03:54:07.000000Z_very_very_very_old_file.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() @@ -2123,7 +2151,8 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": { @@ -2132,8 +2161,9 @@ "d.csv": "2023-06-05T03:54:07.000000Z", }, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() @@ -2239,7 +2269,8 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": { @@ -2248,8 +2279,9 @@ "d.csv": "2023-06-05T03:54:07.000000Z", }, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() @@ -2378,7 +2410,8 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": { @@ -2387,8 +2420,9 @@ "e.csv": "2023-06-08T03:54:07.000000Z", }, "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() @@ -2516,7 +2550,8 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": { @@ -2525,15 +2560,18 @@ "e.csv": "2023-06-08T03:54:07.000000Z", }, "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_newer = ( TestScenarioBuilder() - .set_name("multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_newer") + .set_name( + "multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_newer" + ) .set_config( { "streams": [ @@ -2680,7 +2718,8 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": { @@ -2689,15 +2728,18 @@ "d.csv": "2023-06-08T03:54:07.000000Z", }, "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_older = ( TestScenarioBuilder() - .set_name("multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_older") + .set_name( + "multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_older" + ) .set_config( { "streams": [ @@ -2844,7 +2886,8 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state( + input_state=StateBuilder() + .with_stream_state( "stream1", { "history": { @@ -2853,8 +2896,9 @@ "d.csv": 
"2023-06-08T03:54:07.000000Z", }, "_ab_source_file_last_modified": "2023-06-04T00:00:00.000000Z_very_old_file.csv", - } - ).build(), + }, + ) + .build(), ) ) ).build() diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/csv_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/csv_scenarios.py index 0f3c0f0ef2a7a5..bba3977db2fda4 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/csv_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/csv_scenarios.py @@ -324,9 +324,7 @@ "processing": { "title": "Processing", "description": "Processing configuration", - "default": { - "mode": "local" - }, + "default": {"mode": "local"}, "type": "object", "oneOf": [ { @@ -337,16 +335,12 @@ "title": "Mode", "default": "local", "const": "local", - "enum": [ - "local" - ], - "type": "string" + "enum": ["local"], + "type": "string", } }, "description": "Process files locally, supporting `fast` and `ocr` modes. This is the default option.", - "required": [ - "mode" - ] + "required": ["mode"], }, { "title": "via API", @@ -356,10 +350,8 @@ "title": "Mode", "default": "api", "const": "api", - "enum": [ - "api" - ], - "type": "string" + "enum": ["api"], + "type": "string", }, "api_key": { "title": "API Key", @@ -367,17 +359,15 @@ "default": "", "always_show": True, "airbyte_secret": True, - "type": "string" + "type": "string", }, "api_url": { "title": "API URL", "description": "The URL of the unstructured API to use", "default": "https://api.unstructured.io", "always_show": True, - "examples": [ - "https://api.unstructured.com" - ], - "type": "string" + "examples": ["https://api.unstructured.com"], + "type": "string", }, "parameters": { "title": "Additional URL Parameters", @@ -392,35 +382,24 @@ "name": { "title": "Parameter name", "description": "The name of the unstructured API parameter to use", - "examples": [ - "combine_under_n_chars", - "languages" - ], - "type": "string" + "examples": ["combine_under_n_chars", "languages"], + "type": "string", }, "value": { "title": "Value", "description": "The value of the parameter", - "examples": [ - "true", - "hi_res" - ], - "type": "string" - } + "examples": ["true", "hi_res"], + "type": "string", + }, }, - "required": [ - "name", - "value" - ] - } - } + "required": ["name", "value"], + }, + }, }, "description": "Process files via an API, using the `hi_res` mode. 
This option is useful for increased performance and accuracy, but requires an API key and a hosted instance of unstructured.", - "required": [ - "mode" - ] - } - ] + "required": ["mode"], + }, + ], }, }, "description": "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.", diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/incremental_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/incremental_scenarios.py index 1b826d8766f720..3b9785e11bfed5 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/incremental_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/incremental_scenarios.py @@ -42,9 +42,14 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z"}, - }).build(), + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z"}, + }, + ) + .build(), ) ) .set_expected_records( @@ -136,9 +141,14 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, - }).build(), + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, + }, + ) + .build(), ) ) .set_expected_records( @@ -212,9 +222,14 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"a.csv": "2023-06-01T03:54:07.000000Z"}, - }).build(), + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": {"a.csv": "2023-06-01T03:54:07.000000Z"}, + }, + ) + .build(), ) ) .set_expected_records( @@ -577,9 +592,14 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"recent_file.csv": "2023-07-15T23:59:59.000000Z"}, - }).build(), + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": {"recent_file.csv": "2023-07-15T23:59:59.000000Z"}, + }, + ) + .build(), ) ) ).build() @@ -1016,9 +1036,14 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, - }).build(), + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, + }, + ) + .build(), ) ) ).build() @@ -1139,9 +1164,14 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "c.csv": "2023-06-06T03:54:07.000000Z"}, - }).build(), + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "c.csv": "2023-06-06T03:54:07.000000Z"}, + }, + ) + .build(), ) ) ).build() @@ -1316,13 +1346,18 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": { - "very_very_old_file.csv": "2023-06-01T03:54:07.000000Z", - "very_old_file.csv": "2023-06-02T03:54:07.000000Z", - "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z", + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": { + 
"very_very_old_file.csv": "2023-06-01T03:54:07.000000Z", + "very_old_file.csv": "2023-06-02T03:54:07.000000Z", + "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z", + }, }, - }).build(), + ) + .build(), ) ) ).build() @@ -1612,13 +1647,18 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": { - "b.csv": "2023-06-05T03:54:07.000000Z", - "c.csv": "2023-06-05T03:54:07.000000Z", - "d.csv": "2023-06-05T03:54:07.000000Z", + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": { + "b.csv": "2023-06-05T03:54:07.000000Z", + "c.csv": "2023-06-05T03:54:07.000000Z", + "d.csv": "2023-06-05T03:54:07.000000Z", + }, }, - }).build(), + ) + .build(), ) ) ).build() @@ -1746,13 +1786,18 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": { - "c.csv": "2023-06-07T03:54:07.000000Z", - "d.csv": "2023-06-08T03:54:07.000000Z", - "e.csv": "2023-06-08T03:54:07.000000Z", + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": { + "c.csv": "2023-06-07T03:54:07.000000Z", + "d.csv": "2023-06-08T03:54:07.000000Z", + "e.csv": "2023-06-08T03:54:07.000000Z", + }, }, - }).build(), + ) + .build(), ) ) ).build() @@ -1906,13 +1951,18 @@ ) .set_incremental_scenario_config( IncrementalScenarioConfig( - input_state=StateBuilder().with_stream_state("stream1", { - "history": { - "old_file.csv": "2023-06-05T00:00:00.000000Z", - "c.csv": "2023-06-07T03:54:07.000000Z", - "d.csv": "2023-06-08T03:54:07.000000Z", + input_state=StateBuilder() + .with_stream_state( + "stream1", + { + "history": { + "old_file.csv": "2023-06-05T00:00:00.000000Z", + "c.csv": "2023-06-07T03:54:07.000000Z", + "d.csv": "2023-06-08T03:54:07.000000Z", + }, }, - }).build(), + ) + .build(), ) ) ).build() diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/jsonl_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/jsonl_scenarios.py index 2998f3deb6cca0..cee4c5b9a1e236 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/jsonl_scenarios.py @@ -484,9 +484,7 @@ ] } ) - .set_expected_records( - [] - ) + .set_expected_records([]) .set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value) .set_expected_read_error(AirbyteTracedException, "Please check the logged errors for more information.") .set_expected_logs( diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/unstructured_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/unstructured_scenarios.py index dc0824512a437c..da1e468c9df5c6 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/unstructured_scenarios.py @@ -15,12 +15,18 @@ json_schema = { "type": "object", "properties": { - "content": {"type": ["null", "string"], "description": "Content of the file as markdown. Might be null if the file could not be parsed"}, + "content": { + "type": ["null", "string"], + "description": "Content of the file as markdown. Might be null if the file could not be parsed", + }, "document_key": {"type": ["null", "string"], "description": "Unique identifier of the document, e.g. 
the file path"}, - "_ab_source_file_parse_error": {"type": ["null", "string"], "description": "Error message if the file could not be parsed even though the file is supported"}, + "_ab_source_file_parse_error": { + "type": ["null", "string"], + "description": "Error message if the file could not be parsed even though the file is supported", + }, "_ab_source_file_last_modified": {"type": "string"}, "_ab_source_file_url": {"type": "string"}, - } + }, } simple_markdown_scenario = ( @@ -69,7 +75,7 @@ "json_schema": json_schema, "name": "stream1", "source_defined_cursor": True, - 'source_defined_primary_key': [["document_key"]], + "source_defined_primary_key": [["document_key"]], "supported_sync_modes": ["full_refresh", "incremental"], } ] @@ -104,7 +110,6 @@ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "c", "_ab_source_file_parse_error": None, - }, "stream": "stream1", }, @@ -132,9 +137,7 @@ .set_files( { "a.txt": { - "contents": bytes( - "Just some raw text", "UTF-8" - ), + "contents": bytes("Just some raw text", "UTF-8"), "last_modified": "2023-06-05T03:54:07.000Z", }, "b": { @@ -154,7 +157,7 @@ "json_schema": json_schema, "name": "stream1", "source_defined_cursor": True, - 'source_defined_primary_key': [["document_key"]], + "source_defined_primary_key": [["document_key"]], "supported_sync_modes": ["full_refresh", "incremental"], } ] @@ -179,7 +182,6 @@ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b", "_ab_source_file_parse_error": None, - }, "stream": "stream1", }, @@ -223,7 +225,7 @@ "json_schema": json_schema, "name": "stream1", "source_defined_cursor": True, - 'source_defined_primary_key': [["document_key"]], + "source_defined_primary_key": [["document_key"]], "supported_sync_modes": ["full_refresh", "incremental"], } ] @@ -273,7 +275,7 @@ "json_schema": json_schema, "name": "stream1", "source_defined_cursor": True, - 'source_defined_primary_key': [["document_key"]], + "source_defined_primary_key": [["document_key"]], "supported_sync_modes": ["full_refresh", "incremental"], } ] @@ -337,7 +339,7 @@ "json_schema": json_schema, "name": "stream1", "source_defined_cursor": True, - 'source_defined_primary_key': [["document_key"]], + "source_defined_primary_key": [["document_key"]], "supported_sync_modes": ["full_refresh", "incremental"], } ] @@ -417,7 +419,7 @@ "json_schema": json_schema, "name": "stream1", "source_defined_cursor": True, - 'source_defined_primary_key': [["document_key"]], + "source_defined_primary_key": [["document_key"]], "supported_sync_modes": ["full_refresh", "incremental"], } ] @@ -495,7 +497,7 @@ "json_schema": json_schema, "name": "stream1", "source_defined_cursor": True, - 'source_defined_primary_key': [["document_key"]], + "source_defined_primary_key": [["document_key"]], "supported_sync_modes": ["full_refresh", "incremental"], } ] @@ -563,7 +565,7 @@ "json_schema": json_schema, "name": "stream1", "source_defined_cursor": True, - 'source_defined_primary_key': [["document_key"]], + "source_defined_primary_key": [["document_key"]], "supported_sync_modes": ["full_refresh", "incremental"], } ] diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py index 58d528cb7caf85..974bbf558974c9 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +++ 
b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py @@ -442,7 +442,13 @@ ] } ) - .set_catalog(CatalogBuilder().with_stream("stream1", SyncMode.full_refresh).with_stream("stream2", SyncMode.full_refresh).with_stream("stream3", SyncMode.full_refresh).build()) + .set_catalog( + CatalogBuilder() + .with_stream("stream1", SyncMode.full_refresh) + .with_stream("stream2", SyncMode.full_refresh) + .with_stream("stream3", SyncMode.full_refresh) + .build() + ) .set_expected_check_status("FAILED") .set_expected_check_error(None, FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA.value) .set_expected_discover_error(ConfigValidationError, FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA.value) diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/stream/concurrent/test_adapters.py b/airbyte-cdk/python/unit_tests/sources/file_based/stream/concurrent/test_adapters.py index e63f950b5b2ac4..2d93e73ced561e 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/stream/concurrent/test_adapters.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/stream/concurrent/test_adapters.py @@ -49,11 +49,15 @@ def test_file_based_stream_partition_generator(sync_mode): stream = Mock() message_repository = Mock() - stream_slices = [{"files": [RemoteFile(uri="1", last_modified=datetime.now())]}, - {"files": [RemoteFile(uri="2", last_modified=datetime.now())]}] + stream_slices = [ + {"files": [RemoteFile(uri="1", last_modified=datetime.now())]}, + {"files": [RemoteFile(uri="2", last_modified=datetime.now())]}, + ] stream.stream_slices.return_value = stream_slices - partition_generator = FileBasedStreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR) + partition_generator = FileBasedStreamPartitionGenerator( + stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR + ) partitions = list(partition_generator.generate()) slices = [partition.to_slice() for partition in partitions] @@ -134,7 +138,11 @@ def test_file_based_stream_partition_raising_exception(exception_type, expected_ @pytest.mark.parametrize( "_slice, expected_hash", [ - pytest.param({"files": [RemoteFile(uri="1", last_modified=datetime.strptime("2023-06-09T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"))]}, hash(("stream", "2023-06-09T00:00:00.000000Z_1")), id="test_hash_with_slice"), + pytest.param( + {"files": [RemoteFile(uri="1", last_modified=datetime.strptime("2023-06-09T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"))]}, + hash(("stream", "2023-06-09T00:00:00.000000Z_1")), + id="test_hash_with_slice", + ), pytest.param(None, hash("stream"), id="test_hash_no_slice"), ], ) diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py b/airbyte-cdk/python/unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py index 2bf39894b12a62..027038b2ae98d5 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py @@ -45,26 +45,17 @@ def _make_cursor(input_state: Optional[MutableMapping[str, Any]]) -> FileBasedCo pytest.param({}, (datetime.min, ""), id="no-state-gives-min-cursor"), pytest.param({"history": {}}, (datetime.min, ""), id="missing-cursor-field-gives-min-cursor"), pytest.param( - { - "history": {"a.csv": "2021-01-01T00:00:00.000000Z"}, - "_ab_source_file_last_modified": 
"2021-01-01T00:00:00.000000Z_a.csv" - }, + {"history": {"a.csv": "2021-01-01T00:00:00.000000Z"}, "_ab_source_file_last_modified": "2021-01-01T00:00:00.000000Z_a.csv"}, (datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "a.csv"), id="cursor-value-matches-earliest-file", ), pytest.param( - { - "history": {"a.csv": "2021-01-01T00:00:00.000000Z"}, - "_ab_source_file_last_modified": "2020-01-01T00:00:00.000000Z_a.csv" - }, + {"history": {"a.csv": "2021-01-01T00:00:00.000000Z"}, "_ab_source_file_last_modified": "2020-01-01T00:00:00.000000Z_a.csv"}, (datetime.strptime("2020-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "a.csv"), id="cursor-value-is-earlier", ), pytest.param( - { - "history": {"a.csv": "2022-01-01T00:00:00.000000Z"}, - "_ab_source_file_last_modified": "2021-01-01T00:00:00.000000Z_a.csv" - }, + {"history": {"a.csv": "2022-01-01T00:00:00.000000Z"}, "_ab_source_file_last_modified": "2021-01-01T00:00:00.000000Z_a.csv"}, (datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "a.csv"), id="cursor-value-is-later", ), @@ -73,30 +64,24 @@ def _make_cursor(input_state: Optional[MutableMapping[str, Any]]) -> FileBasedCo "history": { "a.csv": "2021-01-01T00:00:00.000000Z", "b.csv": "2021-01-02T00:00:00.000000Z", - "c.csv": "2021-01-03T00:00:00.000000Z" + "c.csv": "2021-01-03T00:00:00.000000Z", }, - "_ab_source_file_last_modified": "2021-01-04T00:00:00.000000Z_d.csv" + "_ab_source_file_last_modified": "2021-01-04T00:00:00.000000Z_d.csv", }, (datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "a.csv"), id="cursor-not-earliest", ), pytest.param( - { - "history": {"b.csv": "2020-12-31T00:00:00.000000Z"}, - "_ab_source_file_last_modified": "2021-01-01T00:00:00.000000Z_a.csv" - }, + {"history": {"b.csv": "2020-12-31T00:00:00.000000Z"}, "_ab_source_file_last_modified": "2021-01-01T00:00:00.000000Z_a.csv"}, (datetime.strptime("2020-12-31T00:00:00.000000Z", DATE_TIME_FORMAT), "b.csv"), - id="state-with-cursor-and-earlier-history" + id="state-with-cursor-and-earlier-history", ), pytest.param( - { - "history": {"b.csv": "2021-01-02T00:00:00.000000Z"}, - "_ab_source_file_last_modified": "2021-01-01T00:00:00.000000Z_a.csv" - }, + {"history": {"b.csv": "2021-01-02T00:00:00.000000Z"}, "_ab_source_file_last_modified": "2021-01-01T00:00:00.000000Z_a.csv"}, (datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "a.csv"), - id="state-with-cursor-and-later-history" + id="state-with-cursor-and-later-history", ), - ] + ], ) def test_compute_prev_sync_cursor(input_state: MutableMapping[str, Any], expected_cursor_value: Tuple[datetime, str]): cursor = _make_cursor(input_state) @@ -160,7 +145,7 @@ def test_compute_prev_sync_cursor(input_state: MutableMapping[str, Any], expecte "2022-01-05T00:00:00.000000Z_pending.csv", id="add-to-nonempty-history-pending-file-is-newer", ), - ] + ], ) def test_add_file( initial_state: MutableMapping[str, Any], @@ -175,23 +160,31 @@ def test_add_file( cursor._message_repository = mock_message_repository stream = MagicMock() - cursor.set_pending_partitions([ - FileBasedStreamPartition( - stream, - {"files": [RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT))]}, - mock_message_repository, - SyncMode.full_refresh, - FileBasedConcurrentCursor.CURSOR_FIELD, - initial_state, - cursor - ) for uri, timestamp in pending_files - ]) + cursor.set_pending_partitions( + [ + FileBasedStreamPartition( + stream, + {"files": [RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT))]}, + 
mock_message_repository, + SyncMode.full_refresh, + FileBasedConcurrentCursor.CURSOR_FIELD, + initial_state, + cursor, + ) + for uri, timestamp in pending_files + ] + ) uri, timestamp = file_to_add cursor.add_file(RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT))) assert cursor._file_to_datetime_history == expected_history - assert cursor._pending_files == {uri: RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT)) for uri, timestamp in expected_pending_files} - assert mock_message_repository.emit_message.call_args_list[0].args[0].state.data["test"]["_ab_source_file_last_modified"] == expected_cursor_value + assert cursor._pending_files == { + uri: RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT)) for uri, timestamp in expected_pending_files + } + assert ( + mock_message_repository.emit_message.call_args_list[0].args[0].state.data["test"]["_ab_source_file_last_modified"] + == expected_cursor_value + ) @pytest.mark.parametrize( @@ -215,7 +208,7 @@ def test_add_file( "2021-01-05T00:00:00.000000Z_pending.csv", id="add-to-empty-history-file-not-in-pending-files", ), - ] + ], ) def test_add_file_invalid( initial_state: MutableMapping[str, Any], @@ -226,16 +219,23 @@ def test_add_file_invalid( expected_cursor_value: str, ): cursor = _make_cursor(initial_state) - cursor._pending_files = {uri: RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT)) for uri, timestamp in pending_files} + cursor._pending_files = { + uri: RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT)) for uri, timestamp in pending_files + } mock_message_repository = MagicMock() cursor._message_repository = mock_message_repository uri, timestamp = file_to_add cursor.add_file(RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT))) assert cursor._file_to_datetime_history == expected_history - assert cursor._pending_files == {uri: RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT)) for uri, timestamp in expected_pending_files} + assert cursor._pending_files == { + uri: RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT)) for uri, timestamp in expected_pending_files + } assert mock_message_repository.emit_message.call_args_list[0].args[0].log.level.value == "WARN" - assert mock_message_repository.emit_message.call_args_list[1].args[0].state.data["test"]["_ab_source_file_last_modified"] == expected_cursor_value + assert ( + mock_message_repository.emit_message.call_args_list[1].args[0].state.data["test"]["_ab_source_file_last_modified"] + == expected_cursor_value + ) @pytest.mark.parametrize( @@ -243,37 +243,33 @@ def test_add_file_invalid( [ pytest.param({}, [], f"{datetime.min.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}_", id="no-state-no-pending"), pytest.param( - {"history": {"a.csv": "2021-01-01T00:00:00.000000Z"}}, - [], - "2021-01-01T00:00:00.000000Z_a.csv", - id="no-pending-with-history" + {"history": {"a.csv": "2021-01-01T00:00:00.000000Z"}}, [], "2021-01-01T00:00:00.000000Z_a.csv", id="no-pending-with-history" ), pytest.param( - {"history": {}}, - [("b.csv", "2021-01-02T00:00:00.000000Z")], - "2021-01-02T00:00:00.000000Z_b.csv", - id="pending-no-history" + {"history": {}}, [("b.csv", "2021-01-02T00:00:00.000000Z")], "2021-01-02T00:00:00.000000Z_b.csv", id="pending-no-history" ), pytest.param( {"history": {"a.csv": "2022-01-01T00:00:00.000000Z"}}, [("b.csv", "2021-01-02T00:00:00.000000Z")], 
"2021-01-01T00:00:00.000000Z_a.csv", - id="with-pending-before-history" + id="with-pending-before-history", ), pytest.param( {"history": {"a.csv": "2021-01-01T00:00:00.000000Z"}}, [("b.csv", "2022-01-02T00:00:00.000000Z")], "2022-01-01T00:00:00.000000Z_a.csv", - id="with-pending-after-history" + id="with-pending-after-history", ), - ] + ], ) def test_get_new_cursor_value(input_state: MutableMapping[str, Any], pending_files: List[Tuple[str, str]], expected_cursor_value: str): cursor = _make_cursor(input_state) pending_partitions = [] for url, timestamp in pending_files: partition = MagicMock() - partition.to_slice = lambda *args, **kwargs: {"files": [RemoteFile(uri=url, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT))]} + partition.to_slice = lambda *args, **kwargs: { + "files": [RemoteFile(uri=url, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT))] + } pending_partitions.append(partition) cursor.set_pending_partitions(pending_partitions) @@ -288,7 +284,7 @@ def test_get_new_cursor_value(input_state: MutableMapping[str, Any], pending_fil False, (datetime.min, ""), ["new.csv"], - id="empty-history-one-new-file" + id="empty-history-one-new-file", ), pytest.param( [RemoteFile(uri="a.csv", last_modified=datetime.strptime("2021-01-02T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%fZ"))], @@ -296,7 +292,7 @@ def test_get_new_cursor_value(input_state: MutableMapping[str, Any], pending_fil False, (datetime.min, ""), ["a.csv"], - id="non-empty-history-file-in-history-modified" + id="non-empty-history-file-in-history-modified", ), pytest.param( [RemoteFile(uri="a.csv", last_modified=datetime.strptime("2021-01-01T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%fZ"))], @@ -304,9 +300,9 @@ def test_get_new_cursor_value(input_state: MutableMapping[str, Any], pending_fil False, (datetime.min, ""), [], - id="non-empty-history-file-in-history-not-modified" + id="non-empty-history-file-in-history-not-modified", ), - ] + ], ) def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_value, expected_files_to_sync): cursor = _make_cursor({}) @@ -328,7 +324,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.min, ""), datetime.min, True, - id="file-not-in-history-not-full-old-cursor" + id="file-not-in-history-not-full-old-cursor", ), pytest.param( RemoteFile(uri="new.csv", last_modified=datetime.strptime("2021-01-03T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -337,7 +333,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.strptime("2024-01-02T00:00:00.000000Z", DATE_TIME_FORMAT), ""), datetime.min, True, - id="file-not-in-history-not-full-new-cursor" + id="file-not-in-history-not-full-new-cursor", ), pytest.param( RemoteFile(uri="a.csv", last_modified=datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -346,7 +342,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.min, ""), datetime.min, False, - id="file-in-history-not-modified" + id="file-in-history-not-modified", ), pytest.param( RemoteFile(uri="a.csv", last_modified=datetime.strptime("2020-01-01T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -355,7 +351,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.min, ""), datetime.min, False, - id="file-in-history-modified-before" + id="file-in-history-modified-before", ), pytest.param( RemoteFile(uri="a.csv", last_modified=datetime.strptime("2022-01-01T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -364,7 
+360,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.min, ""), datetime.min, True, - id="file-in-history-modified-after" + id="file-in-history-modified-after", ), pytest.param( RemoteFile(uri="new.csv", last_modified=datetime.strptime("2022-01-01T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -373,7 +369,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.strptime("2021-01-02T00:00:00.000000Z", DATE_TIME_FORMAT), "a.csv"), datetime.min, True, - id="history-full-file-modified-after-cursor" + id="history-full-file-modified-after-cursor", ), pytest.param( RemoteFile(uri="new1.csv", last_modified=datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -382,7 +378,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "new0.csv"), datetime.min, True, - id="history-full-modified-eq-cursor-uri-gt" + id="history-full-modified-eq-cursor-uri-gt", ), pytest.param( RemoteFile(uri="new0.csv", last_modified=datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -391,7 +387,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "new1.csv"), datetime.min, False, - id="history-full-modified-eq-cursor-uri-lt" + id="history-full-modified-eq-cursor-uri-lt", ), pytest.param( RemoteFile(uri="new.csv", last_modified=datetime.strptime("2020-01-01T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -400,7 +396,7 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "a.csv"), datetime.min, True, - id="history-full-modified-before-cursor-and-after-sync-start" + id="history-full-modified-before-cursor-and-after-sync-start", ), pytest.param( RemoteFile(uri="new.csv", last_modified=datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT)), @@ -409,9 +405,9 @@ def test_get_files_to_sync(all_files, history, is_history_full, prev_cursor_valu (datetime.strptime("2022-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), "a.csv"), datetime.strptime("2024-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), False, - id="history-full-modified-before-cursor-and-before-sync-start" + id="history-full-modified-before-cursor-and-before-sync-start", ), - ] + ], ) def test_should_sync_file( file_to_check: RemoteFile, @@ -439,21 +435,21 @@ def test_should_sync_file( {"a.csv": "2021-01-01T00:00:00.000000Z"}, False, datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), - id="non-full-history" + id="non-full-history", ), pytest.param( {f"file{i}.csv": f"2021-01-0{i}T00:00:00.000000Z" for i in range(1, 4)}, # all before the time window True, datetime.strptime("2021-01-01T00:00:00.000000Z", DATE_TIME_FORMAT), # Time window start time - id="full-history-earliest-before-window" + id="full-history-earliest-before-window", ), pytest.param( {f"file{i}.csv": f"2024-01-0{i}T00:00:00.000000Z" for i in range(1, 4)}, # all after the time window True, datetime.strptime("2023-06-13T00:00:00.000000Z", DATE_TIME_FORMAT), # Earliest file time - id="full-history-earliest-after-window" + id="full-history-earliest-after-window", ), - ] + ], ) def test_compute_start_time(input_history, is_history_full, expected_start_time, monkeypatch): cursor = _make_cursor({"history": input_history}) diff --git 
a/airbyte-cdk/python/unit_tests/sources/file_based/stream/test_default_file_based_stream.py b/airbyte-cdk/python/unit_tests/sources/file_based/stream/test_default_file_based_stream.py index be36413f271bc6..e93eb6bbfc5efa 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/stream/test_default_file_based_stream.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/stream/test_default_file_based_stream.py @@ -66,9 +66,7 @@ class MockFormat: ), ], ) -def test_fill_nulls( - input_schema: Mapping[str, Any], expected_output: Mapping[str, Any] -) -> None: +def test_fill_nulls(input_schema: Mapping[str, Any], expected_output: Mapping[str, Any]) -> None: assert DefaultFileBasedStream._fill_nulls(input_schema) == expected_output @@ -103,14 +101,8 @@ def setUp(self) -> None: def test_when_read_records_from_slice_then_return_records(self) -> None: self._parser.parse_records.return_value = [self._A_RECORD] - messages = list( - self._stream.read_records_from_slice( - {"files": [RemoteFile(uri="uri", last_modified=self._NOW)]} - ) - ) - assert list(map(lambda message: message.record.data["data"], messages)) == [ - self._A_RECORD - ] + messages = list(self._stream.read_records_from_slice({"files": [RemoteFile(uri="uri", last_modified=self._NOW)]})) + assert list(map(lambda message: message.record.data["data"], messages)) == [self._A_RECORD] def test_given_exception_when_read_records_from_slice_then_do_process_other_files( self, @@ -165,9 +157,7 @@ def test_given_exception_after_skipping_records_when_read_records_from_slice_the ) -> None: self._stream_config.schemaless = False self._validation_policy.record_passes_validation_policy.return_value = False - self._parser.parse_records.side_effect = [ - self._iter([self._A_RECORD, ValueError("An error")]) - ] + self._parser.parse_records.side_effect = [self._iter([self._A_RECORD, ValueError("An error")])] messages = list( self._stream.read_records_from_slice( @@ -225,9 +215,7 @@ class TestFileBasedErrorCollector: "Multiple errors", ], ) - def test_collect_parsing_error( - self, stream, file, line_no, n_skipped, collector_expected_len - ) -> None: + def test_collect_parsing_error(self, stream, file, line_no, n_skipped, collector_expected_len) -> None: test_error_pattern = "Error parsing record." # format the error body test_error = ( @@ -252,11 +240,5 @@ def test_yield_and_raise_collected(self) -> None: # we expect the following method will raise the AirbyteTracedException with pytest.raises(AirbyteTracedException) as parse_error: list(self.test_error_collector.yield_and_raise_collected()) - assert ( - parse_error.value.message - == "Some errors occured while reading from the source." - ) - assert ( - parse_error.value.internal_message - == "Please check the logged errors for more information." - ) + assert parse_error.value.message == "Some errors occured while reading from the source." + assert parse_error.value.internal_message == "Please check the logged errors for more information." 
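A quick note on the request-option change that the declarative paginator, partition-router, and test_request_option.py hunks earlier in this diff exercise: field_name is now an interpolated string evaluated against both the component's $parameters and the connector config. The following is a minimal sketch of that behaviour, assuming the import path and the field_name.eval(config) call shown in the test_request_option.py hunk above; names and values here are illustrative only.

from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType

# Parameters are bound when the component is built (via $parameters in the
# YAML manifest, or the `parameters` kwarg here).
option = RequestOption(
    inject_into=RequestOptionType.request_parameter,
    field_name="since_{{ parameters['cursor_field'] }}",
    parameters={"cursor_field": "updated_at"},
)

# The connector config is supplied at evaluation time. Only the parameters
# reference is used in this template, so this prints "since_updated_at";
# a "{{ config['cursor_field'] }}" template would instead resolve to
# "created_at" from the config passed below.
print(option.field_name.eval({"cursor_field": "created_at"}))
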
diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/test_file_based_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/test_file_based_scenarios.py index dfcea11f815e61..5a7a7b72ff9bee 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/test_file_based_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/test_file_based_scenarios.py @@ -258,7 +258,6 @@ single_csv_input_state_is_earlier_scenario_concurrent, single_csv_input_state_is_later_scenario_concurrent, single_csv_no_input_state_scenario_concurrent, - ] read_scenarios = discover_scenarios + [ diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/test_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/test_scenarios.py index e4d0aae7dacd54..47df14a0940358 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/test_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/test_scenarios.py @@ -80,11 +80,15 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac sorted_expected_records = sorted( filter(lambda e: "data" in e, expected_records), - key=lambda record: ",".join(f"{k}={v}" for k, v in sorted(record["data"].items(), key=lambda items: (items[0], items[1])) if k != "emitted_at"), + key=lambda record: ",".join( + f"{k}={v}" for k, v in sorted(record["data"].items(), key=lambda items: (items[0], items[1])) if k != "emitted_at" + ), ) sorted_records = sorted( filter(lambda r: r.record, records), - key=lambda record: ",".join(f"{k}={v}" for k, v in sorted(record.record.data.items(), key=lambda items: (items[0], items[1])) if k != "emitted_at"), + key=lambda record: ",".join( + f"{k}={v}" for k, v in sorted(record.record.data.items(), key=lambda items: (items[0], items[1])) if k != "emitted_at" + ), ) assert len(sorted_records) == len(sorted_expected_records) diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py index 08df8db39b7f84..af3161e07199d7 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py @@ -163,10 +163,17 @@ ) -CONCURRENT_STATE = StateBuilder().with_stream_state("stream1", { - "slices": [{"start": 0, "end": 0}], - "state_type": ConcurrencyCompatibleStateType.date_range.value, -}).build() +CONCURRENT_STATE = ( + StateBuilder() + .with_stream_state( + "stream1", + { + "slices": [{"start": 0, "end": 0}], + "state_type": ConcurrencyCompatibleStateType.date_range.value, + }, + ) + .build() +) test_incremental_stream_without_slice_boundaries_with_concurrent_state = ( TestScenarioBuilder() .set_name("test_incremental_stream_without_slice_boundaries_with_concurrent_state") diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py index 2080126a509e9c..8964024d2ca084 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py @@ -357,7 +357,7 @@ [ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"}, {"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"}, - {"stream1": {'cursor_field': 1}}, + {"stream1": {"cursor_field": 1}}, 
{"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"}, {"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"}, {"stream1": {"cursor_field": 2}}, diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py index f03fe0c0eb4337..3e0e00b62d32f7 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py @@ -113,7 +113,7 @@ def test_handle_partition_done_no_other_streams_to_generate_partitions_for(self) def test_handle_last_stream_partition_done(self): in_order_validation_mock = Mock() in_order_validation_mock.attach_mock(self._another_stream, "_another_stream") - in_order_validation_mock.attach_mock(self._message_repository, '_message_repository') + in_order_validation_mock.attach_mock(self._message_repository, "_message_repository") self._message_repository.consume_queue.return_value = iter([_ANY_AIRBYTE_MESSAGE]) stream_instances_to_read_from = [self._another_stream] @@ -143,10 +143,12 @@ def test_handle_last_stream_partition_done(self): status=AirbyteStreamStatus(AirbyteStreamStatus.COMPLETE), ), ), - ) + ), ] assert expected_messages == messages - assert in_order_validation_mock.mock_calls.index(call._another_stream.cursor.ensure_at_least_one_state_emitted) < in_order_validation_mock.mock_calls.index(call._message_repository.consume_queue) + assert in_order_validation_mock.mock_calls.index( + call._another_stream.cursor.ensure_at_least_one_state_emitted + ) < in_order_validation_mock.mock_calls.index(call._message_repository.consume_queue) def test_handle_partition(self): stream_instances_to_read_from = [self._another_stream] @@ -525,10 +527,11 @@ def test_on_exception_stops_streams_and_raises_an_exception(self): type=TraceType.STREAM_STATUS, emitted_at=1577836800000.0, stream_status=AirbyteStreamStatusTraceMessage( - stream_descriptor=StreamDescriptor(name=_ANOTHER_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.INCOMPLETE) + stream_descriptor=StreamDescriptor(name=_ANOTHER_STREAM_NAME), + status=AirbyteStreamStatus(AirbyteStreamStatus.INCOMPLETE), ), ), - ) + ), ] assert messages == expected_message diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_cursor.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_cursor.py index dd1246fbc2a8f1..94ed5211eabb9f 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_cursor.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_cursor.py @@ -112,7 +112,7 @@ def test_given_no_boundary_fields_when_close_partition_then_emit_state(self) -> self._state_manager.update_state_for_stream.assert_called_once_with( _A_STREAM_NAME, _A_STREAM_NAMESPACE, - {'a_cursor_field_key': 10}, + {"a_cursor_field_key": 10}, ) def test_given_no_boundary_fields_when_close_multiple_partitions_then_raise_exception(self) -> None: diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_datetime_state_converter.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_datetime_state_converter.py index b516afaeef6226..534dbd580787c8 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_datetime_state_converter.py @@ -94,72 +94,72 @@ def 
test_concurrent_stream_state_converter_is_state_message_compatible(converter None, {}, EpochValueConcurrentStreamStateConverter().zero_value, - id="epoch-converter-no-state-no-start-start-is-zero-value" + id="epoch-converter-no-state-no-start-start-is-zero-value", ), pytest.param( EpochValueConcurrentStreamStateConverter(), 1617030403, {}, datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), - id="epoch-converter-no-state-with-start-start-is-start" + id="epoch-converter-no-state-with-start-start-is-start", ), pytest.param( EpochValueConcurrentStreamStateConverter(), None, {"created_at": 1617030404}, datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc), - id="epoch-converter-state-without-start-start-is-from-state" + id="epoch-converter-state-without-start-start-is-from-state", ), pytest.param( EpochValueConcurrentStreamStateConverter(), 1617030404, {"created_at": 1617030403}, datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc), - id="epoch-converter-state-before-start-start-is-start" + id="epoch-converter-state-before-start-start-is-start", ), pytest.param( EpochValueConcurrentStreamStateConverter(), 1617030403, {"created_at": 1617030404}, datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc), - id="epoch-converter-state-after-start-start-is-from-state" + id="epoch-converter-state-after-start-start-is-from-state", ), pytest.param( IsoMillisConcurrentStreamStateConverter(), None, {}, IsoMillisConcurrentStreamStateConverter().zero_value, - id="isomillis-converter-no-state-no-start-start-is-zero-value" + id="isomillis-converter-no-state-no-start-start-is-zero-value", ), pytest.param( IsoMillisConcurrentStreamStateConverter(), "2021-08-22T05:03:27.000Z", {}, datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc), - id="isomillis-converter-no-state-with-start-start-is-start" + id="isomillis-converter-no-state-with-start-start-is-start", ), pytest.param( IsoMillisConcurrentStreamStateConverter(), None, {"created_at": "2021-08-22T05:03:27.000Z"}, datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc), - id="isomillis-converter-state-without-start-start-is-from-state" + id="isomillis-converter-state-without-start-start-is-from-state", ), pytest.param( IsoMillisConcurrentStreamStateConverter(), "2022-08-22T05:03:27.000Z", {"created_at": "2021-08-22T05:03:27.000Z"}, datetime(2022, 8, 22, 5, 3, 27, tzinfo=timezone.utc), - id="isomillis-converter-state-before-start-start-is-start" + id="isomillis-converter-state-before-start-start-is-start", ), pytest.param( IsoMillisConcurrentStreamStateConverter(), "2022-08-22T05:03:27.000Z", {"created_at": "2023-08-22T05:03:27.000Z"}, datetime(2023, 8, 22, 5, 3, 27, tzinfo=timezone.utc), - id="isomillis-converter-state-after-start-start-is-from-state" + id="isomillis-converter-state-after-start-start-is-from-state", ), - ] + ], ) def test_get_sync_start(converter, start, state, expected_start): assert converter._get_sync_start(CursorField("created_at"), state, start) == expected_start @@ -174,8 +174,12 @@ def test_get_sync_start(converter, start, state, expected_start): {}, { "legacy": {}, - "slices": [{"start": EpochValueConcurrentStreamStateConverter().zero_value, - "end": EpochValueConcurrentStreamStateConverter().zero_value}], + "slices": [ + { + "start": EpochValueConcurrentStreamStateConverter().zero_value, + "end": EpochValueConcurrentStreamStateConverter().zero_value, + } + ], "state_type": "date-range", }, id="empty-input-state-epoch", @@ -186,8 +190,12 @@ def test_get_sync_start(converter, start, state, expected_start): {"created": 1617030403}, { 
"state_type": "date-range", - "slices": [{"start": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), - "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}], + "slices": [ + { + "start": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + } + ], "legacy": {"created": 1617030403}, }, id="with-input-state-epoch", @@ -198,8 +206,12 @@ def test_get_sync_start(converter, start, state, expected_start): {"created": "2021-08-22T05:03:27.000Z"}, { "state_type": "date-range", - "slices": [{"start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc), - "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}], + "slices": [ + { + "start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc), + "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc), + } + ], "legacy": {"created": "2021-08-22T05:03:27.000Z"}, }, id="with-input-state-isomillis", @@ -227,8 +239,12 @@ def test_convert_from_sequential_state(converter, start, sequential_state, expec EpochValueConcurrentStreamStateConverter(), { "state_type": "date-range", - "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}], + "slices": [ + { + "start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + } + ], }, {"created": 1617030403}, id="epoch-single-slice", @@ -237,10 +253,16 @@ def test_convert_from_sequential_state(converter, start, sequential_state, expec EpochValueConcurrentStreamStateConverter(), { "state_type": "date-range", - "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}, - {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}], + "slices": [ + { + "start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + }, + { + "start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + }, + ], }, {"created": 1648566403}, id="epoch-overlapping-slices", @@ -249,10 +271,16 @@ def test_convert_from_sequential_state(converter, start, sequential_state, expec EpochValueConcurrentStreamStateConverter(), { "state_type": "date-range", - "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}, - {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}], + "slices": [ + { + "start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + }, + { + "start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + }, + ], }, {"created": 1617030403}, id="epoch-multiple-slices", @@ -261,8 +289,12 @@ def test_convert_from_sequential_state(converter, start, sequential_state, expec IsoMillisConcurrentStreamStateConverter(), { "state_type": "date-range", - "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}], + "slices": [ + { + "start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + } + ], }, {"created": 
"2021-03-29T15:06:43.000Z"}, id="isomillis-single-slice", @@ -271,10 +303,16 @@ def test_convert_from_sequential_state(converter, start, sequential_state, expec IsoMillisConcurrentStreamStateConverter(), { "state_type": "date-range", - "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}, - {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}], + "slices": [ + { + "start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + }, + { + "start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + }, + ], }, {"created": "2022-03-29T15:06:43.000Z"}, id="isomillis-overlapping-slices", @@ -283,10 +321,16 @@ def test_convert_from_sequential_state(converter, start, sequential_state, expec IsoMillisConcurrentStreamStateConverter(), { "state_type": "date-range", - "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}, - {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}], + "slices": [ + { + "start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + }, + { + "start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc), + }, + ], }, {"created": "2021-03-29T15:06:43.000Z"}, id="isomillis-multiple-slices", diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_partition_enqueuer.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_partition_enqueuer.py index 5b832adeaec048..bdcd9ad43318ca 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_partition_enqueuer.py @@ -71,6 +71,7 @@ def inner_function() -> Iterable[Partition]: for partition in partitions: yield partition raise exception + return inner_function @staticmethod diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_partition_reader.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_partition_reader.py index 4b0742f991a139..9e9fb897394967 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_partition_reader.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_partition_reader.py @@ -63,4 +63,5 @@ def _read_with_exception(records: List[Record], exception: Exception) -> Callabl def mocked_function() -> Iterable[Record]: yield from records raise exception + return mocked_function diff --git a/airbyte-cdk/python/unit_tests/sources/test_abstract_source.py b/airbyte-cdk/python/unit_tests/sources/test_abstract_source.py index f184fe50580242..f1489c43860a4a 100644 --- a/airbyte-cdk/python/unit_tests/sources/test_abstract_source.py +++ b/airbyte-cdk/python/unit_tests/sources/test_abstract_source.py @@ -372,17 +372,19 @@ def _as_state(state_data: Dict[str, Any], stream_name: str = "", per_stream_stat return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state_data)) -def _as_error_trace(stream: str, error_message: str, internal_message: Optional[str], failure_type: Optional[FailureType], stack_trace: Optional[str]) -> AirbyteMessage: +def _as_error_trace( 
+ stream: str, error_message: str, internal_message: Optional[str], failure_type: Optional[FailureType], stack_trace: Optional[str] +) -> AirbyteMessage: trace_message = AirbyteTraceMessage( emitted_at=datetime.datetime.now().timestamp() * 1000.0, type=TraceType.ERROR, error=AirbyteErrorTraceMessage( - stream_descriptor=StreamDescriptor(name=stream), - message=error_message, - internal_message=internal_message, - failure_type=failure_type, - stack_trace=stack_trace, - ), + stream_descriptor=StreamDescriptor(name=stream), + message=error_message, + internal_message=internal_message, + failure_type=failure_type, + stack_trace=stack_trace, + ), ) return AirbyteMessage(type=MessageType.TRACE, trace=trace_message) @@ -1186,8 +1188,12 @@ def test_checkpoint_state_from_stream_instance(): managers_stream = StreamNoStateMethod() state_manager = ConnectorStateManager( { - "teams": AirbyteStream(name="teams", namespace="", json_schema={}, supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental]), - "managers": AirbyteStream(name="managers", namespace="", json_schema={}, supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental]) + "teams": AirbyteStream( + name="teams", namespace="", json_schema={}, supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental] + ), + "managers": AirbyteStream( + name="managers", namespace="", json_schema={}, supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental] + ), }, [], ) @@ -1207,9 +1213,19 @@ def test_checkpoint_state_from_stream_instance(): @pytest.mark.parametrize( "exception_to_raise,expected_error_message,expected_internal_message", [ - pytest.param(AirbyteTracedException(message="I was born only to crash like Icarus"), "I was born only to crash like Icarus", None, id="test_raises_traced_exception"), - pytest.param(Exception("Generic connector error message"), "Something went wrong in the connector. See the logs for more details.", "Generic connector error message", id="test_raises_generic_exception"), - ] + pytest.param( + AirbyteTracedException(message="I was born only to crash like Icarus"), + "I was born only to crash like Icarus", + None, + id="test_raises_traced_exception", + ), + pytest.param( + Exception("Generic connector error message"), + "Something went wrong in the connector. 
See the logs for more details.", + "Generic connector error message", + id="test_raises_generic_exception", + ), + ], ) def test_continue_sync_with_failed_streams(mocker, exception_to_raise, expected_error_message, expected_internal_message): """ @@ -1317,7 +1333,9 @@ def test_sync_error_trace_messages_obfuscate_secrets(mocker): stream_output = [{"k1": "v1"}, {"k2": "v2"}] s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1") - s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="My api_key value API_KEY_VALUE flew too close to the sun.")) + s2 = StreamRaisesException( + exception_to_raise=AirbyteTracedException(message="My api_key value API_KEY_VALUE flew too close to the sun.") + ) s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3") mocker.patch.object(MockStream, "get_json_schema", return_value={}) diff --git a/airbyte-cdk/python/unit_tests/sources/test_connector_state_manager.py b/airbyte-cdk/python/unit_tests/sources/test_connector_state_manager.py index 3b7edc3e999129..9bee58eb69ee03 100644 --- a/airbyte-cdk/python/unit_tests/sources/test_connector_state_manager.py +++ b/airbyte-cdk/python/unit_tests/sources/test_connector_state_manager.py @@ -158,7 +158,10 @@ def test_initialize_state_manager(input_stream_state, expected_stream_state, expected_error): stream_to_instance_map = { "actors": AirbyteStream( - name="actors", namespace="public", json_schema={}, supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental], + name="actors", + namespace="public", + json_schema={}, + supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental], ) } @@ -268,7 +271,8 @@ def test_initialize_state_manager(input_stream_state, expected_stream_state, exp ], ) def test_get_stream_state(input_state, stream_name, namespace, expected_state): - stream_to_instance_map = {stream_name: AirbyteStream( + stream_to_instance_map = { + stream_name: AirbyteStream( name=stream_name, namespace=namespace, json_schema={}, supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental] ) } diff --git a/airbyte-cdk/python/unit_tests/test/test_entrypoint_wrapper.py b/airbyte-cdk/python/unit_tests/test/test_entrypoint_wrapper.py index 08797e763d37f3..d0564cdf93f629 100644 --- a/airbyte-cdk/python/unit_tests/test/test_entrypoint_wrapper.py +++ b/airbyte-cdk/python/unit_tests/test/test_entrypoint_wrapper.py @@ -29,10 +29,7 @@ def _a_state_message(state: Any) -> AirbyteMessage: - return AirbyteMessage( - type=Type.STATE, - state=AirbyteStateMessage(data=state) - ) + return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state)) def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteMessage: @@ -50,14 +47,10 @@ def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteM _A_RECORD = AirbyteMessage( - type=Type.RECORD, - record=AirbyteRecordMessage(stream="stream", data={"record key": "record value"}, emitted_at=0) + type=Type.RECORD, record=AirbyteRecordMessage(stream="stream", data={"record key": "record value"}, emitted_at=0) ) _A_STATE_MESSAGE = _a_state_message({"state key": "state value for _A_STATE_MESSAGE"}) -_A_LOG = AirbyteMessage( - type=Type.LOG, - log=AirbyteLogMessage(level=Level.INFO, message="This is an Airbyte log message") -) +_A_LOG = AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="This is an Airbyte log message")) _AN_ERROR_MESSAGE = AirbyteMessage( type=Type.TRACE, trace=AirbyteTraceMessage( @@ -121,6 +114,7 @@ def 
_validate_tmp_files(self): _validate_tmp_catalog(expected_catalog, entrypoint.return_value.parse_args.call_args.args[0][4]) _validate_tmp_json_file(expected_state, entrypoint.return_value.parse_args.call_args.args[0][6]) return entrypoint.return_value.run.return_value + return _validate_tmp_files @@ -154,6 +148,7 @@ def test_given_logging_during_run_when_read_then_output_has_logs(self, entrypoin def _do_some_logging(self): logging.getLogger("any logger").info(_A_LOG_MESSAGE) return entrypoint.return_value.run.return_value + entrypoint.return_value.run.side_effect = _do_some_logging output = read(self._a_source, _A_CONFIG, _A_CATALOG, _A_STATE) @@ -204,7 +199,7 @@ def test_given_trace_message_when_read_then_output_has_trace_messages(self, entr def test_given_stream_statuses_when_read_then_return_statuses(self, entrypoint): status_messages = [ _a_status_message(_A_STREAM_NAME, AirbyteStreamStatus.STARTED), - _a_status_message(_A_STREAM_NAME, AirbyteStreamStatus.COMPLETE) + _a_status_message(_A_STREAM_NAME, AirbyteStreamStatus.COMPLETE), ] entrypoint.return_value.run.return_value = _to_entrypoint_output(status_messages) output = read(self._a_source, _A_CONFIG, _A_CATALOG, _A_STATE) @@ -215,20 +210,20 @@ def test_given_stream_statuses_for_many_streams_when_read_then_filter_other_stre status_messages = [ _a_status_message(_A_STREAM_NAME, AirbyteStreamStatus.STARTED), _a_status_message("another stream name", AirbyteStreamStatus.INCOMPLETE), - _a_status_message(_A_STREAM_NAME, AirbyteStreamStatus.COMPLETE) + _a_status_message(_A_STREAM_NAME, AirbyteStreamStatus.COMPLETE), ] entrypoint.return_value.run.return_value = _to_entrypoint_output(status_messages) output = read(self._a_source, _A_CONFIG, _A_CATALOG, _A_STATE) assert len(output.get_stream_statuses(_A_STREAM_NAME)) == 2 - @patch('airbyte_cdk.test.entrypoint_wrapper.print', create=True) + @patch("airbyte_cdk.test.entrypoint_wrapper.print", create=True) @patch("airbyte_cdk.test.entrypoint_wrapper.AirbyteEntrypoint") def test_given_unexpected_exception_when_read_then_print(self, entrypoint, print_mock): entrypoint.return_value.run.side_effect = ValueError("This error should be printed") read(self._a_source, _A_CONFIG, _A_CATALOG, _A_STATE) assert print_mock.call_count > 0 - @patch('airbyte_cdk.test.entrypoint_wrapper.print', create=True) + @patch("airbyte_cdk.test.entrypoint_wrapper.print", create=True) @patch("airbyte_cdk.test.entrypoint_wrapper.AirbyteEntrypoint") def test_given_expected_exception_when_read_then_do_not_print(self, entrypoint, print_mock): entrypoint.return_value.run.side_effect = ValueError("This error should be printed") diff --git a/airbyte-cdk/python/unit_tests/test_entrypoint.py b/airbyte-cdk/python/unit_tests/test_entrypoint.py index 61e7e7ec142a3c..7451a320d40420 100644 --- a/airbyte-cdk/python/unit_tests/test_entrypoint.py +++ b/airbyte-cdk/python/unit_tests/test_entrypoint.py @@ -247,7 +247,9 @@ def test_run_read(entrypoint: AirbyteEntrypoint, mocker, spec_mock, config_mock) assert spec_mock.called -def test_given_message_emitted_during_config_when_read_then_emit_message_before_next_steps(entrypoint: AirbyteEntrypoint, mocker, spec_mock, config_mock): +def test_given_message_emitted_during_config_when_read_then_emit_message_before_next_steps( + entrypoint: AirbyteEntrypoint, mocker, spec_mock, config_mock +): parsed_args = Namespace(command="read", config="config_path", state="statepath", catalog="catalogpath") mocker.patch.object(MockSource, "read_catalog", side_effect=ValueError)