diff --git a/airbyte-config-oss/init-oss/src/main/resources/seed/oss_catalog.json b/airbyte-config-oss/init-oss/src/main/resources/seed/oss_catalog.json
index 894d15718bc5..a2a14059c693 100644
--- a/airbyte-config-oss/init-oss/src/main/resources/seed/oss_catalog.json
+++ b/airbyte-config-oss/init-oss/src/main/resources/seed/oss_catalog.json
@@ -12828,7 +12828,7 @@
       "sourceDefinitionId": "778daa7c-feaf-4db6-96f3-70fd645acc77",
       "name": "File (CSV, JSON, Excel, Feather, Parquet)",
       "dockerRepository": "airbyte/source-file",
-      "dockerImageTag": "0.3.2",
+      "dockerImageTag": "0.3.3",
       "documentationUrl": "https://docs.airbyte.com/integrations/sources/file",
       "icon": "file.svg",
       "sourceType": "file",
@@ -12897,6 +12897,7 @@
           "service_account_json": {
             "type": "string",
             "title": "Service Account JSON",
+            "airbyte_secret": true,
             "description": "In order to access private Buckets stored on Google Cloud, this connector would need a service account json credentials with the proper permissions as described here. Please generate the credentials.json file and copy/paste its content to this field (expecting JSON formats). If accessing publicly available data, this field is not necessary."
           }
         }
diff --git a/airbyte-config-oss/init-oss/src/main/resources/seed/source_definitions.yaml b/airbyte-config-oss/init-oss/src/main/resources/seed/source_definitions.yaml
index ed8d4a0f7f83..6c4ef1451b6b 100644
--- a/airbyte-config-oss/init-oss/src/main/resources/seed/source_definitions.yaml
+++ b/airbyte-config-oss/init-oss/src/main/resources/seed/source_definitions.yaml
@@ -637,7 +637,7 @@
 - name: File (CSV, JSON, Excel, Feather, Parquet)
   sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
   dockerRepository: airbyte/source-file
-  dockerImageTag: 0.3.2
+  dockerImageTag: 0.3.3
   documentationUrl: https://docs.airbyte.com/integrations/sources/file
   icon: file.svg
   sourceType: file
diff --git a/airbyte-config-oss/init-oss/src/main/resources/seed/source_specs.yaml b/airbyte-config-oss/init-oss/src/main/resources/seed/source_specs.yaml
index f1c1832da2bf..25c63dd3708f 100644
--- a/airbyte-config-oss/init-oss/src/main/resources/seed/source_specs.yaml
+++ b/airbyte-config-oss/init-oss/src/main/resources/seed/source_specs.yaml
@@ -4567,7 +4567,7 @@
   supportsNormalization: false
   supportsDBT: false
   supported_destination_sync_modes: []
-- dockerImage: "airbyte/source-file:0.3.2"
+- dockerImage: "airbyte/source-file:0.3.3"
   spec:
     documentationUrl: "https://docs.airbyte.com/integrations/sources/file"
     connectionSpecification:
@@ -4649,6 +4649,7 @@
         service_account_json:
           type: "string"
           title: "Service Account JSON"
+          airbyte_secret: true
           description: "In order to access private Buckets stored on Google\
             \ Cloud, this connector would need a service account json credentials\
             \ with the proper permissions as described here. Please generate the credentials.json file and copy/paste its content to this field (expecting JSON formats). If accessing publicly available data, this field is not necessary."
diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md
index a2d26a93464d..0f8c6c4af353 100644
--- a/docs/integrations/sources/file.md
+++ b/docs/integrations/sources/file.md
@@ -3,11 +3,11 @@
 This page contains the setup guide and reference information for the Files source connector.
 
 ## Prerequisites
 
-* URL to access the file
-* Format
-* Reader options
-* Storage Providers
+- URL to access the file
+- Format
+- Reader options
+- Storage Providers
 
 ## Setup guide
@@ -29,38 +29,38 @@ Setup through Airbyte Cloud will be exactly the same as the open-source setup, e
 
 ### Fields description
 
-- For `Dataset Name` use the *name* of the final table to replicate this file into (should include letters, numbers dash and underscores only).
-- For `File Format` use the *format* of the file which should be replicated (Warning: some formats may be experimental, please refer to the docs).
-- For `Reader Options` use a *string in JSON* format. It depends on the chosen file format to provide additional options and tune its behavior. For example, `{}` for empty options, `{"sep": " "}` for set up separator to one space ' '.
-- For `URL` use the *URL* path to access the file which should be replicated.
-- For `Storage Provider` use the *storage Provider* or *Location* of the file(s) which should be replicated.
-  - [Default] *Public Web*
+- For `Dataset Name` use the _name_ of the final table to replicate this file into (should include letters, numbers, dashes, and underscores only).
+- For `File Format` use the _format_ of the file which should be replicated (Warning: some formats may be experimental; please refer to the docs).
+- For `Reader Options` use a _string in JSON_ format. The available options depend on the chosen file format and let you tune the reader's behavior. For example, `{}` for empty options, or `{"sep": " "}` to set the separator to one space ' '.
+- For `URL` use the _URL_ path to access the file which should be replicated.
+- For `Storage Provider` use the _Storage Provider_ or _Location_ of the file(s) which should be replicated.
+  - [Default] _Public Web_
     - `User-Agent` set to active if you want to add User-Agent to requests
-  - *GCS: Google Cloud Storage*
+  - _GCS: Google Cloud Storage_
     - `Service Account JSON` In order to access private Buckets stored on Google Cloud, this connector would need a service account json credentials with the proper permissions as described here. Please generate the credentials.json file and copy/paste its content to this field (expecting JSON formats). If accessing publicly available data, this field is not necessary.
-  - *S3: Amazon Web Services*
+  - _S3: Amazon Web Services_
    - `AWS Access Key ID` In order to access private Buckets stored on AWS S3, this connector would need credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
    - `AWS Secret Access Key` In order to access private Buckets stored on AWS S3, this connector would need credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
-  - *AzBlob: Azure Blob Storage*
+  - _AzBlob: Azure Blob Storage_
    - `Storage Account` The globally unique name of the storage account that the desired blob sits within. See here for more details.
    - `SAS Token` To access Azure Blob Storage, this connector would need credentials with the proper permissions. One option is a SAS (Shared Access Signature) token. If accessing publicly available data, this field is not necessary.
    - `Shared Key` To access Azure Blob Storage, this connector would need credentials with the proper permissions. One option is a storage account shared key (aka account key or access key). If accessing publicly available data, this field is not necessary.
-  - *SSH: Secure Shell*
-    - `User` use *username*.
-    - `Password` use *password*.
-    - `Host` use a *host*.
-    - `Port` use a *port* for your host.
-  - *SCP: Secure copy protocol*
-    - `User` use *username*.
-    - `Password` use *password*.
-    - `Host` use a *host*.
-    - `Port` use a *port* for your host.
-  - *SFTP: Secure File Transfer Protocol*
-    - `User` use *username*.
-    - `Password` use *password*.
-    - `Host` use a *host*.
-    - `Port` use a *port* for your host.
-  - *Local Filesystem (limited)*
+  - _SSH: Secure Shell_
+    - `User` use _username_.
+    - `Password` use _password_.
+    - `Host` use a _host_.
+    - `Port` use a _port_ for your host.
+  - _SCP: Secure Copy Protocol_
+    - `User` use _username_.
+    - `Password` use _password_.
+    - `Host` use a _host_.
+    - `Port` use a _port_ for your host.
+  - _SFTP: Secure File Transfer Protocol_
+    - `User` use _username_.
+    - `Password` use _password_.
+    - `Host` use a _host_.
+    - `Port` use a _port_ for your host.
+  - _Local Filesystem (limited)_
     - `Storage` WARNING: Note that the local storage URL available for reading must start with the local mount "/local/" at the moment until we implement more advanced docker mounting options.
 
 #### Provider Specific Information
@@ -79,7 +79,7 @@ For example, if the format `CSV` is selected, then options from the [read_csv](h
 
 - It is therefore possible to customize the `delimiter` (or `sep`) in case of tab separated files.
 - Header line can be ignored with `header=0` and customized with `names`
-- Parse dates for in specified columns 
+- Parse dates in specified columns
 - etc
 
 We would therefore provide in the `reader_options` the following json:
@@ -190,7 +190,8 @@ In order to read large files from a remote location, this connector uses the [sm
 ## Changelog
 
 | Version | Date       | Pull Request                                              | Subject                                                                                                   |
-|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------|
+| :------ | :--------- | :------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------- |
+| 0.3.3   | 2023-05-04 | [25819](https://github.com/airbytehq/airbyte/pull/25819) | GCP service_account_json is a secret                                                                      |
 | 0.3.2   | 2023-05-01 | [25641](https://github.com/airbytehq/airbyte/pull/25641) | Handle network errors                                                                                     |
 | 0.3.1   | 2023-04-27 | [25575](https://github.com/airbytehq/airbyte/pull/25575) | Fix OOM; read Excel files in chunks using `openpyxl`                                                      |
 | 0.3.0   | 2023-04-24 | [25445](https://github.com/airbytehq/airbyte/pull/25445) | Add datetime format parsing support for csv files                                                         |
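
For context on the `Reader Options` field that the docs changes above describe: the JSON string is forwarded to the chosen reader (for `CSV`, pandas `read_csv`) as keyword arguments. Below is a minimal sketch, assuming a hypothetical tab-separated sample file and made-up column names; it is illustrative, not taken from this PR.

```python
import json

import pandas as pd

# Hypothetical reader options for a tab-separated file. Every key is passed
# straight through to pandas.read_csv as a keyword argument.
reader_options = {
    "sep": "\t",                    # use tab as the delimiter
    "header": 0,                    # drop the file's own header row...
    "names": ["col1", "date_col"],  # ...and use these (made-up) column names
    "parse_dates": ["date_col"],    # parse this column as datetimes
}

# The serialized form is what would be pasted into the connector's
# `Reader Options` field:
print(json.dumps(reader_options))

# Sanity-check the options locally before configuring the connector
# (assumes a local sample.tsv exists):
df = pd.read_csv("sample.tsv", **reader_options)
print(df.dtypes)
```

Because the options are passed through verbatim, any other keyword that the selected reader accepts can be supplied the same way.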