✨Source Senseforce: Make Connector Compatible with Builder (#39462)
btkcodedev committed Jun 13, 2024
1 parent fcc2940 commit 367a7d3
Showing 8 changed files with 194 additions and 167 deletions.
4 changes: 2 additions & 2 deletions airbyte-integrations/connectors/source-senseforce/README.md
@@ -22,15 +22,15 @@ poetry install --with dev
**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/senseforce)
to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_senseforce/spec.yaml` file.
Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information.
-See `sample_files/sample_config.json` for a sample config file.
+See `integration_tests/sample_config.json` for a sample config file.


### Locally running the connector
```
poetry run source-senseforce spec
poetry run source-senseforce check --config secrets/config.json
poetry run source-senseforce discover --config secrets/config.json
-poetry run source-senseforce read --config secrets/config.json --catalog sample_files/configured_catalog.json
+poetry run source-senseforce read --config secrets/config.json --catalog integration_tests/configured_catalog.json
```
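
The `secrets/config.json` referenced above follows the connector spec (shown in the manifest diff further down). As a sketch, a minimal config might look like the following, with values echoing the spec's examples:

```json
{
  "backend_url": "https://galaxyapi.senseforce.io",
  "access_token": "<your Senseforce API access token>",
  "dataset_id": "8f418098-ca28-4df5-9498-0df9fe78eda7",
  "start_date": "2017-01-25",
  "slice_range": 10
}
```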

### Running unit tests
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-senseforce/metadata.yaml
@@ -6,7 +6,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 39de93cb-1511-473e-a673-5cbedb9436af
-  dockerImageTag: 0.1.3
+  dockerImageTag: 0.1.4
dockerRepository: airbyte/source-senseforce
githubIssueLabel: source-senseforce
icon: senseforce.svg
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-senseforce/pyproject.toml
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
version = "0.1.3"
version = "0.1.4"
name = "source-senseforce"
description = "Source implementation for Senseforce."
authors = [ "Airbyte <contact@airbyte.io>",]

This file was deleted.

airbyte-integrations/connectors/source-senseforce/source_senseforce/manifest.yaml
@@ -1,79 +1,195 @@
version: "1.0.0"
version: 1.0.0

definitions:
selector:
extractor:
field_path: []
type: DeclarativeSource

check:
type: CheckStream
stream_names:
- dataset

requester:
# url_base: "http://localhost:8080"
definitions:
streams:
dataset:
type: DeclarativeStream
name: dataset
primary_key:
- id
retriever:
type: SimpleRetriever
requester:
$ref: "#/definitions/base_requester"
path: /api/dataset/execute/{{ config['dataset_id']}}
http_method: POST
request_headers:
Content-Type: application/json
request_body_data: |
[{"clause": {"type": "timestamp", "operator": 10, "parameters":
[{"value": {{ stream_slice['start_time'] | int * 1000 }} },
{"value": {{ stream_slice['end_time'] | int * 1000 + (86400000 - 1) }} }
]
}, "orderBy": 1, "columnName": "Timestamp"}]/
record_selector:
type: RecordSelector
extractor:
type: DpathExtractor
field_path: []
paginator:
type: DefaultPaginator
page_token_option:
type: RequestOption
inject_into: request_parameter
field_name: offset
page_size_option:
type: RequestOption
inject_into: request_parameter
field_name: limit
pagination_strategy:
type: OffsetIncrement
page_size: 10000
incremental_sync:
type: DatetimeBasedCursor
cursor_field: airbyte_cursor
name: dataset
primary_key:
- id
path: /api/dataset/execute/{{ config['dataset_id']}}
cursor_datetime_formats:
- "%s"
datetime_format: "%s"
start_datetime:
type: MinMaxDatetime
datetime: "{{ config['start_date'] }}"
datetime_format: "%Y-%m-%d"
end_datetime:
type: MinMaxDatetime
datetime: "{{ now_utc() }}"
datetime_format: "%Y-%m-%d %H:%M:%S.%f+00:00"
step: P100D
cursor_granularity: PT1S
transformations:
- type: AddFields
fields:
- path:
- airbyte_cursor
value: "{{ record['timestamp'] | int / 1000 }}"
schema_loader:
type: InlineSchemaLoader
schema:
$ref: "#/schemas/dataset"
base_requester:
type: HttpRequester
url_base: "{{ config['backend_url'] }}"
http_method: "POST"
request_body_data: |
[{"clause": {"type": "timestamp", "operator": 10, "parameters":
[{"value": {{ stream_slice['start_time'] | int * 1000 }} },
{"value": {{ stream_slice['end_time'] | int * 1000 + (86400000 - 1) }} }
]
}, "orderBy": 1, "columnName": "Timestamp"}]/
request_headers:
Content-Type: application/json
authenticator:
type: BearerAuthenticator
api_token: "{{ config['access_token'] }}"

incremental_sync:
type: "DatetimeBasedCursor"
start_datetime:
datetime: "{{ config['start_date'] }}"
datetime_format: "%Y-%m-%d"
end_datetime:
datetime: "{{ now_utc() }}"
datetime_format: "%Y-%m-%d %H:%M:%S.%f+00:00"
step: "P100D" #TODO: Add {{ config['slice_range'] ~ d }} here, once it's possible to use config-values for step definition
datetime_format: "%s"
cursor_granularity: "PT1S"
cursor_field: "airbyte_cursor"

retriever:
record_selector:
$ref: "#/definitions/selector"
paginator:
type: DefaultPaginator
page_size_option:
inject_into: "request_parameter"
field_name: "limit"
pagination_strategy:
type: "OffsetIncrement"
page_size: 10000
page_token_option:
type: RequestOption
field_name: "offset"
inject_into: "request_parameter"
requester:
$ref: "#/definitions/requester"
streams:
- $ref: "#/definitions/streams/dataset"

base_stream:
incremental_sync:
$ref: "#/definitions/incremental_sync"
retriever:
$ref: "#/definitions/retriever"
dataset_stream:
$ref: "#/definitions/base_stream"
$parameters:
name: "dataset"
primary_key:
- "id"
path: "/api/dataset/execute/{{ config['dataset_id']}}"
transformations:
- type: AddFields
fields:
- path: ["airbyte_cursor"]
value: "{{ record['timestamp'] | int / 1000 }}"
spec:
type: Spec
connection_specification:
type: object
$schema: http://json-schema.org/draft-07/schema#
required:
- access_token
- backend_url
- dataset_id
- start_date
properties:
access_token:
type: string
title: API Access Token
description: >-
Your API access token. See <a
href="https://manual.senseforce.io/manual/sf-platform/public-api/get-your-access-token/">here</a>.
The toke is case sensitive.
airbyte_secret: true
order: 0
backend_url:
type: string
title: Senseforce backend URL
examples:
- https://galaxyapi.senseforce.io
description: >-
Your Senseforce API backend URL. This is the URL shown during the
Login screen. See <a
href="https://manual.senseforce.io/manual/sf-platform/public-api/get-your-access-token/">here</a>
for more details. (Note: Most Senseforce backend APIs have the term
'galaxy' in their ULR)
order: 1
dataset_id:
type: string
title: Dataset ID
examples:
- 8f418098-ca28-4df5-9498-0df9fe78eda7
description: >-
The ID of the dataset you want to synchronize. The ID can be found in
the URL when opening the dataset. See <a
href="https://manual.senseforce.io/manual/sf-platform/public-api/get-your-access-token/">here</a>
for more details. (Note: As the Senseforce API only allows to
synchronize a specific dataset, each dataset you want to synchronize
needs to be implemented as a separate airbyte source).
order: 2
slice_range:
type: integer
title: Data request time increment in days
default: 10
minimum: 1
maximum: 365
examples:
- 1
- 3
- 10
- 30
- 180
- 360
airbyte_hidden: true
description: >-
The time increment used by the connector when requesting data from the
Senseforce API. The bigger the value is, the less requests will be
made and faster the sync will be. On the other hand, the more seldom
the state is persisted and the more likely one could run into rate
limites. Furthermore, consider that large chunks of time might take a
long time for the Senseforce query to return data - meaning it could
take in effect longer than with more smaller time slices. If there are
a lot of data per day, set this setting to 1. If there is only very
little data per day, you might change the setting to 10 or more.
order: 3
start_date:
type: string
title: The first day (in UTC) when to read data from.
pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}$
description: >-
UTC date and time in the format 2017-01-25. Only data with "Timestamp"
after this date will be replicated. Important note: This start date
must be set to the first day of where your dataset provides data. If
your dataset has data from 2020-10-10 10:21:10, set the start_date to
2020-10-10 or later
examples:
- "2017-01-25"
format: date
order: 4
additionalProperties: true

streams:
- "#/definitions/dataset_stream"
metadata:
autoImportSchema:
dataset: false

check:
stream_names:
- "dataset"
schemas:
dataset:
type: object
$schema: http://json-schema.org/draft-07/schema#
additionalProperties: true
properties:
airbyte_cursor:
type: number
id:
type:
- "null"
- string
thing:
type: string
timestamp:
type: integer
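
To make the new manifest's incremental logic concrete: the `DatetimeBasedCursor` walks from `start_date` to `now_utc()` in `P100D` steps, and each resulting `stream_slice` is rendered into the POST body as millisecond timestamps. Below is a rough Python sketch of that behavior; it is an illustration, not the CDK's implementation, and the CDK's exact slice-boundary handling may differ slightly.

```python
from datetime import datetime, timedelta, timezone

def build_slices(start_date: str, step_days: int = 100):
    """Yield (start_time, end_time) epoch-second pairs, emulating the
    manifest's DatetimeBasedCursor with step P100D and cursor_granularity PT1S."""
    cursor = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
    now = datetime.now(timezone.utc)
    step = timedelta(days=step_days)
    while cursor <= now:
        slice_end = min(cursor + step - timedelta(seconds=1), now)
        yield int(cursor.timestamp()), int(slice_end.timestamp())
        cursor += step

def render_body(start_time: int, end_time: int) -> str:
    """Emulate the request_body_data template: Senseforce filters on
    millisecond timestamps, so the slice bounds are scaled by 1000 and the
    end bound is pushed to the last millisecond of its day.
    (The manifest template ends with a stray "/", which is omitted here.)"""
    return (
        '[{"clause": {"type": "timestamp", "operator": 10, "parameters": '
        f'[{{"value": {start_time * 1000} }}, '
        f'{{"value": {end_time * 1000 + (86400000 - 1)} }}]}}, '
        '"orderBy": 1, "columnName": "Timestamp"}]'
    )

first_slice = next(build_slices("2017-01-25"))
print(render_body(*first_slice))
```

The `AddFields` transformation closes the loop on the read side: each record's millisecond `timestamp` is divided by 1000 so that `airbyte_cursor` matches the cursor's `%s` (epoch seconds) format.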
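
Likewise, the `DefaultPaginator` with `OffsetIncrement` pages through results by injecting `limit` and `offset` request parameters until a page comes back with fewer than `page_size` records. A minimal sketch of that contract follows; `fetch_all_pages` is a hypothetical helper, not CDK code, and it assumes the endpoint returns a JSON list, which is why the record selector's `field_path` is empty.

```python
import requests

def fetch_all_pages(url: str, headers: dict, body: str, page_size: int = 10000) -> list:
    """Collect every record by requesting pages of `page_size` rows,
    advancing `offset` until a short (final) page is returned."""
    records, offset = [], 0
    while True:
        response = requests.post(
            url,
            params={"limit": page_size, "offset": offset},  # injected as request parameters
            headers=headers,
            data=body,
        )
        response.raise_for_status()
        page = response.json()  # empty field_path: the response body is the record list
        records.extend(page)
        if len(page) < page_size:
            break  # short page means no more data
        offset += page_size
    return records
```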

This file was deleted.

This file was deleted.
