Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Iterable: split email send stream into slices. #7780

Merged
merged 2 commits into from
Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
"sourceDefinitionId": "2e875208-0c0b-4ee4-9e92-1cb3156ea799",
"name": "Iterable",
"dockerRepository": "airbyte/source-iterable",
"dockerImageTag": "0.1.10",
"dockerImageTag": "0.1.12",
"documentationUrl": "https://docs.airbyte.io/integrations/sources/iterable"
}
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@
- name: Iterable
sourceDefinitionId: 2e875208-0c0b-4ee4-9e92-1cb3156ea799
dockerRepository: airbyte/source-iterable
dockerImageTag: 0.1.11
dockerImageTag: 0.1.12
documentationUrl: https://docs.airbyte.io/integrations/sources/iterable
sourceType: api
- name: Jira
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-iterable/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.11
LABEL io.airbyte.version=0.1.12
LABEL io.airbyte.name=airbyte/source-iterable
4 changes: 2 additions & 2 deletions airbyte-integrations/connectors/source-iterable/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

MAIN_REQUIREMENTS = [
"airbyte-cdk~=0.1",
"pendulum~=1.2",
"pendulum~=2.1.2",
"requests~=2.25",
]

TEST_REQUIREMENTS = ["pytest~=6.1", "responses==0.13.3"]
TEST_REQUIREMENTS = ["pytest~=6.1", "responses==0.13.3", "freezegun==1.1.0"]


setup(
Expand Down
123 changes: 6 additions & 117 deletions airbyte-integrations/connectors/source-iterable/source_iterable/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,129 +3,18 @@
#

import csv
import json
import urllib.parse as urlparse
from abc import ABC, abstractmethod
from io import StringIO
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Union
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional

import pendulum
import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams.http import HttpStream
from source_iterable.iterable_streams import IterableExportStream, IterableExportStreamRanged, IterableStream

# Module-level limits; the code that reads them falls outside this excerpt.
# NOTE(review): the name suggests a cap on event rows per request - confirm at the use site.
EVENT_ROWS_LIMIT = 200
# NOTE(review): the name suggests how many campaigns are batched per request - confirm at the use site.
CAMPAIGNS_PER_REQUEST = 20


class IterableStream(HttpStream, ABC):
    """Common base for Iterable API streams.

    Sends the API key as the ``api_key`` query parameter and reads records
    from the ``data_field`` key of the JSON response body.
    """

    # The API does not return a wait time, so a fixed value is hardcoded.
    BACKOFF_TIME_CONSTANT = 10.0

    url_base = "https://api.iterable.com/api/"
    primary_key = "id"

    def __init__(self, api_key, **kwargs):
        """Keep the API key; all other keyword arguments go to the parent class."""
        super().__init__(**kwargs)
        self._api_key = api_key

    @property
    @abstractmethod
    def data_field(self) -> str:
        """
        :return: name of the response key that holds this stream's records
        """

    def backoff_time(self, response: requests.Response) -> Optional[float]:
        """Return the fixed ``BACKOFF_TIME_CONSTANT``; the response is not inspected."""
        return self.BACKOFF_TIME_CONSTANT

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Always return ``None``: the Iterable API does not support pagination."""
        return None

    def request_params(self, **kwargs) -> MutableMapping[str, Any]:
        """Return the query parameters: only the API key at this level."""
        return dict(api_key=self._api_key)

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield each item found under ``data_field`` in the JSON body (nothing if the key is absent)."""
        payload = response.json()
        yield from payload.get(self.data_field, [])


class IterableExportStream(IterableStream, ABC):
    """Base class for incremental streams read from the ``export/data.json`` endpoint.

    The response body is consumed line by line, one JSON object per line.
    The ``createdAt`` field of each record is used as the incremental cursor.
    """

    cursor_field = "createdAt"
    primary_key = None

    def __init__(self, start_date, **kwargs):
        """
        :param start_date: date-time string parsed with ``pendulum.parse``; used as the
            lower bound of the export window when no state is available
        :param kwargs: forwarded unchanged to the parent class
        """
        super().__init__(**kwargs)
        self._start_date = pendulum.parse(start_date)
        self.stream_params = {"dataTypeName": self.data_field}

    def path(self, **kwargs) -> str:
        """Return the export endpoint path, relative to ``url_base``."""
        return "export/data.json"

    @staticmethod
    def _field_to_datetime(value: Union[int, str]) -> pendulum.datetime:
        """Convert a raw cursor value into a pendulum datetime.

        :param value: an int (divided by 1000 before ``pendulum.from_timestamp``,
            i.e. treated as milliseconds) or a date-time string
        :raises ValueError: if ``value`` is neither an int nor a str
        """
        if isinstance(value, int):
            value = pendulum.from_timestamp(value / 1000.0)
        elif isinstance(value, str):
            value = pendulum.parse(value)
        else:
            raise ValueError(f"Unsupported type of datetime field {type(value)}")
        return value

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Return the latest state by comparing the cursor value in the latest record with the stream's most recent state object
        and returning an updated state object.
        """
        latest_benchmark = latest_record[self.cursor_field]
        if current_stream_state.get(self.cursor_field):
            return {self.cursor_field: str(max(latest_benchmark, self._field_to_datetime(current_stream_state[self.cursor_field])))}
        return {self.cursor_field: str(latest_benchmark)}

    def request_params(self, stream_state: Mapping[str, Any], **kwargs) -> MutableMapping[str, Any]:
        """Build the query parameters for one export request.

        The window starts at the cursor stored in ``stream_state`` when present,
        otherwise at the configured start date, and ends at the current time.
        """
        params = super().request_params(stream_state=stream_state)
        start_datetime = self._start_date
        if stream_state.get(self.cursor_field):
            start_datetime = pendulum.parse(stream_state[self.cursor_field])

        params.update(
            {"startDateTime": start_datetime.strftime("%Y-%m-%d %H:%M:%S"), "endDateTime": pendulum.now().strftime("%Y-%m-%d %H:%M:%S")},
            **self.stream_params,
        )
        return params

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield one record per non-empty line of the response body.

        The cursor field of every record is converted to a datetime before it is yielded.
        """
        for line in response.iter_lines():
            # iter_lines() can emit empty keep-alive lines; json.loads would raise on them.
            if not line:
                continue
            record = json.loads(line)
            record[self.cursor_field] = self._field_to_datetime(record[self.cursor_field])
            yield record

    def request_kwargs(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Mapping[str, Any]:
        """
        https://api.iterable.com/api/docs#export_exportDataJson
        Requests of this type can download a large number of JSON objects
        separated by newline characters.
        Pass stream=True to requests.session.send to avoid loading the whole
        report content into memory.
        """
        return {"stream": True}


class Lists(IterableStream):
data_field = "lists"

Expand Down Expand Up @@ -257,7 +146,7 @@ class EmailBounce(IterableExportStream):
data_field = "emailBounce"


class EmailClick(IterableExportStream):
class EmailClick(IterableExportStreamRanged):
name = "email_click"
data_field = "emailClick"

Expand All @@ -267,17 +156,17 @@ class EmailComplaint(IterableExportStream):
data_field = "emailComplaint"


class EmailOpen(IterableExportStream):
class EmailOpen(IterableExportStreamRanged):
name = "email_open"
data_field = "emailOpen"


class EmailSend(IterableExportStream):
class EmailSend(IterableExportStreamRanged):
yevhenii-ldv marked this conversation as resolved.
Show resolved Hide resolved
name = "email_send"
data_field = "emailSend"


class EmailSendSkip(IterableExportStream):
class EmailSendSkip(IterableExportStreamRanged):
name = "email_send_skip"
data_field = "emailSendSkip"

Expand Down
Loading