Skip to content

Commit

Permalink
Iterable: split email send stream into slices. (#7780)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmytro committed Nov 10, 2021
1 parent c0cc288 commit 6f70b6b
Show file tree
Hide file tree
Showing 9 changed files with 329 additions and 124 deletions.
Expand Up @@ -2,6 +2,6 @@
"sourceDefinitionId": "2e875208-0c0b-4ee4-9e92-1cb3156ea799",
"name": "Iterable",
"dockerRepository": "airbyte/source-iterable",
"dockerImageTag": "0.1.10",
"dockerImageTag": "0.1.12",
"documentationUrl": "https://docs.airbyte.io/integrations/sources/iterable"
}
Expand Up @@ -268,7 +268,7 @@
- name: Iterable
sourceDefinitionId: 2e875208-0c0b-4ee4-9e92-1cb3156ea799
dockerRepository: airbyte/source-iterable
dockerImageTag: 0.1.11
dockerImageTag: 0.1.12
documentationUrl: https://docs.airbyte.io/integrations/sources/iterable
sourceType: api
- name: Jira
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-iterable/Dockerfile
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.11
LABEL io.airbyte.version=0.1.12
LABEL io.airbyte.name=airbyte/source-iterable
4 changes: 2 additions & 2 deletions airbyte-integrations/connectors/source-iterable/setup.py
Expand Up @@ -7,11 +7,11 @@

MAIN_REQUIREMENTS = [
"airbyte-cdk~=0.1",
"pendulum~=1.2",
"pendulum~=2.1.2",
"requests~=2.25",
]

TEST_REQUIREMENTS = ["pytest~=6.1", "responses==0.13.3"]
TEST_REQUIREMENTS = ["pytest~=6.1", "responses==0.13.3", "freezegun==1.1.0"]


setup(
Expand Down
123 changes: 6 additions & 117 deletions airbyte-integrations/connectors/source-iterable/source_iterable/api.py
Expand Up @@ -3,129 +3,18 @@
#

import csv
import json
import urllib.parse as urlparse
from abc import ABC, abstractmethod
from io import StringIO
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Union
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional

import pendulum
import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams.http import HttpStream
from source_iterable.iterable_streams import IterableExportStream, IterableExportStreamRanged, IterableStream

EVENT_ROWS_LIMIT = 200
CAMPAIGNS_PER_REQUEST = 20


class IterableStream(HttpStream, ABC):

# Hardcode the value because it is not returned from the API
BACKOFF_TIME_CONSTANT = 10.0
# define date-time fields with potential wrong format

url_base = "https://api.iterable.com/api/"
primary_key = "id"

def __init__(self, api_key, **kwargs):
super().__init__(**kwargs)
self._api_key = api_key

@property
@abstractmethod
def data_field(self) -> str:
"""
:return: Default field name to get data from response
"""

def backoff_time(self, response: requests.Response) -> Optional[float]:
return self.BACKOFF_TIME_CONSTANT

def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
"""
Iterable API does not support pagination
"""
return None

def request_params(self, **kwargs) -> MutableMapping[str, Any]:
return {"api_key": self._api_key}

def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
response_json = response.json()
records = response_json.get(self.data_field, [])

for record in records:
yield record


class IterableExportStream(IterableStream, ABC):

cursor_field = "createdAt"
primary_key = None

def __init__(self, start_date, **kwargs):
super().__init__(**kwargs)
self._start_date = pendulum.parse(start_date)
self.stream_params = {"dataTypeName": self.data_field}

def path(self, **kwargs) -> str:
return "export/data.json"

@staticmethod
def _field_to_datetime(value: Union[int, str]) -> pendulum.datetime:
if isinstance(value, int):
value = pendulum.from_timestamp(value / 1000.0)
elif isinstance(value, str):
value = pendulum.parse(value)
else:
raise ValueError(f"Unsupported type of datetime field {type(value)}")
return value

def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
"""
Return the latest state by comparing the cursor value in the latest record with the stream's most recent state object
and returning an updated state object.
"""
latest_benchmark = latest_record[self.cursor_field]
if current_stream_state.get(self.cursor_field):
return {self.cursor_field: str(max(latest_benchmark, self._field_to_datetime(current_stream_state[self.cursor_field])))}
return {self.cursor_field: str(latest_benchmark)}

def request_params(self, stream_state: Mapping[str, Any], **kwargs) -> MutableMapping[str, Any]:

params = super().request_params(stream_state=stream_state)
start_datetime = self._start_date
if stream_state.get(self.cursor_field):
start_datetime = pendulum.parse(stream_state[self.cursor_field])

params.update(
{"startDateTime": start_datetime.strftime("%Y-%m-%d %H:%M:%S"), "endDateTime": pendulum.now().strftime("%Y-%m-%d %H:%M:%S")},
**self.stream_params,
)
return params

def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
for obj in response.iter_lines():
record = json.loads(obj)
record[self.cursor_field] = self._field_to_datetime(record[self.cursor_field])
yield record

def request_kwargs(
self,
stream_state: Mapping[str, Any],
stream_slice: Mapping[str, Any] = None,
next_page_token: Mapping[str, Any] = None,
) -> Mapping[str, Any]:
"""
https://api.iterable.com/api/docs#export_exportDataJson
Sending those type of requests could download large piece of json
objects splitted with newline character.
Passing stream=True argument to requests.session.send method to avoid
loading whole analytics report content into memory.
"""
return {"stream": True}


class Lists(IterableStream):
data_field = "lists"

Expand Down Expand Up @@ -257,7 +146,7 @@ class EmailBounce(IterableExportStream):
data_field = "emailBounce"


class EmailClick(IterableExportStream):
class EmailClick(IterableExportStreamRanged):
name = "email_click"
data_field = "emailClick"

Expand All @@ -267,17 +156,17 @@ class EmailComplaint(IterableExportStream):
data_field = "emailComplaint"


class EmailOpen(IterableExportStream):
class EmailOpen(IterableExportStreamRanged):
name = "email_open"
data_field = "emailOpen"


class EmailSend(IterableExportStream):
class EmailSend(IterableExportStreamRanged):
name = "email_send"
data_field = "emailSend"


class EmailSendSkip(IterableExportStream):
class EmailSendSkip(IterableExportStreamRanged):
name = "email_send_skip"
data_field = "emailSendSkip"

Expand Down

0 comments on commit 6f70b6b

Please sign in to comment.