In [279]:
import requests
import datetime
import json

In [280]:
# How many days before the reference date the new start date is placed.
AIRBYTE_PULL_DAYS = 10

# Base URL of the Airbyte API that every request in this notebook targets.
# Airbyte Core DEV alternative:
#   "http://ec2-54-74-79-147.eu-west-1.compute.amazonaws.com/api"
# Currently active: PROD AMZ Airbyte.
AIRBYTE_BASE_URL = "http://ec2-52-214-45-28.eu-west-1.compute.amazonaws.com/api"

In [281]:
# Example of restricting the update to specific sources:
# SOURCES_NAMES = ["SP-API Financial Events",]

# When set to None, every source in the workspace is updated.
SOURCES_NAMES = None

In [282]:
def _get_workspaces() -> dict:
    """List the workspaces exposed by the Airbyte API.

    Sends a POST request to ``{AIRBYTE_BASE_URL}/v1/workspaces/list``.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        ValueError: if the response status is not 200. The message is the
            ``"message"`` field of the error body when available, otherwise
            the HTTP status code and raw response text.
    """
    url = f"{AIRBYTE_BASE_URL}/v1/workspaces/list"
    # Without a timeout, requests waits forever on an unresponsive host.
    response = requests.post(url, timeout=60)

    if response.status_code == 200:
        return response.json()

    # Error bodies are not guaranteed to be JSON objects with a "message" key
    # (e.g. a proxy error page); fall back to the raw text so the real failure
    # is not masked by a decode error or KeyError.
    try:
        detail = response.json()["message"]
    except (ValueError, KeyError, TypeError):
        detail = f"HTTP {response.status_code}: {response.text}"
    raise ValueError(detail)


def _get_workspace_id() -> str:
    """Return the ``workspaceId`` of the first listed workspace.

    Returns ``None`` when the workspace list is empty or when the first
    entry carries no ``"workspaceId"`` key.
    """
    listed = _get_workspaces().get("workspaces", [])
    if len(listed) == 0:
        return None

    first_workspace = listed[0]
    return first_workspace.get("workspaceId")

In [283]:
def _find_all() -> dict:
    """List every source of the workspace returned by ``_get_workspace_id``.

    Sends a POST request to ``{AIRBYTE_BASE_URL}/v1/sources/list`` with the
    workspace id as JSON payload.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        ValueError: if no workspace id could be determined, or if the response
            status is not 200. In the latter case the message is the
            ``"message"`` field of the error body when available, otherwise
            the HTTP status code and raw response text.
    """
    workspace_id = _get_workspace_id()
    if workspace_id is None:
        # Fail with a clear message instead of posting a null workspaceId.
        raise ValueError("No Airbyte workspace id available to list sources for")

    payload = {"workspaceId": workspace_id}
    url = f"{AIRBYTE_BASE_URL}/v1/sources/list"
    # Without a timeout, requests waits forever on an unresponsive host.
    response = requests.post(url, json=payload, timeout=60)

    if response.status_code == 200:
        return response.json()

    # Error bodies are not guaranteed to be JSON objects with a "message" key;
    # fall back to the raw text so the real failure is not masked.
    try:
        detail = response.json()["message"]
    except (ValueError, KeyError, TypeError):
        detail = f"HTTP {response.status_code}: {response.text}"
    raise ValueError(detail)

In [284]:
def _update_source(source_json: dict) -> None:
    """Post an updated source definition to the Airbyte API.

    Sends ``source_json`` as the JSON payload of a POST request to
    ``{AIRBYTE_BASE_URL}/v1/sources/update``.

    Args:
        source_json: the payload to send, passed through unchanged.

    Raises:
        ValueError: if the response status is not 200. The message is the
            ``"message"`` field of the error body when available, otherwise
            the HTTP status code and raw response text.
    """
    url = f"{AIRBYTE_BASE_URL}/v1/sources/update"
    # Without a timeout, requests waits forever on an unresponsive host.
    response = requests.post(url, json=source_json, timeout=60)
    if response.status_code == 200:
        return

    # Error bodies are not guaranteed to be JSON objects with a "message" key;
    # fall back to the raw text so the real failure is not masked.
    try:
        detail = response.json()["message"]
    except (ValueError, KeyError, TypeError):
        detail = f"HTTP {response.status_code}: {response.text}"
    raise ValueError(detail)

In [285]:
def get_sources_by_source_source_name(source_name: list):
    """Return the workspace sources whose ``"sourceName"`` is in ``source_name``.

    Args:
        source_name: the ``"sourceName"`` values to keep. A single string is
            accepted as well and treated as a one-element list.

    Returns:
        A list with the matching source dicts from ``_find_all()``; empty when
        nothing matches.
    """
    # A bare string would silently turn the membership test below into a
    # substring check, so wrap it before filtering.
    wanted_names = [source_name] if isinstance(source_name, str) else source_name

    sources = _find_all().get("sources", [])
    return [source for source in sources if source["sourceName"] in wanted_names]

In [286]:
def _get_new_start_date(current_date: datetime) -> str:
    start_date = current_date + datetime.timedelta(days=-int(AIRBYTE_PULL_DAYS))
    return start_date.strftime("%Y-%m-%d") + "T00:00:00Z"

In [287]:
def update_source_config_field(source_json: dict, field: str, value: str) -> None:
    """Set one connection-configuration field of a source and push the update.

    The field is written into ``source_json["connectionConfiguration"]`` in
    place. If that configuration already has a ``"replication_end_date"`` key,
    it is reset to today's UTC date at midnight. The result is then sent via
    ``_update_source``.

    Args:
        source_json: a source dict containing a ``"connectionConfiguration"``
            mapping.
        field: name of the configuration field to set.
        value: new value for that field.

    Raises:
        KeyError: if ``source_json`` has no ``"connectionConfiguration"`` key.
        ValueError: propagated from ``_update_source`` on a failed update.
    """
    config = source_json["connectionConfiguration"]
    config[field] = value

    # Setting the end date to today, if it exists. The value is labelled "Z"
    # (UTC), so the date is taken in UTC rather than in local time.
    if "replication_end_date" in config:
        today_utc = datetime.datetime.now(datetime.timezone.utc)
        config["replication_end_date"] = today_utc.strftime("%Y-%m-%d") + "T00:00:00Z"

    # These keys are stripped from the update request. Build a separate payload
    # instead of deleting them from the caller's dict, so the caller can still
    # read them afterwards.
    excluded_keys = ("sourceDefinitionId", "workspaceId", "sourceName", "icon")
    payload = {
        key: item for key, item in source_json.items() if key not in excluded_keys
    }

    _update_source(payload)

In [288]:
def update_source_start_date(
    source_json: dict,
    start_date_field: str,
    current_date: datetime.datetime = None,
) -> None:
    """Move a source's start-date field back from ``current_date`` and push it.

    The new value comes from ``_get_new_start_date(current_date)`` and is
    applied through ``update_source_config_field``.

    Args:
        source_json: the source dict to update.
        start_date_field: name of the configuration field holding the start
            date.
        current_date: reference date to count back from; defaults to the
            current UTC time at the moment of the call.
    """
    if current_date is None:
        # A default of `datetime.datetime.now()` in the signature is evaluated
        # once, when the cell defining the function runs, and goes stale in a
        # long-lived kernel. Resolve it per call instead. UTC is used because
        # the formatted value is labelled "Z".
        current_date = datetime.datetime.now(datetime.timezone.utc)

    new_start_date = _get_new_start_date(current_date)
    update_source_config_field(source_json, start_date_field, new_start_date)

In [289]:
def update_sources_start_date(sources_names_list=SOURCES_NAMES):
    """Refresh ``"replication_start_date"`` on the selected sources.

    Args:
        sources_names_list: ``"sourceName"`` values to restrict the update to.
            When ``None``, every source returned by ``_find_all()`` is updated.

    Prints how many sources were selected and one line per updated source.
    """
    # Decide on the argument actually passed in, not on the module-level
    # SOURCES_NAMES: otherwise an explicit list is ignored whenever the global
    # is None and every source gets updated.
    if sources_names_list is None:
        sources = _find_all().get("sources", [])
        print(f"all sources will be updated, number: {len(sources)}")
    else:
        sources = get_sources_by_source_source_name(sources_names_list)
        print(f"selected sources will be updated, number: {len(sources)}")

    for source in sources:
        # Read the identifiers first; the update call modifies the source dict.
        source_id = source["sourceId"]
        source_label = source["name"]
        update_source_start_date(source, "replication_start_date")
        print(f"Source ID: {source_id}, Source Name: {source_label} updated")

In [None]:
# Runs the update: this posts changes to the Airbyte instance at
# AIRBYTE_BASE_URL for every source selected by SOURCES_NAMES.
update_sources_start_date(SOURCES_NAMES)