-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add normalization (#75) Signed-off-by: Revital Sur <eres@il.ibm.com> Co-authored-by: Doron Chen <cdoron@il.ibm.com> * Change the run command of the normalization image. Signed-off-by: Revital Sur <eres@il.ibm.com> * Add container.py for common code to handle container functionality. Signed-off-by: Revital Sur <eres@il.ibm.com> * Apply normalization only on write operation. Signed-off-by: Revital Sur <eres@il.ibm.com> * Add a table to the asset. Signed-off-by: Revital Sur <eres@il.ibm.com> * Fixes after testing. Signed-off-by: Revital Sur <eres@il.ibm.com> * Add overwrite mode for write operation. Signed-off-by: Revital Sur <eres@il.ibm.com> * Minor fix. Signed-off-by: Revital Sur <eres@il.ibm.com> * Address review comments. Signed-off-by: Revital Sur <eres@il.ibm.com> * Remove the usage of transformer.transform in extract_data. Signed-off-by: Revital Sur <eres@il.ibm.com> * Address Doron's comments. Signed-off-by: Revital Sur <eres@il.ibm.com> * Minor changes. Signed-off-by: Revital Sur <eres@il.ibm.com> * Add comments. Signed-off-by: Revital Sur <eres@il.ibm.com> * Rename function name in abm/connector.py Signed-off-by: Revital Sur <eres@il.ibm.com> * Minor fix. Signed-off-by: Revital Sur <eres@il.ibm.com> * Minor change. Signed-off-by: Revital Sur <eres@il.ibm.com> * Address review comments. Signed-off-by: Revital Sur <eres@il.ibm.com> --------- Signed-off-by: Revital Sur <eres@il.ibm.com> Co-authored-by: Doron Chen <cdoron@il.ibm.com>
- Loading branch information
Showing
14 changed files
with
239 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# | ||
# Copyright 2022 IBM Corp. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
import docker | ||
import time | ||
|
||
# End-of-transmission byte (Ctrl-D); sent on a container's stdin socket to signal end of input.
CTRLD = b'\x04'
|
||
class Container:
    """Helper around the docker SDK client for running containers.

    Holds the logger, the host working directory and the directory that the
    working directory is mounted on inside the container, plus a docker
    client created from the environment.
    """

    # Number of attempts to connect to the docker daemon, and the pause
    # (in seconds) between two attempts.
    DOCKER_CONNECT_RETRIES = 10
    DOCKER_CONNECT_DELAY_SECONDS = 1

    def __init__(self, logger, workdir, mountdir):
        """Store the settings and connect to the docker daemon.

        Raises:
            Exception: the last error raised by docker.from_env() if the
                daemon could not be reached after all retries. Failing here
                avoids an unrelated AttributeError on self.client later on.
        """
        self.logger = logger
        self.workdir = workdir
        self.mountdir = mountdir
        self.client = self._connect_to_docker()

    def _connect_to_docker(self):
        """Return a docker client, retrying while the daemon is unavailable."""
        # Potentially the fybrik-blueprint pod for the airbyte module can start
        # before the docker daemon pod, causing docker.from_env() to fail.
        last_error = None
        for attempt in range(self.DOCKER_CONNECT_RETRIES):
            try:
                return docker.from_env()
            except Exception as e:
                last_error = e
                self.logger.warning('error on docker.from_env() ' + str(e) +
                                    ' sleep and retry. Retry count = ' + str(attempt))
                time.sleep(self.DOCKER_CONNECT_DELAY_SECONDS)
        self.logger.error('Could not connect to the docker daemon',
                          extra={'error': str(last_error)})
        raise last_error

    def name_in_container(self, path):
        """Translate a file name on the host to the name of the same file in the container.

        Only the first occurrence of the working directory is replaced.
        For instance, if the workdir is '/tmp', the mountdir is '/local' and
        the path is '/tmp/tmp12345', return '/local/tmp12345'.
        """
        return path.replace(self.workdir, self.mountdir, 1)

    def filter_reply(self, reply):
        """Post-process the reply of a container run; this implementation returns it unchanged."""
        return reply

    def run_container(self, command, image, volumes, environment=None, remove=True, detach=False, stream=True,
                      init=False):
        """Run a docker container from the given image on the host network.

        Args:
            command: command to run in the container.
            image: image to run.
            volumes: volumes to mount, passed to the docker client as is.
            environment: environment variables for the container.
            remove, detach, stream, init: passed to the docker client as is.

        Returns:
            The reply of the docker client passed through filter_reply(), or
            None if the docker client raised a DockerException.
        """
        self.logger.debug("running command: " + str(command))

        try:
            reply = self.client.containers.run(image, volumes=volumes, network_mode='host',
                                               environment=environment,
                                               command=command, init=init, stream=stream,
                                               remove=remove, detach=detach)
            return self.filter_reply(reply)
        except docker.errors.DockerException as e:
            self.logger.error('Running of docker container failed',
                              extra={'error': str(e)})
            return None

    def open_socket_to_container(self, command, image, volumes, detach=True, tty=True, stdin_open=True, remove=True):
        """Start a container and attach a blocking socket to its stdin/stdout/stderr.

        Returns:
            A (socket, container) pair; hand both to close_socket_to_container()
            when done.
        """
        container = self.client.containers.run(image, detach=detach,
                                               tty=tty, stdin_open=stdin_open,
                                               volumes=volumes, network_mode='host',
                                               command=command, remove=remove)
        # attach to the container stdin socket
        s = container.attach_socket(params={'stdin': 1, 'stream': 1, 'stdout': 1, 'stderr': 1})
        s._sock.setblocking(True)
        return s, container

    def close_socket_to_container(self, s, container):
        """Send ctrl-d to the container, then close the socket, stop the container and close the client.

        Every cleanup step runs even when an earlier step raises, so a broken
        socket does not leave the container running or the client open. The
        first error raised, if any, still propagates to the caller.
        """
        try:
            s._sock.sendall(CTRLD)  # ctrl d to finish things up
        finally:
            try:
                s._sock.close()
            finally:
                try:
                    container.stop()
                finally:
                    self.client.close()

    def write_to_socket_to_container(self, s, binary_textline):
        """Write bytes to the socket attached to the container and flush it."""
        s._sock.sendall(binary_textline)
        s.flush()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# | ||
# Copyright 2022 IBM Corp. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
import docker | ||
import tempfile | ||
from .container import Container | ||
|
||
# Directory inside the normalization container on which the host workdir is mounted.
MOUNTDIR = '/local'
|
||
class NormalizationContainer(Container):
    """Container that runs the normalization image named in the configuration."""

    def __init__(self, config, logger, workdir, asset_name=""):
        """Read the normalization settings from the configuration.

        Args:
            config: configuration holding a 'normalization' section with the
                'image', 'integrationType' and 'airbyteVersion' fields.
            logger: logger handed to the base class.
            workdir: host directory that is mounted on MOUNTDIR in the container.
            asset_name: not used by this class; kept so existing callers keep working.

        Raises:
            ValueError: if a required field is missing from the normalization section.
        """
        normalization = config['normalization']
        for field in ('image', 'integrationType', 'airbyteVersion'):
            if field not in normalization:
                raise ValueError("'" + field + "' field missing from normalization section in configuration")
        self.normalization_image = normalization['image']
        self.integration_type = normalization['integrationType']
        self.airbyte_version = normalization['airbyteVersion']
        super().__init__(logger, workdir, MOUNTDIR)

    def run_container(self, command):
        """Run a docker container from the normalization image.

        Mount the workdir on MOUNTDIR. Remove the container after done.

        Returns:
            Whatever the base class run_container() returns: the filtered
            reply of the run, or None if running the container failed.
        """
        volumes = [self.workdir + ':' + MOUNTDIR]
        # The normalization image is tied to and released along with a specific Airbyte version.
        # ref: https://github.com/airbytehq/airbyte/blob/master/docs/understanding-airbyte/basic-normalization.md#airbyte-integrationbasesbase-normalization
        environment = ["WORKER_ENVIRONMENT=DOCKER", "AIRBYTE_VERSION=" + self.airbyte_version]
        # Hand the reply back so that callers can tell a failed run (None) from a successful one.
        return super().run_container(command, self.normalization_image, volumes, environment,
                                     remove=True, stream=True, init=True)

    def create_normalization_command(self, catalog, config):
        """Create the normalization command.

        Args:
            catalog: file object of the catalog; its name is translated to the
                path of the file inside the container.
            config: file object of the configuration; translated the same way.

        Returns:
            The command line string for the normalization container.
        """
        return ' '.join([
            'run',
            '--config', self.name_in_container(config.name),
            '--catalog', self.name_in_container(catalog.name),
            '--integration-type', self.integration_type,
        ])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.