In [37]:
# %run ../setup_db.ipynb
# conf = SandboxConfig(EXTERNAL_HOST_NAME="127.0.0.1", HOST_PORT=8563, BUCKETFS_PORT=2580)
# setup_schema(conf)

Schema created in 38.90ms


This notebook sets up the [cloud-storage-extension](https://github.com/exasol/cloud-storage-extension) in the DB:

- [x] retrieve the latest released jar from GitHub
- [x] upload it to the BucketFS of our DB
- [x] set up the connector scripts

In [2]:
import requests
import pathlib
import typing as tt

# Retrieve the latest released jar file

In [3]:
def get_latest_version_and_jar_url() -> tt.Tuple[str, str]:
    """Look up the latest cloud-storage-extension release on GitHub.

    Queries the GitHub "latest release" endpoint and searches the release
    assets for the one whose file name ends with ``<tag_name>.jar``.

    Returns:
        A ``(version, download_url)`` tuple, where ``version`` is the release
        tag name and ``download_url`` is the asset's ``browser_download_url``.

    Raises:
        RuntimeError: if the API does not answer with HTTP 200, the release
            has no tag name, or no matching jar asset is found.
    """
    # Bound the request: without a timeout an unreachable API blocks forever.
    r = requests.get(
        "https://api.github.com/repos/exasol/cloud-storage-extension/releases/latest",
        timeout=30,
    )
    if r.status_code != 200:
        raise RuntimeError("Error sending request to the github api, code: %d" % r.status_code)
    data = r.json()
    version = data.get('tag_name')
    # An empty tag would make the suffix check below match *any* jar asset,
    # so treat it the same way as a missing one.
    if not version:
        raise RuntimeError("The latests version has no tag, something is wrong")
    # `or []` also covers an explicit `"assets": null` in the response.
    for asset in data.get('assets') or []:
        name = asset['name']
        if name.endswith(f"{version}.jar"):
            dl_url = asset['browser_download_url']
            return version, dl_url
    raise RuntimeError("Could not find proper jar url for the latest release")

In [40]:
def get_cloud_storage_jar(use_local_cache: bool = True) -> pathlib.Path:
    """Make the latest cloud-storage-extension jar available locally.

    The jar is stored in the current working directory under the file name
    taken from the last segment of the download URL.

    Args:
        use_local_cache: when True and a file with that name already exists,
            it is reused and nothing is downloaded.

    Returns:
        Relative path of the local jar file.

    Raises:
        RuntimeError: if the latest release cannot be resolved, or the
            download does not answer with HTTP 200.
    """
    version, jar_url = get_latest_version_and_jar_url()
    _, local_jar_name = jar_url.rsplit('/', maxsplit=1)
    local_jar_path = pathlib.Path(local_jar_name)
    if use_local_cache and local_jar_path.exists():
        print(f"Jar for version {version} already exists in {local_jar_path}, skip downloading")
        return local_jar_path

    print(f"Fetching jar for version {version} from {jar_url}...")
    # Download into a side file first: a failed or interrupted transfer must
    # never leave something under the final name, otherwise the cache check
    # above would keep serving a broken jar on every later run.
    partial_path = local_jar_path.with_name(local_jar_path.name + ".part")
    with requests.get(jar_url, stream=True, timeout=60) as r:
        if r.status_code != 200:
            raise RuntimeError(
                "Error downloading the jar from %s, code: %d" % (jar_url, r.status_code)
            )
        count_bytes = 0
        with partial_path.open("wb") as fd:
            # Stream in chunks instead of materializing the whole jar in memory.
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                count_bytes += fd.write(chunk)
    # Only a completed download is promoted to the final name.
    partial_path.replace(local_jar_path)
    print(f"Saved {count_bytes} bytes in {local_jar_path}")
    return local_jar_path

In [41]:
#local_jar_path = get_cloud_storage_jar()

Jar for version 2.7.6 already exists in exasol-cloud-storage-extension-2.7.6.jar, skip downloading


# Upload the jar to the bucketfs

In [5]:
from exasol import bucketfs

In [42]:
def put_in_bucketfs(conf: SandboxConfig, file_path: pathlib.Path, bucket_name: str = "myudfs") -> str:
    """Upload a local file to a BucketFS bucket unless it is already there.

    Args:
        conf: configuration providing the BucketFS URL parts, the BucketFS
            credentials and the ``bucketfs_path`` helper.
        file_path: local file to upload; its base name is used as the name
            inside the bucket.
        bucket_name: bucket to upload into.

    Returns:
        ``conf.bucketfs_path(bucket_name)`` joined with the file name by ``/``.
    """
    service_url = conf.BUCKETFS_URL_PREFIX + conf.EXTERNAL_BUCKETFS_HOST
    credentials = {
        bucket_name: {
            "username": conf.BUCKETFS_USER,
            "password": conf.BUCKETFS_PASSWORD,
        }
    }
    target_bucket = bucketfs.Service(service_url, credentials)[bucket_name]
    jar_name = file_path.name
    # Iterating the bucket presumably yields the names of the files it holds.
    if jar_name not in tuple(target_bucket):
        print("Upload jar to bucketfs")
        with file_path.open("rb") as jar_stream:
            target_bucket.upload(jar_name, jar_stream)
    else:
        print("Jar file is already present in the bucketfs")
    return f"{conf.bucketfs_path(bucket_name)}/{jar_name}"

In [43]:
#bucketfs_jar = put_in_bucketfs(conf, local_jar_path)

Jar file is already present in the bucketfs


# Create connection scripts

In [13]:
import pyexasol

In [50]:
def setup_scripts(conf: SandboxConfig, bucketfs_jar: str):
    """Create the cloud-storage-extension import scripts in the database.

    Opens ``conf.SCHEMA`` and (re)creates the ``IMPORT_PATH``,
    ``IMPORT_METADATA`` and ``IMPORT_FILES`` Java scripts, each referencing
    ``bucketfs_jar`` in its ``%jar`` directive. All statements are executed
    in order over a single connection built from ``conf.connection_params``.

    Args:
        conf: configuration providing ``connection_params`` and ``SCHEMA``.
        bucketfs_jar: jar path substituted into the ``%jar`` directives.
    """
    # Placeholders below are filled in by pyexasol from `substitutions`.
    statements = (
        "OPEN SCHEMA {schema!i}",
        """
--/
            CREATE OR REPLACE JAVA SET SCRIPT IMPORT_PATH(...) EMITS (...) AS
              %scriptclass com.exasol.cloudetl.scriptclasses.FilesImportQueryGenerator;
              %jar {jar_path!r};
/
        """,
        """
--/
        CREATE OR REPLACE JAVA SCALAR SCRIPT IMPORT_METADATA(...) 
          EMITS (
                filename VARCHAR(2000), 
                partition_index VARCHAR(100), 
                start_index DECIMAL(36, 0), 
                end_index DECIMAL(36, 0)
          ) AS
          %scriptclass com.exasol.cloudetl.scriptclasses.FilesMetadataReader;
          %jar {jar_path!r};
/
        """,
        """
--/
        CREATE OR REPLACE JAVA SET SCRIPT IMPORT_FILES(...) EMITS (...) AS
          %scriptclass com.exasol.cloudetl.scriptclasses.FilesDataImporter;
          %jar {jar_path!r};
/
        """,
    )
    # The same substitutions apply to every statement.
    substitutions = {
        "schema": conf.SCHEMA,
        "jar_path": bucketfs_jar,
    }
    with pyexasol.connect(**conf.connection_params) as connection:
        for statement in statements:
            connection.execute(statement, query_params=substitutions)

In [51]:
#setup_scripts(conf, bucketfs_jar)

In [52]:
def setup_cloud_storage_extension(conf: SandboxConfig):
    """Install the cloud storage extension into the database.

    Obtains the jar locally, uploads it to BucketFS and then creates the
    import scripts that reference the uploaded jar.

    Args:
        conf: configuration passed on to the upload and script-creation steps.
    """
    jar_in_bucketfs = put_in_bucketfs(conf, get_cloud_storage_jar())
    setup_scripts(conf, jar_in_bucketfs)

This notebook is meant to be loaded from other notebooks in order to install the cloud storage extension into the DB.
Example:

```
%run ../setup_db.ipynb
conf = SandboxConfig(EXTERNAL_HOST_NAME="127.0.0.1", HOST_PORT=8563, BUCKETFS_PORT=2580)
setup_schema(conf)

%run 00_setup.ipynb
setup_cloud_storage_extension(conf)
```