In [None]:
import glob
import itertools
import os
import posixpath

import pandas as pd
from deltalake import write_deltalake

## Generate some data

In [None]:
# Convert each sample CSV into a local Delta table.
# mode="ignore" leaves an already-written table untouched, so re-running this
# cell no longer fails because the table directory exists. Delete the table
# directory first if the CSV changed and the table has to be regenerated.
covid = pd.read_csv("../data/covid.csv")
write_deltalake("../data/covid-table", covid, mode="ignore")

examination = pd.read_csv("../data/examination.csv")
write_deltalake("../data/examination-table", examination, mode="ignore")

## Upload tables to GCS

In [None]:
from google.cloud import storage


# Module-level Google Cloud Storage client used by upload_gcs.
# Constructed without arguments, so project and credentials are presumably
# resolved from the environment -- confirm before running.
CLIENT_GCS = storage.Client()


def upload_gcs(local_path, bucket, gcs_path):
    """Recursively upload a local directory tree to a Google Cloud Storage bucket.

    Every file below ``local_path`` (dot-files and the contents of
    dot-directories included) is uploaded under the ``gcs_path`` prefix,
    keeping its path relative to ``local_path``.

    Args:
        local_path: Existing local directory to upload.
        bucket: Bucket name, or any other value ``CLIENT_GCS.get_bucket`` accepts.
        gcs_path: Object-name prefix under which the files are stored.

    Raises:
        AssertionError: If ``local_path`` is not a directory.
    """
    assert os.path.isdir(local_path), "not a directory: %r" % (local_path,)
    # Resolve the bucket once for the whole tree instead of once per directory.
    bucket = CLIENT_GCS.get_bucket(bucket)
    # os.walk also lists hidden entries and, unlike a glob pattern built from
    # local_path, is not thrown off by glob metacharacters ("[", "*", "?") in
    # the path. followlinks=True keeps descending into symlinked directories.
    for dirpath, _dirnames, filenames in os.walk(local_path, followlinks=True):
        rel_dir = os.path.relpath(dirpath, local_path)
        rel_parts = [] if rel_dir == os.curdir else rel_dir.split(os.sep)
        for filename in filenames:
            # Object names always use "/" separators, whatever the local OS uses.
            remote_path = posixpath.join(gcs_path, *rel_parts, filename)
            blob = bucket.blob(remote_path)
            blob.upload_from_filename(os.path.join(dirpath, filename))

In [None]:
# Push each local Delta table directory to the test bucket under its own prefix.
for table_dir, prefix in (
    ("../data/covid-table/", "covid"),
    ("../data/examination-table/", "examination"),
):
    upload_gcs(table_dir, "delta-sharing-test", prefix)

## Upload tables to S3

In [None]:
import boto3

# Module-level S3 client used by upload_s3.
# No credentials or region are passed here, so they are presumably resolved
# from the environment / AWS configuration -- confirm before running.
CLIENT_S3 = boto3.client("s3")


def upload_s3(local_path, bucket, gcs_path):
    """Recursively upload a local directory tree to an S3 bucket.

    Every file below ``local_path`` (dot-files and the contents of
    dot-directories included) is uploaded under the ``gcs_path`` key prefix,
    keeping its path relative to ``local_path``.

    Args:
        local_path: Existing local directory to upload.
        bucket: Name of the destination bucket, passed to ``CLIENT_S3.upload_file``.
        gcs_path: Key prefix under which the files are stored. The name
            mirrors ``upload_gcs`` and is kept so keyword callers keep working.

    Raises:
        AssertionError: If ``local_path`` is not a directory.
    """
    assert os.path.isdir(local_path), "not a directory: %r" % (local_path,)
    # os.walk also lists hidden entries and, unlike a glob pattern built from
    # local_path, is not thrown off by glob metacharacters ("[", "*", "?") in
    # the path. followlinks=True keeps descending into symlinked directories.
    for dirpath, _dirnames, filenames in os.walk(local_path, followlinks=True):
        rel_dir = os.path.relpath(dirpath, local_path)
        rel_parts = [] if rel_dir == os.curdir else rel_dir.split(os.sep)
        for filename in filenames:
            # Object keys always use "/" separators, whatever the local OS uses.
            remote_path = posixpath.join(gcs_path, *rel_parts, filename)
            CLIENT_S3.upload_file(os.path.join(dirpath, filename), bucket, remote_path)

In [None]:
# Push each local Delta table directory to the test bucket under its own key prefix.
for table_dir, prefix in (
    ("../data/covid-table/", "covid"),
    ("../data/examination-table/", "examination"),
):
    upload_s3(table_dir, "delta-sharing-test", prefix)