In [None]:
import os
import numpy as np
import xarray as xr
import s3fs
from dotenv import load_dotenv
import botocore
import boto3
import os
import concurrent.futures

In [None]:
# Number of worker threads; also used to size the HTTP connection pool below.
n_workers = 8

# Allow three pooled connections per worker, but never fewer than 60 overall.
max_pool_conn = max(3 * n_workers, 60)

client_config = botocore.config.Config(max_pool_connections=max_pool_conn)


In [None]:
# Load variables from the local "s3.env" file into the process environment
# so the credentials never have to be written into the notebook itself.
load_dotenv("s3.env")

# Object-store endpoint used by every S3 call in this notebook.
endpoint_url = 'https://objectstore.eodc.eu:2222'

# Either value is None when the corresponding variable is not set.
aws_access_key_id = os.environ.get("key")
aws_secret_access_key = os.environ.get("secret")

In [None]:
# Low-level S3 client bound to the custom endpoint, the credentials read from
# the environment, and the enlarged connection pool configured earlier.
s3_client_kwargs = {
    'endpoint_url': endpoint_url,
    'aws_access_key_id': aws_access_key_id,
    'aws_secret_access_key': aws_secret_access_key,
    'config': client_config,
}
client = boto3.client('s3', **s3_client_kwargs)

In [None]:
def get_storage_class(fsize):
    """Pick a storage-class name from a file size given in bytes.

    Parameters
    ----------
    fsize : int
        File size in bytes.

    Returns
    -------
    str
        "EXPRESS_ONEZONE" for sizes below 16 KiB, "STANDARD_3X" for sizes
        below 16 MiB, and "STANDARD" for everything else.
    """
    # (exclusive upper bound in bytes, storage class), checked in ascending order.
    size_limits = (
        (16 * 1024, "EXPRESS_ONEZONE"),      # 16 KiB
        (16 * 1024 ** 2, "STANDARD_3X"),     # 16 MiB
    )
    for upper_bound, class_name in size_limits:
        if fsize < upper_bound:
            return class_name
    return "STANDARD"

In [None]:
def upload_one_file(client, bucket_name, fname, object_name, storage_class):
    """Upload one local file to a bucket through ``client.upload_file``.

    Parameters
    ----------
    client
        Object exposing ``upload_file(filename, bucket, key, ...)``.
    bucket_name : str
        Destination bucket.
    fname : str
        Path of the local file to send.
    object_name : str
        Key under which the file is stored in the bucket.
    storage_class : str or None
        When not None, forwarded as ``ExtraArgs={'StorageClass': ...}``;
        when None, no ``ExtraArgs`` are passed at all.
    """
    # Only add the keyword when a storage class was requested, so the call
    # made for ``None`` carries exactly three positional arguments.
    optional_kwargs = {}
    if storage_class is not None:
        optional_kwargs['ExtraArgs'] = {'StorageClass': storage_class}
    client.upload_file(fname, bucket_name, object_name, **optional_kwargs)

In [None]:
# Plan the upload. The three steps below run in dependency order so the
# notebook works from a fresh kernel: (1) enumerate the local store,
# (2) list what the bucket already holds, (3) keep only the missing files.

# Destination bucket; defined here because this is the first cell that needs it.
output_s3_bucket = 'S1Sig0'

# --- 1. Enumerate every file of the local Zarr store -----------------------
output_zarr = "s1sig0.zarr"
root_uri = ""
remote_dir = ""
bname = 's1sig0.zarr'
# Key prefix of the store inside the bucket (empty components are dropped by join).
remote_zarr_uri = os.path.join(root_uri, remote_dir, bname)

files_to_upload = []
for root, _dirs, files in os.walk(output_zarr):
    for file in files:
        fpath = os.path.join(root, file)
        # S3 keys always use "/", so normalise the OS-specific separator.
        relpath = os.path.relpath(fpath, output_zarr).replace(os.sep, "/")
        remote_output_path = f"{remote_zarr_uri}/{relpath}"
        fsize = os.path.getsize(fpath)
        # Tuple layout: (local path, S3 key, storage class, size in bytes).
        # NOTE(review): the storage class is fixed to "STANDARD" for every
        # file; get_storage_class(fsize) is not applied here -- confirm
        # whether size-based classes are wanted before switching.
        files_to_upload.append((fpath, remote_output_path, "STANDARD", fsize))

# --- 2. Collect the keys that already exist under the store prefix ---------
existing_files = set()
paginator = client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=output_s3_bucket, Prefix=remote_zarr_uri):
    # Pages without a 'Contents' entry contribute nothing.
    for obj in page.get('Contents', []):
        existing_files.add(obj['Key'])

# --- 3. Keep only files whose S3 key is NOT already in the bucket ----------
new_ftu = [file_tuple for file_tuple in files_to_upload if file_tuple[1] not in existing_files]

print(f"Total files to upload: {len(new_ftu)}")


In [None]:
output_s3_bucket = 'S1Sig0'

# Upload every pending file on a thread pool. Each future's exception is
# inspected explicitly: an exception raised inside a worker is only stored on
# its future, so a failed upload would otherwise go unnoticed.
failed_uploads = []
with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
    # Map each future to its S3 key so a failure can be reported by name.
    futures = {
        executor.submit(upload_one_file, client, output_s3_bucket,
                        fname, obj_name, storage_class): obj_name
        for fname, obj_name, storage_class, _ in new_ftu
    }
    for future in concurrent.futures.as_completed(futures):
        error = future.exception()
        if error is not None:
            failed_uploads.append((futures[future], error))

# Report after all uploads were attempted, then fail loudly so an incomplete
# store is never mistaken for a finished one.
for failed_key, error in failed_uploads:
    print(f"Upload failed for {failed_key}: {error!r}")
if failed_uploads:
    raise RuntimeError(f"{len(failed_uploads)} of {len(new_ftu)} uploads failed")


In [None]:
# Open group 'AT' of the uploaded Zarr store directly from the object store
# and keep only the requested time window.
store_path = "s3://S1Sig0/s1sig0.zarr"

# Reuse the endpoint defined next to the credentials so the upload client and
# this reader can never point at different servers.
storage_options = {
    'key': aws_access_key_id,
    'secret': aws_secret_access_key,
    'client_kwargs': {'endpoint_url': endpoint_url},
}

ds = xr.open_zarr(
    store=store_path,
    group='AT',
    consolidated=True,
    chunks={},
    storage_options=storage_options,
).sel(time=slice("2024-01-01T00:00:00.000000000", "2024-03-01T00:00:00.000000000"))

In [None]:
# Bare last expression: shows the notebook repr of the time-sliced dataset `ds`.
ds