In [2]:
import os
import numpy as np
import xarray as xr
import s3fs
from dotenv import load_dotenv
import botocore
import boto3
import os
import concurrent.futures

In [2]:
n_workers=8 
max_pool_conn = 3 * n_workers
if max_pool_conn < 60: max_pool_conn = 60
client_config = botocore.config.Config(
    max_pool_connections=max_pool_conn,
)


In [4]:
load_dotenv("s3.env") 

endpoint_url = 'https://objectstore.eodc.eu:2222'
aws_access_key_id = os.getenv("key")
aws_secret_access_key = os.getenv("secret")

In [4]:
client = boto3.client(
    's3',
    endpoint_url=endpoint_url,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    config=client_config
)

In [5]:
def get_storage_class(fsize):

    small = 16384       # 16KB = 16*1024
    medium = 16777216   # 16MB = 16*1024**2

    if fsize < small:
        storage_class = "EXPRESS_ONEZONE"
    elif fsize < medium:
        storage_class = "STANDARD_3X"
    else:
        storage_class = "STANDARD"

    return storage_class

In [6]:
def upload_one_file(client, bucket_name, fname, object_name, storage_class):
    if storage_class is None:
        client.upload_file(fname, bucket_name, object_name) 
    else:
        client.upload_file(fname, bucket_name, object_name,
                           ExtraArgs={'StorageClass': storage_class}
                           ) 

In [None]:
existing_files = set()

paginator = client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=output_s3_bucket, Prefix="s1sig0.zarr"):
    for obj in page.get('Contents', []):
        existing_files.add(obj['Key'])

In [45]:
# Build a filtered list: only files whose S3 key is NOT already in the bucket
new_ftu = [file_tuple for file_tuple in files_to_upload if file_tuple[1] not in existing_files]

print(f"Total files to upload: {len(new_ftu)}")


Total files to upload: 234


In [44]:
files_to_upload= []
output_zarr="s1sig0.zarr"
root_uri=""
remote_dir=""
bname = 's1sig0.zarr'
remote_zarr_uri = os.path.join(root_uri, remote_dir, bname)

for root, dirs, files in os.walk(output_zarr):
    for file in files:
        fpath = os.path.join(root, file)
        relpath = os.path.relpath(fpath, output_zarr)
        remote_output_path = f"{remote_zarr_uri}/{relpath}"
        fsize = os.path.getsize(fpath)
        storage_class = get_storage_class(fsize)
        files_to_upload.append((fpath, remote_output_path, 
                                "STANDARD", fsize))


In [46]:
output_s3_bucket = 'S1Sig0'

with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
    futures = [executor.submit(upload_one_file, client, output_s3_bucket, 
                                fname, obj_name, storage_class) 
                for fname, obj_name, storage_class, _ in new_ftu]
    concurrent.futures.wait(futures, timeout=None, 
                            return_when=concurrent.futures.ALL_COMPLETED)


In [9]:
files_to_upload

[('s1sig0.zarr/AT/time/zarr.json',
  's1sig0.zarr/AT/time/zarr.json',
  'STANDARD',
  737),
 ('s1sig0.zarr/AT/time/c/0', 's1sig0.zarr/AT/time/c/0', 'STANDARD', 9067)]

In [47]:
fs = s3fs.S3FileSystem(
    anon=False,
    key=aws_access_key_id,
    secret=aws_secret_access_key,
    client_kwargs={'endpoint_url': 'https://objectstore.eodc.eu:2222'}
)

# 2. Define the full S3 path to your Zarr store
bucket_name = 'S1Sig0'
zarr_store = f"{bucket_name}/s1sig0.zarr"

# 3. Open the Zarr store and specify the group
ds = xr.open_zarr(store=fs.get_mapper(zarr_store), consolidated=True, group='AT', chunks={})

TypeError: Unsupported type for store_like: 'FSMap'

In [5]:
store_path = f"s3://S1Sig0/s1sig0.zarr"
ds = xr.open_zarr(store=store_path, group='AT', consolidated=True, chunks={}, storage_options={
    'key': aws_access_key_id,
    'secret': aws_secret_access_key,
    'client_kwargs': {'endpoint_url': 'https://objectstore.eodc.eu:2222'}
}).sel(time=slice("2024-01-01T00:00:00.000000000","2024-03-01T00:00:00.000000000"))

In [55]:
ds.sel(time=slice("2024-01-03T00:00:00.000000000","2024-01-03T00:00:00.000000000") , x=slice(4800010, 4801990), y=slice(1799990, 1798010)).load()

In [6]:
ds

Unnamed: 0,Array,Chunk
Bytes,306.78 GiB,7.15 MiB
Shape,"(61, 30000, 45000)","(30, 250, 250)"
Dask graph,64800 chunks in 3 graph layers,64800 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 306.78 GiB 7.15 MiB Shape (61, 30000, 45000) (30, 250, 250) Dask graph 64800 chunks in 3 graph layers Data type float32 numpy.ndarray",45000  30000  61,

Unnamed: 0,Array,Chunk
Bytes,306.78 GiB,7.15 MiB
Shape,"(61, 30000, 45000)","(30, 250, 250)"
Dask graph,64800 chunks in 3 graph layers,64800 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,613.56 GiB,57.22 MiB
Shape,"(61, 30000, 45000)","(30, 500, 500)"
Dask graph,16200 chunks in 3 graph layers,16200 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 613.56 GiB 57.22 MiB Shape (61, 30000, 45000) (30, 500, 500) Dask graph 16200 chunks in 3 graph layers Data type float64 numpy.ndarray",45000  30000  61,

Unnamed: 0,Array,Chunk
Bytes,613.56 GiB,57.22 MiB
Shape,"(61, 30000, 45000)","(30, 500, 500)"
Dask graph,16200 chunks in 3 graph layers,16200 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,613.56 GiB,14.31 MiB
Shape,"(61, 30000, 45000)","(30, 250, 250)"
Dask graph,64800 chunks in 3 graph layers,64800 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 613.56 GiB 14.31 MiB Shape (61, 30000, 45000) (30, 250, 250) Dask graph 64800 chunks in 3 graph layers Data type float64 numpy.ndarray",45000  30000  61,

Unnamed: 0,Array,Chunk
Bytes,613.56 GiB,14.31 MiB
Shape,"(61, 30000, 45000)","(30, 250, 250)"
Dask graph,64800 chunks in 3 graph layers,64800 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,613.56 GiB,14.31 MiB
Shape,"(61, 30000, 45000)","(30, 250, 250)"
Dask graph,64800 chunks in 3 graph layers,64800 chunks in 3 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 613.56 GiB 14.31 MiB Shape (61, 30000, 45000) (30, 250, 250) Dask graph 64800 chunks in 3 graph layers Data type datetime64[ns] numpy.ndarray",45000  30000  61,

Unnamed: 0,Array,Chunk
Bytes,613.56 GiB,14.31 MiB
Shape,"(61, 30000, 45000)","(30, 250, 250)"
Dask graph,64800 chunks in 3 graph layers,64800 chunks in 3 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,613.56 GiB,57.22 MiB
Shape,"(61, 30000, 45000)","(30, 500, 500)"
Dask graph,16200 chunks in 3 graph layers,16200 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 613.56 GiB 57.22 MiB Shape (61, 30000, 45000) (30, 500, 500) Dask graph 16200 chunks in 3 graph layers Data type float64 numpy.ndarray",45000  30000  61,

Unnamed: 0,Array,Chunk
Bytes,613.56 GiB,57.22 MiB
Shape,"(61, 30000, 45000)","(30, 500, 500)"
Dask graph,16200 chunks in 3 graph layers,16200 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
