In [17]:
from pathlib import Path

# Area of interest: bounding box corners in lon/lat degrees
west = -117.25
south = 32.5
east = -117.05
north = 32.7

# Date range used for the scene search
start_date, end_date = "2023-01-01", "2023-12-31"

# Names of the rclone remotes, exactly as created with `rclone config`
SRC_REMOTE = "cdse"   # Copernicus EODATA
DST_REMOTE = "s3"     # TIDE S3

# Where products land inside the TIDE S3 bucket.
# Give only the path part here (bucket/prefix); the remote name is taken
# from DST_REMOTE when the rclone commands are assembled.
DST_PREFIX = "sdsu-ereilly0150/s2/safe_2023/"   # must end with /

# rclone concurrency settings
TRANSFERS, CHECKERS = 4, 8

# Safety switches
DRY_RUN = False   # flip to True for a first pass that copies nothing
LIMIT = 0         # 0 = no cap; use a small number (e.g. 5) for a quick test

In [18]:
import subprocess, textwrap

def run(cmd):
    """
    Execute a command and report what happened.

    `cmd` is a list of argument strings. The space-joined command and the
    captured stdout are always printed; the captured stderr is printed
    (prefixed with "STDERR:") only when the exit code is non-zero.

    Returns the process exit code.
    """
    command_line = " ".join(cmd)
    print(command_line)

    completed = subprocess.run(cmd, text=True, capture_output=True)
    exit_code = completed.returncode

    print(completed.stdout)
    if exit_code:
        print("STDERR:", completed.stderr)
    return exit_code

# Smoke test: listing the source bucket and the destination prefix should
# both finish without an error if the two rclone remotes are configured.
run(["rclone", "lsd", SRC_REMOTE + ":eodata", "--max-depth", "1"])
run(["rclone", "lsd", DST_REMOTE + ":" + DST_PREFIX, "--max-depth", "1"])

rclone lsd cdse:eodata --max-depth 1
           0 2000-01-01 00:00:00        -1 C3S
           0 2000-01-01 00:00:00        -1 CAMS
           0 2000-01-01 00:00:00        -1 CEMS
           0 2000-01-01 00:00:00        -1 CLMS
           0 2000-01-01 00:00:00        -1 CLMS_archive
           0 2000-01-01 00:00:00        -1 Envisat
           0 2000-01-01 00:00:00        -1 Envisat-ASAR
           0 2000-01-01 00:00:00        -1 Global-Mosaics
           0 2000-01-01 00:00:00        -1 Jason-3
           0 2000-01-01 00:00:00        -1 Landsat-5
           0 2000-01-01 00:00:00        -1 Landsat-7
           0 2000-01-01 00:00:00        -1 Landsat-8-ESA
           0 2000-01-01 00:00:00        -1 SMOS
           0 2000-01-01 00:00:00        -1 SRTM
           0 2000-01-01 00:00:00        -1 Sentinel-1
           0 2000-01-01 00:00:00        -1 Sentinel-1-RTC
           0 2000-01-01 00:00:00        -1 Sentinel-2
           0 2000-01-01 00:00:00        -1 Sentinel-3
           0 2000-01-

0

In [20]:
from pystac_client import Client
from shapely.geometry import box, mapping
from urllib.parse import urlparse

# Open the Copernicus Data Space STAC catalogue.
stac = Client.open("https://stac.dataspace.copernicus.eu/v1/")

# Query Sentinel-2 L2A scenes that intersect the AOI within the time window.
# LIMIT is handed to the search as max_items so that a small test run stops
# paging early instead of fetching every result page and slicing afterwards.
search = stac.search(
    collections=["sentinel-2-l2a"],
    intersects=mapping(box(west, south, east, north)),
    datetime=f"{start_date}/{end_date}",
    max_items=LIMIT if LIMIT and LIMIT > 0 else None,
)

# ItemSearch.items() is the replacement for the deprecated get_items().
items = list(search.items())

print("Scenes found:", len(items))

def safe_root_from_item(item):
    """
    Derive the SAFE product root for a STAC item from its asset links.

    The assets are scanned in order; the first href of the form
    s3://eodata/.../<PRODUCT>.SAFE/... decides the result, which is the
    bucket-qualified path up to and including the ".SAFE" component:
      eodata/Sentinel-2/MSI/L2A/YYYY/MM/DD/<PRODUCT>.SAFE

    Returns None when no asset href matches.
    """
    bucket_url = "s3://eodata/"
    safe_marker = ".SAFE/"

    for asset in item.assets.values():
        # Assets without an href (or with href=None) count as an empty string.
        url = getattr(asset, "href", "") or ""
        if not url.startswith(bucket_url):
            continue
        if safe_marker not in url:
            continue

        # urlparse puts the bucket in netloc, so .path begins with "/Sentinel-2/...".
        object_key = urlparse(url).path
        before_marker, _, _ = object_key.partition(safe_marker)
        return "eodata" + before_marker + ".SAFE"  # re-attach the bucket name

    return None

# Resolve every scene once, then split the results into scenes that have a
# SAFE root (kept as (item id, root) pairs) and scenes that do not.
resolved = [(scene.id, safe_root_from_item(scene)) for scene in items]

safe_roots = [(scene_id, found.lstrip("/")) for scene_id, found in resolved if found]
missing = [scene_id for scene_id, found in resolved if not found]

print("SAFE roots found:", len(safe_roots))
print("Missing SAFE roots:", len(missing))
safe_roots[:3]



Scenes found: 286
SAFE roots found: 286
Missing SAFE roots: 0


[('S2B_MSIL2A_20231231T182749_N0510_R127_T11SMS_20231231T205408',
  'eodata/Sentinel-2/MSI/L2A/2023/12/31/S2B_MSIL2A_20231231T182749_N0510_R127_T11SMS_20231231T205408.SAFE'),
 ('S2B_MSIL2A_20231231T182749_N0510_R127_T11SMR_20231231T205408',
  'eodata/Sentinel-2/MSI/L2A/2023/12/31/S2B_MSIL2A_20231231T182749_N0510_R127_T11SMR_20231231T205408.SAFE'),
 ('S2A_MSIL2A_20231229T183801_N0510_R027_T11SMS_20231229T221252',
  'eodata/Sentinel-2/MSI/L2A/2023/12/29/S2A_MSIL2A_20231229T183801_N0510_R027_T11SMS_20231229T221252.SAFE')]

In [None]:
import subprocess

def rclone_copy_safe(src_root, product_id):
    """
    Copy one SAFE product directory from the source remote to the destination.

    Builds and runs
        rclone copy <SRC_REMOTE>:<src_root> <DST_REMOTE>:<DST_PREFIX>/<PRODUCT>.SAFE
    using the module-level TRANSFERS / CHECKERS settings, and appends
    `--dry-run` when DRY_RUN is set. The source, destination and full command
    are printed before rclone is started.

    Parameters
    ----------
    src_root : str
        Path of the product on the source remote, e.g.
        "eodata/Sentinel-2/MSI/L2A/.../<PRODUCT>.SAFE".
    product_id : str
        Identifier shown in the progress message only.

    Returns
    -------
    (bool, str)
        (True, "") when rclone exits with code 0, otherwise
        (False, <up to the last 500 characters of rclone's stderr>).
    """
    # Tolerate a trailing "/" on src_root: without this the last path
    # component would be empty and the product's files would be copied
    # straight into DST_PREFIX instead of into their own <PRODUCT>.SAFE folder.
    src_root = src_root.rstrip("/")
    product_name = src_root.rsplit("/", 1)[-1]  # "<PRODUCT>.SAFE"

    # Join prefix and product name with exactly one "/", so a DST_PREFIX that
    # is missing its trailing slash cannot fuse with the product name.
    dst_dir = DST_PREFIX.rstrip("/")
    dst_path = f"{dst_dir}/{product_name}" if dst_dir else product_name

    src = f"{SRC_REMOTE}:{src_root}"
    dst = f"{DST_REMOTE}:{dst_path}"

    cmd = [
        "rclone", "copy",
        src, dst,
        "--fast-list",
        "--transfers", str(TRANSFERS),
        "--checkers", str(CHECKERS),
        "--retries", "5",
        "--low-level-retries", "20",
        "--stats", "30s",
        "--stats-one-line",
        "--create-empty-src-dirs",
    ]
    if DRY_RUN:
        cmd.append("--dry-run")

    print(f"\nCopying {product_id}")
    print(" SRC:", src)
    print(" DST:", dst)
    print(" CMD:", " ".join(cmd))

    p = subprocess.run(cmd, capture_output=True, text=True)
    if p.returncode != 0:
        # Keep the END of stderr: the most recent error lines are emitted
        # last, and the head can be filled with output from earlier attempts.
        return False, p.stderr[-500:]
    return True, ""

# Copy every product in turn, tallying successes and keeping
# (product id, error text) for each product that could not be copied.
failed = []
ok = 0
n_products = len(safe_roots)

for position, (scene_id, safe_path) in enumerate(safe_roots, start=1):
    print(f"\n[{position}/{n_products}]")
    copied, message = rclone_copy_safe(safe_path, scene_id)
    if not copied:
        failed.append((scene_id, message))
        continue
    ok += 1

print("\nDone.")
print("Succeeded:", ok)
print("Failed:", len(failed))
failed[:3]


[1/286]

Copying S2B_MSIL2A_20231231T182749_N0510_R127_T11SMS_20231231T205408
 SRC: cdse:eodata/Sentinel-2/MSI/L2A/2023/12/31/S2B_MSIL2A_20231231T182749_N0510_R127_T11SMS_20231231T205408.SAFE
 DST: s3:sdsu-ereilly0150/s2/safe_2023/S2B_MSIL2A_20231231T182749_N0510_R127_T11SMS_20231231T205408.SAFE
 CMD: rclone copy cdse:eodata/Sentinel-2/MSI/L2A/2023/12/31/S2B_MSIL2A_20231231T182749_N0510_R127_T11SMS_20231231T205408.SAFE s3:sdsu-ereilly0150/s2/safe_2023/S2B_MSIL2A_20231231T182749_N0510_R127_T11SMS_20231231T205408.SAFE --fast-list --transfers 4 --checkers 8 --retries 5 --low-level-retries 20 --stats 30s --stats-one-line --create-empty-src-dirs

[2/286]

Copying S2B_MSIL2A_20231231T182749_N0510_R127_T11SMR_20231231T205408
 SRC: cdse:eodata/Sentinel-2/MSI/L2A/2023/12/31/S2B_MSIL2A_20231231T182749_N0510_R127_T11SMR_20231231T205408.SAFE
 DST: s3:sdsu-ereilly0150/s2/safe_2023/S2B_MSIL2A_20231231T182749_N0510_R127_T11SMR_20231231T205408.SAFE
 CMD: rclone copy cdse:eodata/Sentinel-2/MSI/L2A/202