In [None]:
import subprocess
import shutil
import s3fs
import os
import re
from collections import defaultdict

In [None]:
# Product selection: these values are interpolated into the S3 search path
# and into the expected COG filenames by the cells below.
product = "ga_s2_gm_cyear_3"
version = "0-0-1"
year = 2020
dataset_maturity = "final"

# Public S3 bucket that holds the COGs, and the HTTPS endpoint used to build
# /vsicurl/ URLs for GDAL. The endpoint is derived from the bucket name so the
# two cannot drift apart when the bucket is changed.
bucket = "dea-public-data-dev"
aws_region = "ap-southeast-2"
http_prefix = f"https://{bucket}.s3.{aws_region}.amazonaws.com"

## RGB VRTs

In [None]:
# Build one three-band (red, green, blue) VRT per tile from the COGs in the
# public bucket, mosaic the per-tile VRTs into rgb_mosaic.vrt, and zip the
# per-tile VRT folder.

# connect to public S3 bucket (anonymous access)
fs = s3fs.S3FileSystem(anon=True)

# search for all COGs of the selected product / year / maturity
search_path = f"{bucket}/{product}/{version}/**/**/{year}--P3Y/{product}_*{year}--P3Y_{dataset_maturity}_*.tif"
all_cogs = fs.glob(search_path)

rgb_bands = ("red", "green", "blue")
# compiled once; only files ending in _red.tif / _green.tif / _blue.tif match
rgb_pattern = re.compile(r"_(red|green|blue)\.tif$")
bucket_prefix = f"{bucket}/"

# group files by tile ID (filename without the trailing "_<band>.tif")
tiles = defaultdict(dict)
for path in all_cogs:
    filename = os.path.basename(path)
    match = rgb_pattern.search(filename)
    if match is None:
        continue
    band = match.group(1)
    # cut at the matched suffix only, so a "_<band>.tif" substring elsewhere
    # in the name is left untouched
    tile_id = filename[:match.start()]
    # remove only the leading bucket name to make the path relative to the
    # bucket root; later occurrences of the bucket name are preserved
    if path.startswith(bucket_prefix):
        relative_path = path[len(bucket_prefix):]
    else:
        relative_path = path
    url = f"/vsicurl/{http_prefix}/{relative_path}"
    tiles[tile_id][band] = url

# create folder for tiles' VRTs
os.makedirs("tile_vrts", exist_ok=True)

# create RGB VRT for each tile that has all three bands
tile_vrts = []
skipped_tiles = []
for tile_id, bands in tiles.items():
    if any(b not in bands for b in rgb_bands):
        skipped_tiles.append(tile_id)
        continue
    vrt_path = f"tile_vrts/{tile_id}.vrt"
    # -separate stacks the three single-band inputs as bands 1..3 of the VRT
    subprocess.run([
        "gdalbuildvrt", "-separate", vrt_path,
        bands["red"], bands["green"], bands["blue"]
    ], check=True)
    tile_vrts.append(vrt_path)

if skipped_tiles:
    print(f"Skipped {len(skipped_tiles)} tile(s) missing one or more RGB bands")

# fail early with a clear message instead of handing gdalbuildvrt an empty list
if not tile_vrts:
    raise RuntimeError(f"No complete RGB tiles found for search path: {search_path}")

# save list of tile VRTs
with open("tile_list.txt", "w") as f:
    f.writelines(f"{vrt}\n" for vrt in tile_vrts)

# Build the final RGB mosaic VRT
subprocess.run([
    "gdalbuildvrt", "-input_file_list", "tile_list.txt", "rgb_mosaic.vrt"
], check=True)

# bundle the per-tile VRTs into tile_vrts.zip
shutil.make_archive("tile_vrts", "zip", "tile_vrts")
print("Created tile_vrts.zip")

## Clear count VRT


In [None]:
# Build one VRT per tile from the "count" COGs in the public bucket, mosaic
# the per-tile VRTs into clear_count_mosaic.vrt, and zip the per-tile VRT
# folder.

# connect to public S3 bucket (anonymous access)
fs = s3fs.S3FileSystem(anon=True)

# search for all COGs of the selected product / year / maturity
search_path = f"{bucket}/{product}/{version}/**/**/{year}--P3Y/{product}_*{year}--P3Y_{dataset_maturity}_*.tif"
all_cogs = fs.glob(search_path)

# compiled once; only files ending in _count.tif match
count_pattern = re.compile(r"_(count)\.tif$")
bucket_prefix = f"{bucket}/"

# group files by tile ID (filename without the trailing "_count.tif")
tiles = defaultdict(dict)
for path in all_cogs:
    filename = os.path.basename(path)
    match = count_pattern.search(filename)
    if match is None:
        continue
    band = match.group(1)
    # cut at the matched suffix only, so a "_count.tif" substring elsewhere
    # in the name is left untouched
    tile_id = filename[:match.start()]
    # remove only the leading bucket name to make the path relative to the
    # bucket root; later occurrences of the bucket name are preserved
    if path.startswith(bucket_prefix):
        relative_path = path[len(bucket_prefix):]
    else:
        relative_path = path
    url = f"/vsicurl/{http_prefix}/{relative_path}"
    tiles[tile_id][band] = url

# create folder for tiles' VRTs
os.makedirs("tile_vrts_count", exist_ok=True)

# create a count VRT for each tile; every entry in `tiles` has a "count" URL
# because only count files were grouped above
tile_vrts = []
for tile_id, bands in tiles.items():
    vrt_path = f"tile_vrts_count/{tile_id}.vrt"
    subprocess.run([
        "gdalbuildvrt", "-separate", vrt_path,
        bands["count"]
    ], check=True)
    tile_vrts.append(vrt_path)

# fail early with a clear message instead of handing gdalbuildvrt an empty list
if not tile_vrts:
    raise RuntimeError(f"No count tiles found for search path: {search_path}")

print(f"Built {len(tile_vrts)} count tile VRT(s)")

# save list of tile VRTs
with open("tile_list_count.txt", "w") as f:
    f.writelines(f"{vrt}\n" for vrt in tile_vrts)

# Build the final clear-count mosaic VRT
subprocess.run([
    "gdalbuildvrt", "-input_file_list", "tile_list_count.txt", "clear_count_mosaic.vrt"
], check=True)

# bundle the per-tile VRTs into tile_vrts_count.zip
shutil.make_archive("tile_vrts_count", "zip", "tile_vrts_count")
print("Created tile_vrts_count.zip")