In [1]:
import geoengine as ge
import math
from datetime import datetime
import os


## Connect to the Geo Engine

In [2]:
# Point the client at the Geo Engine API (a local instance in this notebook).
ge.initialize("http://localhost:3030/api")

In [3]:
# Keep the user id of the current session; it is used below as the prefix
# of the dataset names.
session = ge.get_session()
user_id = session.user_id
session  # last expression: shown as the cell output

Server:              http://localhost:3030/api
User Id:             2d51feaa-2291-45f5-81d0-5dc49523e6b9
Session Id:          98701831-87ba-4b7f-8f50-e5918a00e1f6
Session valid until: 2024-08-15T11:02:11.232Z

Some information about the bands we want to use:

In [4]:
# Band names grouped by pixel size. The keys are strings; get_band_resolution
# converts them to numbers (used as "<n>m" in the dataset names).
band_resolutions = {
    "10": ["B02", "B03", "B04", "B08"],    
    "20": ["B11", "B12", "SCL"]
}

# Bands that are downloaded as data bands in the loops below.
band_names = ["B02", "B03", "B04", "B08", "B11", "B12"]
# Name of the SCL band; it is only added to the raw-data download.
scl_name = "SCL"

def get_band_resolution(band_name):
    """Return the pixel size of a band as a float.

    Args:
        band_name: A band listed in `band_resolutions`, or "NDVI".

    Returns:
        The resolution as a float, or None when the band is unknown.
    """
    if band_name == "NDVI":
        # "NDVI" is not part of `band_resolutions`. Return a float here as
        # well so that every known band yields the same numeric type.
        return 10.0

    for res, bands in band_resolutions.items():
        if band_name in bands:
            return float(res)
    return None

def get_resoluton_bands(res):
    """Look up the band names stored under the resolution key `res`.

    `res` has to be one of the keys of `band_resolutions` (strings such as
    "10"); any other key raises KeyError.
    """
    bands_for_resolution = band_resolutions[res]
    return bands_for_resolution

# Quick check of the lookup: B02 is listed under the "10" key.
get_band_resolution("B02")

10.0

The tiles we want to use, and a way to modify their bounds so that they match the pixel resolution of the band. The tile size in pixels is also rounded up to a power of 2, which is convenient for tiling.

In [5]:
def next_power_of_2(x):
    """Return the smallest power of two (as an int, at least 1) that is >= x.

    Args:
        x: A non-negative number, e.g. a size in pixels.

    Returns:
        An int power of two. Values in the range [0, 1] yield 1.

    Raises:
        ValueError: If `x` is negative.
    """
    if x < 0:
        raise ValueError(f"next_power_of_2 expects a non-negative value, got {x!r}")

    # Round up to whole units first; everything below is exact integer math,
    # which avoids the float rounding of log2 for large values.
    n = math.ceil(x)
    if n <= 1:
        return 1
    return 1 << (n - 1).bit_length()

def better_tile_bounds(xmin, ymin, xmax, ymax, res):
    """Snap a bounding box to the pixel grid and give it a power-of-two size.

    The lower-left corner is moved down to the nearest multiple of `res`. The
    width and height (in pixels) are then rounded up to the next power of two,
    measured from the snapped corner so the result always covers the input box.

    Args:
        xmin, ymin, xmax, ymax: Bounds of the original tile.
        res: Pixel size, in the same units as the bounds.

    Returns:
        [x_start, y_start, x_end, y_end] of the adjusted tile.
    """
    # floor (not int) so negative coordinates also snap downwards, not to zero.
    x_start = math.floor(xmin / res) * res
    y_start = math.floor(ymin / res) * res

    # Measure from the snapped origin; otherwise the extent could end before
    # xmax/ymax whenever the origin was moved.
    pixels_x = next_power_of_2((xmax - x_start) / res)
    pixels_y = next_power_of_2((ymax - y_start) / res)

    return [x_start, y_start, x_start + pixels_x * res, y_start + pixels_y * res]


# Tiles keyed by id; each value is [xmin, ymin, xmax, ymax].
tiles = {"1": [677000.0, 5888000.0, 678000.0, 5889000.0]}

# All tiles are aligned to the pixel size of B12 (the "20" group, the coarser
# of the two groups in band_resolutions).
max_pixel_size = int(get_band_resolution("B12"))

better_tiles = {
    tile_id: better_tile_bounds(*bounds, max_pixel_size)
    for (tile_id, bounds) in tiles.items()
}

# Print the adjusted bounds and the resulting extent of every tile.
for tile in better_tiles:
    tile_bounds = better_tiles[tile]
    print(tile, tile_bounds)
    xmin, ymin, xmax, ymax = tile_bounds
    size_x, size_y = xmax - xmin, ymax - ymin
    print(tile, size_x, size_y)

1 [677000, 5888000, 678280, 5889280]
1 1280 1280


For convenience, the tiles are stored as separate datasets. Here is a simple way to resolve their names:

In [6]:
def get_dataset_name(user_id, tile, band):
    """Build the name of the dataset that holds one band of one tile.

    The name has the form "<user_id>:large_raster_download_sentinel2_..." and
    encodes the band's resolution, the tile id and the band name.
    """
    resolution = get_band_resolution(band)
    # The name contains the resolution as a whole number (e.g. "10m").
    band_resolution = int(resolution)
    return f"{user_id}:large_raster_download_sentinel2_{band_resolution}m_tile_{tile}_band_{band}_2022_2023"

# Example of a resolved name; the tile id is only interpolated into the string.
get_dataset_name(user_id, "10", "B02")

'2d51feaa-2291-45f5-81d0-5dc49523e6b9:large_raster_download_sentinel2_10m_tile_10_band_B02_2022_2023'

The Geo Engine uses a `QueryRectangle` to request the data. It specifies the bounding box of the area of interest and the time range. (The resolution is also specified here, but this will change in the near future)


In [7]:
def create_query(bounds, resolution, time_start, time_end):
    """Assemble a `ge.QueryRectangle` for the given area and time range.

    Args:
        bounds: Sequence [xmin, ymin, xmax, ymax].
        resolution: Pixel size, used for both the x and the y direction.
        time_start, time_end: Passed on to `ge.TimeInterval`.

    Returns:
        The query rectangle; its spatial reference is fixed to EPSG:32632.
    """
    xmin, ymin, xmax, ymax = bounds
    bbox = ge.BoundingBox2D(xmin, ymin, xmax, ymax)
    interval = ge.TimeInterval(time_start, time_end)
    pixel_size = ge.SpatialResolution(resolution, resolution)
    return ge.QueryRectangle(
        spatial_bounds=bbox,
        time_interval=interval,
        resolution=pixel_size,
        srs="EPSG:32632",
    )

# Time range used by all downloads below: 2022-01-01 up to 2023-01-01.
time_start = datetime(year=2022, month=1, day=1)
time_end = datetime(year=2023, month=1, day=1)

# Preview the query for tile "1" at a pixel size of 10.
create_query(better_tiles["1"], 10.0, time_start, time_end)

QueryRectangle( 
    BoundingBox2D(xmin=677000, ymin=5888000, xmax=678280, ymax=5889280)
    TimeInterval(start=2022-01-01T00:00:00.000000, end=2023-01-01T00:00:00.000000)
    SpatialResolution(x=10.0, y=10.0)
    srs=EPSG:32632 
)

## Download raw data

In [8]:

download_dir = "./test/raw_data"
for (i, (tile, tb)) in enumerate(better_tiles.items()):
    for band in band_names + [scl_name]:
        dataset_name = get_dataset_name(user_id, tile, band)
        workflow = ge.workflow_builder.blueprints.sentinel2_band(band_name=band)
        reg_workflow = ge.register_workflow(workflow)

        query = create_query(tb, get_band_resolution(band), time_start, time_end)

        if not os.path.exists(f"{download_dir}{dataset_name[37:]}/"):
            os.makedirs(f"{download_dir}/{dataset_name[37:]}/", exist_ok=True)

        writer = ge.RasterWorkflowRioWriter(f"{download_dir}/{dataset_name[37:]}/", reg_workflow, no_data_value=0)

        await writer.query_and_write(query)

## Download scaled, cloud free data

In [9]:
# When True, the download loop of this cell stops after the first tile.
test_mode = True
# Output root for the scaled, cloud-free downloads of this cell.
download_dir = "./test/scaled_cloud_free"

def create_cloud_free_scaled_workflow(_user_id, _tile, band):
    """Build the workflow for one cloud-free band, converted to F32 and scaled.

    Args:
        _user_id, _tile: Unused; kept so that existing calls keep working.
        band: Band name handed to the `sentinel2_cloud_free_band` blueprint.

    Returns:
        The workflow: blueprint -> conversion to F32 -> scaling.
    """
    workflow = ge.workflow_builder.blueprints.sentinel2_cloud_free_band(
        band_name=band
    )
    # to float
    workflow = ge.workflow_builder.operators.RasterTypeConversion(workflow, output_data_type="F32")
    # to reflectance: 0.0001 is the factor used by the definition of this
    # function in the weekly-download cell; the previous value here (0.00001)
    # was ten times smaller.
    # NOTE(review): presumably 1/10000 is the Sentinel-2 quantification value - confirm.
    workflow = ge.workflow_builder.operators.RasterScaling(workflow, slope=0.0001, offset=0.0)
    return workflow

for (i, (tile, tb)) in enumerate(better_tiles.items()):
    scl_dataset_name = get_dataset_name(user_id, tile, scl_name)

    if test_mode and i > 0:
        break
    for band in band_names:
        if test_mode and i > 0:
            break
        dataset_name = get_dataset_name(user_id, tile, band)

        workflow = create_cloud_free_scaled_workflow(user_id, tile, band)
        reg_workflow = ge.register_workflow(workflow)

        query = create_query(tb, get_band_resolution(band), time_start, time_end)

        if not os.path.exists(f"{download_dir}{dataset_name[37:]}/"):
            os.makedirs(f"{download_dir}/{dataset_name[37:]}/", exist_ok=True)

        writer = ge.RasterWorkflowRioWriter(f"{download_dir}/{dataset_name[37:]}/", reg_workflow, no_data_value=0)

        await writer.query_and_write(query)

## Download weekly scaled data + NDVI

In [11]:
# Output root for the weekly (7-day mean) downloads of this cell.
download_dir = "./test/scaled_cloud_free_7days"

def create_cloud_free_scaled_workflow(_user_id, _tile, band):
    """Return the workflow for one cloud-free band as scaled F32 values.

    This redefines the function of the same name from the earlier cell.
    `_user_id` and `_tile` are not used; only `band` selects the data.
    """
    operators = ge.workflow_builder.operators
    cloud_free = ge.workflow_builder.blueprints.sentinel2_cloud_free_band(band_name=band)
    # to float
    as_float = operators.RasterTypeConversion(cloud_free, output_data_type="F32")
    # to reflectance
    return operators.RasterScaling(as_float, slope=0.0001, offset=0.0)

def create_cloud_free_scaled_workflow_7day_mean(user_id, tile, band):
    """Wrap the scaled cloud-free band workflow in a 7-day mean aggregation.

    The arguments are forwarded unchanged to
    `create_cloud_free_scaled_workflow`.
    """
    scaled = create_cloud_free_scaled_workflow(user_id, tile, band)
    return ge.workflow_builder.operators.TemporalRasterAggregation(
        scaled,
        aggregation_type="mean",
        granularity='days',
        window_size=7,
        ignore_no_data=True,
    )

def create_cloud_free_scaled_workflow_7day_mean_ndvi(_user_id, _tile):
    """Return the 7-day mean of the cloud-free NDVI blueprint workflow.

    Both parameters are unused. The NDVI comes from the
    `sentinel2_cloud_free_ndvi` blueprint; an earlier, now disabled variant
    stacked the 7-day means of B08 and B04 and applied the expression
    "(A-B)/(A+B)" instead.
    """
    ndvi = ge.workflow_builder.blueprints.sentinel2_cloud_free_ndvi()
    return ge.workflow_builder.operators.TemporalRasterAggregation(
        ndvi,
        aggregation_type="mean",
        granularity='days',
        window_size=7,
        ignore_no_data=True,
    )


for (i, (tile, tb)) in enumerate(better_tiles.items()):
    scl_dataset_name = get_dataset_name(user_id, tile, scl_name)
    
    for band in band_names:        
        dataset_name = get_dataset_name(user_id, tile, band)

        workflow = create_cloud_free_scaled_workflow_7day_mean(user_id, tile, band)
        reg_workflow = ge.register_workflow(workflow)
        query = create_query(tb, get_band_resolution(band), time_start, time_end)

        if not os.path.exists(f"{download_dir}{dataset_name[37:]}/"):
            os.makedirs(f"{download_dir}/{dataset_name[37:]}/", exist_ok=True)

        writer = ge.RasterWorkflowRioWriter(f"{download_dir}/{dataset_name[37:]}/", reg_workflow, no_data_value=0)
        await writer.query_and_write(query)

    # ndvi workflow    
    workflow = create_cloud_free_scaled_workflow_7day_mean_ndvi(user_id, tile)
    reg_workflow = ge.register_workflow(workflow)
    query = create_query(tb, 10, time_start, time_end)
    dataset_name = get_dataset_name(user_id, tile, "NDVI")

    if not os.path.exists(f"{download_dir}/ndvi/"):
        os.makedirs(f"{download_dir}/ndvi/", exist_ok=True)
        
    writer = ge.RasterWorkflowRioWriter(f"{download_dir}/ndvi/", reg_workflow, no_data_value=-2)
    await writer.query_and_write(query)
        