In [1]:
import openeo
import os
import geopandas as gpd
from openeo_gfmap.backend import vito_connection, Backend, BackendContext
from openeo_gfmap.fetching import FetchType
from openeo_gfmap import TemporalContext, BoundingBoxExtent
import json
from openeo_gfmap.fetching.s2 import build_sentinel2_l2a_extractor
from openeo_gfmap.preprocessing import mask_scl_dilation
from openeo.processes import sort


In [2]:
# Shared extraction settings used by the cells below.
# Time window, once as the plain [start, end] list that is handed to
# load_collection and once as an openeo_gfmap TemporalContext.
t_simple = ['2018-01-01','2023-12-31']
t = TemporalContext('2018-01-01','2023-12-31')
# Sentinel-2 band names with the "S2-" prefix ...
b_S2 = [
    'S2-B01', 'S2-B02', 'S2-B03', 'S2-B04', 'S2-B05', 'S2-B06',
    'S2-B07', 'S2-B08', 'S2-B8A', 'S2-B11', 'S2-B12', 'S2-SCL',
]
# ... and the same names reduced to the part after the last '-'.
b_simple = [name.rpartition('-')[2] for name in b_S2]
def extract_and_save(b_S2,t,bbox,geom):
    """Build the per-geometry mean Sentinel-2 cube via the openeo_gfmap extractor.

    Despite the name, nothing is written here: the aggregated cube is only
    returned to the caller.

    Args:
        b_S2: band names forwarded to ``build_sentinel2_l2a_extractor``.
        t: temporal context handed to the extractor's ``get_cube``.
        bbox: spatial extent handed to the extractor's ``get_cube``.
        geom: geometries passed to ``aggregate_spatial``.

    Returns:
        The result of ``aggregate_spatial(geom, reducer="mean")`` applied to
        the cube returned by ``mask_scl_dilation``.
    """
    extractor = build_sentinel2_l2a_extractor(
        backend_context=BackendContext(Backend.TERRASCOPE),
        bands=b_S2,
        fetch_type=FetchType.TILE,
        # Filter forwarded to the extractor: "eo:cloud_cover" <= 20.
        load_collection={"eo:cloud_cover": lambda val: val <= 20},
    )
    # A fresh connection is opened on every call, after the extractor is built.
    masked_cube = mask_scl_dilation(extractor.get_cube(vito_connection(), bbox, t))
    return masked_cube.aggregate_spatial(geom, reducer="mean")

def extract_and_save_simple(b,t,bbox,geom,max_cloud_cover=10,scl_band_name='SCL'):
    """Build the per-geometry mean Sentinel-2 cube with plain openeo calls.

    Despite the name, nothing is written here: the aggregated cube is only
    returned to the caller.

    Args:
        b: band names passed to ``load_collection`` as ``bands``.
        t: temporal extent passed to ``load_collection``.
        bbox: spatial extent passed to ``load_collection``.
        geom: geometries passed to ``aggregate_spatial``.
        max_cloud_cover: value forwarded to ``load_collection`` as
            ``max_cloud_cover``. Defaults to 10, the value that used to be
            hard-coded.
        scl_band_name: value sent as the ``scl_band_name`` argument of the
            ``mask_scl_dilation`` process. Defaults to ``'SCL'``, the value
            that used to be hard-coded.

    Returns:
        The result of ``aggregate_spatial(geom, reducer="mean")`` applied to
        the output of the ``mask_scl_dilation`` process.
    """
    conn = vito_connection()
    s2_cube = conn.load_collection(
        "SENTINEL2_L2A",
        spatial_extent=bbox,
        temporal_extent=t,
        bands=b,
        max_cloud_cover=max_cloud_cover,
    )
    # The masking step is requested by process id, with the loaded cube as its
    # ``data`` argument.
    s2_masked = s2_cube.process(
        process_id="mask_scl_dilation",
        arguments={
            'data': s2_cube,
            'scl_band_name': scl_band_name,
        },
    )
    return s2_masked.aggregate_spatial(geom, reducer="mean")

def create_job(datacube, output_path):
    """Run ``datacube`` as a CSV batch job and download the first CSV asset.

    The job title is the file name of ``output_path`` up to its first dot.
    The call blocks until ``start_and_wait`` returns.

    Args:
        datacube: object providing ``create_job(title=..., out_format=...)``.
        output_path: file path the CSV asset is downloaded to. Its parent
            folder is created if it does not exist yet.

    Returns:
        None.

    Warns:
        UserWarning: if the finished job exposes no asset whose media type
            starts with ``text/csv``; in that case nothing is downloaded.
    """
    import warnings

    # Make sure the download target exists *before* the (slow) job is started,
    # so a finished job cannot be lost to a missing folder.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    job = datacube.create_job(
        # basename() instead of split('/') so platform-specific separators
        # never end up in the title.
        title=os.path.basename(output_path).split('.')[0], out_format="CSV"
    )
    job.start_and_wait()

    for asset in job.get_results().get_assets():
        # Assets without a "type" entry are skipped instead of raising KeyError.
        if (asset.metadata.get("type") or "").startswith("text/csv"):
            asset.download(output_path)
            break
    else:
        warnings.warn(
            f"Job finished without a text/csv asset; nothing was written to {output_path}"
        )

In [3]:
# Batch extraction: for every shapefile found one folder level below
# ``filedir``, request the masked Sentinel-2 means for its geometries and
# download them as ``<filedir>/output/<shapefile name up to the first dot>.csv``.
filedir = r'./extra_fields/'
extraction_output_dir = os.path.join(filedir, 'output')

# Collect the work list first. This gives the progress message a correct total
# (number of shapefiles, not number of entries in ``filedir``) and skips
# entries of ``filedir`` that are not folders instead of crashing on them.
# To resume an interrupted run, slice this list.
shapefiles = []
for folder in sorted(os.listdir(filedir)):
    folder_path = os.path.join(filedir, folder)
    if not os.path.isdir(folder_path):
        continue
    for file in sorted(os.listdir(folder_path)):
        if file.endswith('.shp'):
            shapefiles.append((folder_path, file))

# Create the download folder before any job is started.
os.makedirs(extraction_output_dir, exist_ok=True)

for i, (folder_path, file) in enumerate(shapefiles, start=1):
    field_name = file.split('.')[0]
    print(field_name)

    gdf = gpd.read_file(os.path.join(folder_path, file))
    hulls = gdf.geometry.convex_hull
    # The bbox dict below carries no "crs" key and the geometries are sent as
    # GeoJSON; openEO reads both as lon/lat (EPSG:4326), so reproject whenever
    # the shapefile declares a CRS. Files without CRS information are passed
    # through unchanged.
    if hulls.crs is not None:
        hulls = hulls.to_crs(epsg=4326)
    geom = json.loads(hulls.to_json())

    west, south, east, north = geom['bbox'][:4]
    # Extent object for the gfmap variant ``extract_and_save``; not used by
    # the call below but kept available in the notebook namespace.
    bbox = BoundingBoxExtent(west, south, east, north)
    bbox_simple = {"west": west,
                   "south": south,
                   "east": east,
                   "north": north}

    output_path = os.path.join(extraction_output_dir, field_name + '.csv')
    s2_dc = extract_and_save_simple(b_simple, t_simple, bbox_simple, geom)
    create_job(s2_dc, output_path)
    print(f'finished {i}/{len(shapefiles)}')
        


ShapefilePKZ1
Authenticated using refresh token.
0:00:00 Job 'j-240223ba5db547d183007c414a8bfb3f': send 'start'
0:01:33 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:01:38 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:01:45 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:01:53 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:02:03 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:02:16 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:02:32 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:02:51 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:03:15 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:03:45 Job 'j-240223ba5db547d183007c414a8bfb3f': queued (progress N/A)
0:04:23 Job 'j-240223ba5db547d183007c414a8bfb3f': running (progress N/A)
0:05:10 Job 'j-240223ba5db547d183007c414a8bfb3f': running (progress N/A)
0:06:08 Job 'j-240223b

In [66]:
# Post-process the downloaded CSVs: keep ``date`` plus every column whose name
# contains 'avg', rename those columns to ``B<n>_mean``, sort by date and write
# the table back into the same folder under a shortened file name.
output_dir = r'./extra_fields/output'
for csv_name in os.listdir(output_dir):
    # Only CSV files are candidates; anything else in the folder is ignored.
    if not csv_name.lower().endswith('.csv'):
        continue

    csv_open = gpd.read_file(os.path.join(output_dir, csv_name))

    in_cols = [col for col in csv_open.columns  if 'avg' in col]
    if not in_cols:
        # No 'avg' columns: this is not a raw extraction result, e.g. a file
        # this cell wrote on an earlier run. Processing it again would
        # overwrite it with a 'date'-only table, so leave it alone.
        continue

    # Output name: whatever follows the last occurrence of 'file' in the input
    # name (the full name if 'file' does not occur).
    new_name = csv_name.split('file')[-1]

    # New label = integer after the last '_' (before the first ')') plus one.
    # NOTE(review): assuming that integer is the zero-based band position, the
    # label only equals the Sentinel-2 band name for consecutively numbered
    # bands. With the b_simple order (B8A, B11, B12, SCL at positions 8-11)
    # those columns end up as B9..B12 - confirm this is intended.
    out_cols = [f"B{int(col.split(')')[0].split('_')[-1]) + 1}_mean" for col in in_cols]

    csv_open = csv_open[['date']+in_cols]
    csv_open = csv_open.rename(columns=dict(zip(in_cols,out_cols)))
    csv_open = csv_open.sort_values(by=['date']).reset_index(drop=True)

    # The default index column is written as well, as before.
    csv_open.to_csv(os.path.join(output_dir,new_name))