In [1]:
import geopandas as gpd
from satio_pc.grid import get_blocks_gdf
from dask import delayed

# from elogs import Elogs, ElogsTask

# Read the Azure storage connection string from a local file.
# Surrounding whitespace is stripped so that a trailing newline in the file
# does not become part of the last field of the connection string.
with open('../../../connstr') as f:
    connect_str = f.read().strip()

# with open('../../../tap') as f:
#     aws_access_key_id, aws_secret_access_key = f.read().split('\n')


# Tile identifiers handed to get_blocks_gdf().
tiles = [
    '29TPJ', '29TQJ', '30TUP', '30TVP',
    '29TPH', '29TQH', '30TUN', '30TVN',
    '29TPG',
]

# Area of interest, reprojected to EPSG:4326.
aoi_fn = '../../../habitat_mapping_aoi.gpkg'
aoi = gpd.read_file(aoi_fn).to_crs(epsg=4326)

# Blocks of the tiles above, spatially joined against the AOI geometry
# (gpd.sjoin with its default join settings).
blocks_gdf = gpd.sjoin(get_blocks_gdf(tiles), aoi[['geometry']])


In [2]:
from satio_pc.utils.azure import AzureBlobReader

# Blob reader built from the connection string; 'habitat' is presumably the
# container name (it matches `container_name` used for extraction) — confirm.
az = AzureBlobReader(connect_str, 'habitat')

In [3]:
# Print the name of every blob returned by the reader's container client.
for blob in az.container_client.list_blobs():
    print(blob.name)

features/2022/s2/29TPJ/satio-features-s2_29TPJ_071_2022.tif
logs/done/2022/s2/done_29TPJ_071_2022.log
logs/error/2021/s2/error_29TPJ_071_2021.log
logs/proc/2021/s2/proc_29TPJ_071_2021.log
logs/proc/2022/s2/proc_29TPJ_071_2022.log


# Cluster setup

In [4]:
from dask.distributed import PipInstall
import dask_gateway

# Packages that every worker pip-installs through the PipInstall plugin.
satio_pc_url = "git+https://github.com/dzanaga/satio-pc.git@main"
elogs_url = "http://s3-eu-central-1.amazonaws.com/vito-worldcover-public/wheels/elogs-0.1.5-py3-none-any.whl"

# Start a Dask Gateway cluster and request a fixed pool of 20 workers.
# (Adaptive alternative: cluster.adapt(minimum=5, maximum=100).)
cluster = dask_gateway.GatewayCluster()
client = cluster.get_client()
cluster.scale(20)

# Register the install plugin on the client so workers get the packages.
plugin = PipInstall(packages=[satio_pc_url, elogs_url])
client.register_worker_plugin(plugin)

# Link to the Dask dashboard for monitoring.
print(client.dashboard_link)

https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway/clusters/prod.19929c8d987a46999b07e23cb89b7443/status


In [21]:
# client.upload_file('s2feats.py')

In [3]:
# !pip uninstall elogs -y

In [4]:
!pip install "http://s3-eu-central-1.amazonaws.com/vito-worldcover-public/wheels/elogs-0.1.5-py3-none-any.whl"

# L2A Features

In [4]:
import dask
import subprocess
from dataclasses import dataclass

# Module-level year. In the visible cells it is only read by the commented-out
# CLI variant of process() below; the task lists built later iterate over
# their own comprehension-local `year` values.
year = 2022

# def process(block):
#     cmd = f'ewc l2a {block.tile} {block.block_id} {year} -r habitat -c -k "{connect_str}"'
#     p = subprocess.run(cmd.split())
#     return p.returncode

def process(args):
    """Run an ``S2BlockExtractor`` for the single task described by ``args``.

    Parameters
    ----------
    args : object
        Any object exposing the attributes ``tile``, ``block_id``, ``year``,
        ``output``, ``connstr``, ``container``, ``cleanup`` and ``terminate``
        (e.g. an ``Args`` instance).

    Returns
    -------
    None
        The result of ``extract()`` is not returned.
    """
    # Imported inside the function so the import is resolved wherever the
    # function executes (presumably on the Dask workers — confirm).
    from satio_pc.extraction import S2BlockExtractor

    extractor_options = dict(
        output_folder=args.output,
        connection_str=args.connstr,
        container_name=args.container,
        cleanup=args.cleanup,
        terminate_if_failed=args.terminate,
    )
    extractor = S2BlockExtractor(args.tile, args.block_id, args.year,
                                 **extractor_options)
    extractor.extract()

In [5]:
@dataclass
class Args:
    """Parameter bundle for one ``process`` call (one instance per task)."""

    # --- which block to extract: positional arguments of S2BlockExtractor ---
    tile: str
    block_id: int
    year: int

    # --- where results go ---
    output: str      # forwarded as ``output_folder``
    connstr: str     # forwarded as ``connection_str``
    container: str   # forwarded as ``container_name``

    # --- extractor flags ---
    cleanup: bool    # forwarded as ``cleanup``
    terminate: bool  # forwarded as ``terminate_if_failed``

In [6]:
# Settings shared by every extraction task.
output_folder = '.'
container_name = 'habitat'
cleanup = True
terminate_if_failed = False

# One Args per (block, year) pair: blocks in the outer loop, and for each
# block 2021 first, then 2022.
blocks = list(blocks_gdf.itertuples())
args = [
    Args(tile=block.tile,
         block_id=block.block_id,
         year=year,
         output=output_folder,
         connstr=connect_str,
         container=container_name,
         cleanup=cleanup,
         terminate=terminate_if_failed)
    for block in blocks
    for year in (2021, 2022)
]


In [None]:
# Run the first task directly in the notebook kernel (no cluster involved),
# as a local check before dispatching tasks with client.map.
process(args[0])

2023-04-06 19:25:58.887 | INFO     | satio_pc.sentinel2:preprocess_l2a:287 - Loading block data


In [None]:
# Submit the first five tasks to the cluster, then block until all of them
# have finished and collect their return values.
futures = client.map(process, args[:5])
results = client.gather(futures)

In [10]:
# NOTE(review): the saved output of this cell (597) does not match the
# five-task slice submitted in the visible client.map cell; it looks like it
# comes from an earlier run — re-execute to refresh.
len(results)

597

In [None]:
# List the container contents again (same loop as the earlier listing cell),
# here after tasks have been run.
for blob in az.container_client.list_blobs():
    print(blob.name)

In [30]:
# Shut the gateway cluster down.
# NOTE(review): cells further down still use `client`; on a fresh top-to-bottom
# run they would need a new cluster to be created first.
cluster.shutdown()

  self.scheduler_comm.close_rpc()


In [22]:
# import os
# os.environ['AWS_DEFAULT_REGION'] = 'eu-central-1'

# app_id = (f'habitat_s2')
# elogs = Elogs(app_id,
#               aws_access_key_id,
#               aws_secret_access_key,
#               logs_bucket='vito-worldcover',
#               overwrite_table=True)

In [10]:
import dask

# Unique task id per block: "<tile>_<block_id>".
blocks_gdf['cid'] = blocks_gdf.tile + '_' + blocks_gdf.block_id.astype(str)

sensor = 'l2a'
container_name = 'habitattest'

# NOTE(review): ElogsTask, elogs and extract_s2_features are not defined in
# any visible cell (the elogs import and the Elogs(...) setup are commented
# out) — they must be restored before this cell can run on a fresh kernel.
# One task per (block, year) for the first three blocks.
tasks = [ElogsTask(block.cid, block, year)
         for block in blocks_gdf.iloc[:3].itertuples()
         for year in (2017, 2018, 2019, 2020, 2021, 2022)]

with elogs.start(tasks) as filtered_tasks:
    # Submit the task list yielded by elogs.start() instead of the original
    # unfiltered `tasks`, which made the context manager's result unused.
    futures = client.map(extract_s2_features, filtered_tasks,
                         resources={'processes': 4})
    # client.map returns distributed futures; client.gather blocks until they
    # finish and returns their results (dask.compute targets lazy collections).
    # `results` is now a list instead of a tuple; indexing and len() still work.
    results = client.gather(futures)

In [4]:
# Keep only the first three blocks; this overwrites the full `blocks` list
# built in the earlier task-construction cell.
blocks = list(blocks_gdf.iloc[:3].itertuples())

In [None]:
# Client.gather takes the collection of futures as ONE argument; unpacking it
# with * would pass the second and later futures as the `errors`/`direct`
# parameters instead.
results = client.gather(futures)

In [10]:
# Display the first element of `results`.
results[0]

In [10]:
# NOTE(review): client.compute is meant for lazy dask collections; if
# `results[0]` is already a future or a gathered value this call does not
# produce a new result — confirm what was intended here.
client.compute(results[0])

In [11]:
# Show, per worker address, the task keys it currently holds (see the saved
# output: 'tls://…' -> tuple of 'process-…' keys).
client.has_what().items()

dict_items([('tls://10.244.125.10:40821', ('process-6c1b0516-3cac-4b16-922b-48901fe1c7e8',)), ('tls://10.244.125.7:41765', ()), ('tls://10.244.125.8:40949', ('process-75a5eb70-3a61-499e-82dd-28399d86bb62',)), ('tls://10.244.125.9:34319', ('process-b3836ba7-1b85-43db-b226-63e736aeb5d9',)), ('tls://10.244.66.98:33573', ('process-d09fa4c6-6201-4925-9df7-b394dc5792e2',)), ('tls://10.244.70.89:33237', ())])

In [10]:
# `futures` are distributed futures produced by client.map, so collect them
# with client.gather; dask.compute targets lazy dask collections.
# `results` is now a list instead of a tuple; indexing and len() still work.
results = client.gather(futures)

In [19]:
# Shut the gateway cluster down (second shutdown cell in this notebook).
cluster.shutdown()

  self.scheduler_comm.close_rpc()


In [9]:
from azure.storage.blob import BlobServiceClient

# WARNING: this cell deletes EVERY blob in the container named below.
# Double-check `container_name` before running it.

# Uses the connection string already loaded into `connect_str`.

# Container to empty
container_name = "habitattest"

# Initialize the BlobServiceClient
blob_service_client = BlobServiceClient.from_connection_string(connect_str)

# Get a reference to the container
container_client = blob_service_client.get_container_client(container_name)

# List blobs in the container
blobs = container_client.list_blobs()

# Iterate through the blobs: print each name, then delete that blob
for blob in blobs:
    print(f"Deleting blob: {blob.name}")
    container_client.delete_blob(blob.name)

Deleting blob: ewc_29TPJ_071/satio-features-s2_29TPJ_071.tif
Deleting blob: ewc_29TPJ_072/satio-features-s2_29TPJ_072_2021.tif
Deleting blob: ewc_29TPJ_073/PROC_29TPJ_073.log
Deleting blob: ewc_29TPJ_073/satio-features-s2_29TPJ_073_2021.tif
Deleting blob: ewc_29TPJ_074/PROC_29TPJ_074.log
Deleting blob: ewc_29TPJ_074/satio-features-s2_29TPJ_074_2021.tif
Deleting blob: satio-features-s2_29TPJ_071.tif
Deleting blob: satio-s2-features_29TQH_004.tif


# GAMMA0

In [None]:
# S1 (gamma0) features for a single block.
# NOTE(review): this cell relies on names that no visible cell defines or
# imports (load_gamma0, preprocess_gamma0, block, start_date, end_date,
# settings, tmpdir, q, np, xr, CRS) — it cannot run on a fresh kernel as-is.

# Load the gamma0 data for the block bounds / EPSG and the date range.
s1 = load_gamma0(block.bounds,
                 block.epsg,
                 start_date,
                 end_date)

# Preprocess s1 with the compositing frequency/window taken from
# settings['gamma0']['composite']; returns the processed data and obs_gamma0.
s1, obs_gamma0 = preprocess_gamma0(s1,
                                   start_date,
                                   end_date,
                                   composite_freq=settings['gamma0']['composite']['freq'], 
                                   composite_window=settings['gamma0']['composite']['window'],
                                   tmpdir=tmpdir.name)

# Indices derived from the s1 bands via the `ewc` accessor.
s1_indices = ["vh_vv", "rvi"]

s1_vi = s1.ewc.indices(s1_indices)

# Percentiles `q` of both the bands and the indices, names prefixed with 's1'.
ps = [s.ewc.percentile(q, name_prefix='s1') for s in (s1, s1_vi)]

# Fix time to the same (single) timestamp to avoid concat issues
# (different compositing settings for s2 and s1).
for p in ps:
    p['time'] = ps[0].time
    
# Drop singleton axes of obs_gamma0, then prepend two length-1 axes.
obs_gamma0 = np.expand_dims(np.squeeze(obs_gamma0), (0, 1))
# Wrap it in a copy of the first band of the first percentile array (so it
# carries that array's coordinates) and label it as band 'obs_gamma0'.
obs_gamma0 = ps[0].isel(band=0).copy(data=obs_gamma0)
obs_gamma0['band'] = ['obs_gamma0']

# Stack the percentile arrays and obs_gamma0 along the band dimension.
final = xr.concat(ps + [obs_gamma0], dim='band')
final.name = 'satio-features-s1'

# Cache via the `ewc` accessor in the current directory (second argument is
# presumably the chunk shape — confirm), then drop length-1 dimensions.
final = final.ewc.cache('.', (1, 1, 512, 512))
final = final.squeeze()

# Attach the block CRS and convert to a dataset with one variable per band.
crs = CRS.from_epsg(block.epsg)
final = final.rio.write_crs(crs)
final_ds = final.to_dataset('band')

# Write a tiled, deflate-compressed GeoTIFF named after tile and block id.
fn = f'{final.name}_{block.tile}_{block.block_id:03d}.tif'
final_ds.rio.to_raster(fn,
                       windowed=False,
                       tiled=True,
                       compress='deflate',
                       predictor=3,
                       zlevel=4)