In [1]:
# Jupyter notebook related
%reload_ext autoreload
%autoreload 2

In [2]:
import geopandas as gpd
from satio_pc.grid import get_blocks_gdf
from dask import delayed

# from elogs import Elogs, ElogsTask

# Read the Azure storage connection string from a local file (relative path,
# three directories up) so the credential is not hard-coded in the notebook.
# NOTE(review): f.read() keeps any trailing newline present in the file —
# confirm the Azure client tolerates it.
with open('../../../connstr_vegteam') as f:
    connect_str = f.read()
# Blob container that the cells below read from and write to.
container_name = 'sen4ldn'

# Test tiles for SEN4LDN

# Uganda: 36NUG
# Portugal: 29TNE
# Colombia: 18NUJ

In [13]:
from satio_pc.utils.azure import AzureBlobReader

# WARNING: destructive cell — it deletes EVERY file that list_files() returns
# for `container_name`. On a Restart & Run All this wipes previously extracted
# features and their done/proc logs before the extraction cells run.
azure = AzureBlobReader(connect_str,
                        container_name)
# The listing is fetched once, directly in the loop header; a bare
# `azure.list_files()` statement in the middle of a cell is never displayed
# and only costs an extra remote listing.
for fn in azure.list_files():
    azure.delete_file(fn)

In [4]:
# Display the files currently present in the container.
azure.list_files()

['features/2018/s2/18NUJ/satio-features-s2_18NUJ_000_2018.tif',
 'logs/done/2018/s2/done_18NUJ_000_2018.log',
 'logs/proc/2018/s2/proc_18NUJ_000_2018.log']

In [4]:
# azure.download_file('features/2018/s2/18NUJ/satio-features-s2_18NUJ_000_2018.tif', 'satio-features-s2_18NUJ_000_2018.tif')

In [5]:
from satio_pc.sentinel2 import load_l2a
from satio_pc.extraction import S2BlockExtractor


# Pre-processing options grouped per data source ("l2a" and "gamma0").
# NOTE(review): none of the cells visible here pass `settings` to the
# extractor — confirm whether it is still needed.
settings = dict(
    l2a=dict(
        max_cloud_cover=90,
        composite=dict(freq=10, window=20),
        mask=dict(erode_r=3, dilate_r=13, max_invalid_ratio=1),
    ),
    gamma0=dict(
        composite=dict(freq=10, window=10),
    ),
)


# The three test tiles processed by this notebook.
tiles = ['18NUJ', '29TNE', '36NUG']

# Build the block grid for those tiles, keep only the blocks whose area is
# exactly 104857600 (== 10240 ** 2; only squares, bug on others), and order
# the result by block id.
blocks_gdf = (
    get_blocks_gdf(tiles)
    .loc[lambda gdf: gdf['area'] == 104857600]
    .sort_values('block_id')
)

In [6]:
# Sanity check on the area filter: only one distinct area value should remain.
blocks_gdf['area'].value_counts()

area
104857600.0    300
Name: count, dtype: int64

In [7]:
# Take the row at position 10 of the sorted frame as a single row tuple
# (itertuples over a one-row slice) — presumably for ad-hoc testing; no cell
# visible here uses `block` afterwards.
block = next(blocks_gdf.iloc[[10]].itertuples())

In [8]:
# Band names and spectral indices handed to S2BlockExtractor by extract_s2.
bands = ['B02', 'B03', 'B04', 'B08', 'B11', 'B12']
indices = ['ndvi']

def extract_s2(block, year):
    """Run the S2 extraction for one block and year unless it is already done.

    Builds an ``S2BlockExtractor`` from the notebook-level ``bands``,
    ``indices``, ``connect_str`` and ``container_name``, then looks for the
    matching done-log among the files under ``logs/done/``. If the log is
    present the job is skipped; otherwise ``extractor.extract()`` is called.

    Parameters
    ----------
    block
        Row tuple (e.g. from ``blocks_gdf.itertuples()``) exposing ``tile``
        and ``block_id``.
    year : int
        Year to extract.

    Returns
    -------
    None
    """
    print(block.tile, block.block_id, year)
    extractor = S2BlockExtractor(block.tile,
                                 block.block_id,
                                 year,
                                 bands=bands,
                                 indices=indices,
                                 output_folder='/tmp',
                                 connection_str=connect_str,
                                 container_name=container_name)

    # Done-logs in the container use a zero-padded, three-digit block id
    # (e.g. 'logs/done/2018/s2/done_18NUJ_000_2018.log'), while block.block_id
    # prints as a plain number ('0'). Without the padding the lookup below
    # never matches and every block is extracted again.
    block_id = int(block.block_id)
    log_fn = f'logs/done/{year}/s2/done_{block.tile}_{block_id:03d}_{year}.log'
    done_logs = extractor._azure_client.list_files(prefix='logs/done/')
    if log_fn in done_logs:
        print(f'{block.tile} {block.block_id} {year} already extracted')
        return None

    extractor.extract()
    return None

# Job list: one (block, year) pair per selected block and per year 2018-2022
# (the range end is exclusive). Blocks vary slowest, so all years of one block
# are consecutive.
args = [(b, y) for b in blocks_gdf.itertuples()
        for y in range(2018, 2023)]
 

In [9]:
import multiprocessing

In [None]:
# Run every job in its own child process, one at a time: join() right after
# start() blocks until the child exits, so the jobs do not run in parallel.
# NOTE(review): presumably a fresh process per job is used so that memory is
# released between jobs — confirm. The child's exit code is not checked, so a
# failed job does not stop the loop.
for a in args:
    p = multiprocessing.Process(target=extract_s2,
                                args=a)
    p.start()
    p.join()


**TODO — bugs to fix:**
- Weird line patterns.
- Weird band descriptions.
- Save as bytes, with min/max scaling and offset.

In [14]:
# Run the first (block, year) job directly in the notebook process, without
# the multiprocessing wrapper.
extract_s2(*args[0])

18NUJ 0 2018


  times = pd.to_datetime(
  times = pd.to_datetime(
  times = pd.to_datetime(
  times = pd.to_datetime(
[32m2023-06-27 09:52:47.680[0m | [1mINFO    [0m | [36msatio_pc.sentinel2[0m:[36mpreprocess_l2a[0m:[36m398[0m - [1mLoading block data[0m
[32m2023-06-27 09:53:43.783[0m | [1mINFO    [0m | [36msatio_pc.sentinel2[0m:[36mpreprocess_l2a[0m:[36m417[0m - [1mCompositing 10m block data[0m
[32m2023-06-27 09:55:21.396[0m | [1mINFO    [0m | [36msatio_pc.sentinel2[0m:[36mpreprocess_l2a[0m:[36m426[0m - [1mInterpolating 10m block data[0m
[32m2023-06-27 09:56:44.037[0m | [1mINFO    [0m | [36msatio_pc.sentinel2[0m:[36mpreprocess_l2a[0m:[36m439[0m - [1mCompositing 20m block data[0m
[32m2023-06-27 09:57:24.313[0m | [1mINFO    [0m | [36msatio_pc.sentinel2[0m:[36mpreprocess_l2a[0m:[36m448[0m - [1mInterpolating 20m block data[0m
[32m2023-06-27 09:57:54.797[0m | [1mINFO    [0m | [36msatio_pc.sentinel2[0m:[36mpreprocess_l2a[0m:[36m455[0m -