In [1]:
from ldn.utils import submit_job, get_job_status, get_tiles
from ldn.processor import LDNProcessor
import s3fs

import time

In [8]:
def execute(year, tile=None, version="0.1.0"):
    """Submit one ``ldn-processor`` batch job for ``year`` and return its job id.

    Args:
        year: Year to process. Used in the job name and passed (as a string)
            in the ``year`` job parameter.
        tile: Optional iterable of tile indices. When given, the elements are
            stringified, joined with commas and appended to the container
            command as ``--tile <values>``, and the job is submitted with
            ``multi=False``. When omitted, no ``--tile`` argument is added and
            the job is submitted with ``multi=True``.
        version: Version string passed in the ``version`` job parameter and,
            with dots replaced by dashes, embedded in the job name.

    Returns:
        Whatever ``submit_job`` returns (printed as the job id).
    """
    # `multi` must be bound on every path: it used to be assigned only inside
    # the `tile is not None` branch, so a call without `tile` raised
    # UnboundLocalError before anything was submitted.
    # NOTE(review): True for the "no explicit tile" case is inferred from the
    # existing `multi = False` for single-tile runs -- confirm against
    # `submit_job`.
    multi = tile is None

    extra_params = []
    if tile is not None:
        extra_params = ["--tile", ",".join(str(t) for t in tile)]

    job_name = f"version-{version.replace('.', '-')}-{year}"
    job_queue = "normalQueue"
    job_definition = "auspatious-ldn"
    container_overrides = {
        # `Ref::name` placeholders are filled in from `parameters` below.
        "command": [
            "ldn-processor",
            "--year",
            "Ref::year",
            "--version",
            "Ref::version",
            "--n-workers",
            "Ref::n_workers",
            "--threads-per-worker",
            "Ref::threads_per_worker",
            "--memory-limit",
            "Ref::memory_limit",
            "Ref::overwrite",
            *extra_params,
        ],
        "vcpus": 16,
        "memory": 122880,
    }
    parameters = {
        # NOTE(review): no `Ref::tile` placeholder appears in the command
        # above, so this value looks unused by this override -- confirm
        # whether the job definition still requires it.
        "tile": "238,47",
        "year": f"{year}",
        "version": version,
        "n_workers": "4",
        "threads_per_worker": "32",
        "memory_limit": "100GB",
        "overwrite": "--no-overwrite",
    }

    job_id = submit_job(
        job_name, job_queue, job_definition, container_overrides, parameters, multi=multi
    )
    print("Job submitted with id:", job_id)
    return job_id

# Registry of submitted jobs; the monitoring cell fills it as year -> job id.
jobs = dict()

# While True, the monitoring cell submits a job per year before polling.
restart = True

In [None]:
from IPython.display import clear_output

# A job in one of these states is treated as finished by the polling loop.
TERMINAL_STATES = ("SUCCEEDED", "FAILED")

# Submit one job per year, but only while `restart` is set. Re-running this
# cell afterwards resumes monitoring the jobs already recorded in `jobs`
# instead of submitting duplicates.
if restart:
    for year in range(2000, 2024):
        jobs[year] = execute(year)
    restart = False

# Seed the status table from every tracked job. The previous version reused
# whichever `job_id` was last bound when `restart` was False, so a resumed run
# polled a single stale id instead of the jobs in `jobs`.
job_status = {job_id: get_job_status(job_id) for job_id in jobs.values()}

not_complete = True
while not_complete:
    clear_output(wait=True)

    # Refresh and display the status of every tracked job.
    for year, job_id in jobs.items():
        status = get_job_status(job_id)
        job_status[job_id] = status
        print(f"{year} is {status}")

    # Decide once per polling pass, outside the per-job loop, so the loop also
    # terminates when `jobs` is empty (all() of nothing is True).
    if all(status in TERMINAL_STATES for status in job_status.values()):
        print(f"All tasks completed, or failed: {job_status}")
        not_complete = False
    else:
        time.sleep(2)

In [11]:

version = "0.1.0"
bucket = "data.ldn.auspatious.com"
years = range(2000, 2024)

# Glob every STAC item document written for this version.
path = f"s3://{bucket}/geo_ls_lp/{version.replace('.', '_')}/**/*.stac-item.json"

fs = s3fs.S3FileSystem(anon=True)
stac_keys = fs.glob(path)

# Materialise the tiles so they can be counted and re-iterated for every year.
tiles = list(get_tiles())

# Derive the expected total from the real inputs instead of a hard-coded tile
# count, and do not index into an empty glob result.
expected_count = len(tiles) * len(years)
example_key = stac_keys[0] if stac_keys else "<none found>"
print(f"Found {len(stac_keys)} out of {expected_count} STAC keys like this: {example_key}")

# Set membership avoids rescanning the whole key list for each tile/year pair.
existing_keys = set(stac_keys)
incomplete_tiles = []

for year in years:
    for tile, geobox in tiles:
        # The processor is built only to derive the bucket/key it would write.
        processor = LDNProcessor(
            tile,
            year=year,
            bucket=bucket,
            configure_s3=False,
            version=version,
        )
        # Compared as "<bucket>/<key>" with no scheme, matching the example key
        # printed from the glob result.
        key = f"{processor.bucket}/{processor.stac_key}"
        if key not in existing_keys:
            incomplete_tiles.append((tile, year))
            print(f"Tile {tile} from {year} is not complete yet")
            

Found 710 out of 720 STAC keys like this: data.ldn.auspatious.com/geo_ls_lp/0_1_0/046/237/2000/geo_ls_lp_2000_046_237.stac-item.json
Tile (49, 238) from 2001 is not complete yet
Tile (69, 79) from 2001 is not complete yet
Tile (70, 60) from 2001 is not complete yet
Tile (71, 61) from 2001 is not complete yet
Tile (66, 78) from 2003 is not complete yet
Tile (66, 80) from 2003 is not complete yet
Tile (47, 237) from 2005 is not complete yet
Tile (70, 61) from 2005 is not complete yet
Tile (49, 238) from 2010 is not complete yet
Tile (71, 60) from 2013 is not complete yet


In [16]:
# Resubmit a single-tile job for every (tile, year) pair found incomplete.
# Every job id is recorded so that each rerun can be tracked afterwards;
# previously each submission overwrote `job_id` and only the last one survived.
rerun_jobs = {}
for tile, year in incomplete_tiles:
    job_id = execute(year, tile=tile)
    # tuple(tile) keeps the key hashable whatever sequence type the tile is.
    rerun_jobs[(tuple(tile), year)] = job_id

Job submitted with id: e743aff4-d9f7-4e1b-92e5-5ceda6b244c9
