In [1]:
import os
import numpy
import datetime
import json
from pathlib import Path
import rasterio
import geopandas
import pandas
import fiona
import calendar
from azure.storage.blob import BlobClient, ContainerClient
from tqdm.notebook import tqdm

In [2]:
def find_month_end_date(year, month):
    """
    Return the day number of the last day of the given month.

    :param year: calendar year; any value ``int()`` accepts (e.g. a numpy integer).
    :param month: month number in the range 1-12; any value ``int()`` accepts.
    :return: the last day of the month (28-31) as a plain Python ``int``.
    :raises calendar.IllegalMonthError: if ``month`` is outside 1-12.
    """
    # monthrange() returns (weekday of the first day, number of days in month);
    # the second item is the month-end day. Casting the inputs to int keeps the
    # result a plain int even when numpy scalars read from a raster are passed.
    return calendar.monthrange(int(year), int(month))[1]

def zero_pad_num_str(
    num_val: float,
    str_len: int = 3,
    round_num: bool = False,
    round_n_digts: int = 0,
    integerise: bool = False,
    absolute: bool = False,
    gain: float = 1,
) -> str:
    """
    Format a number as a string left-padded with zeros.

    The optional transformations are applied in this order: absolute value,
    rounding, then scaling by ``gain`` and truncation to an integer.

    :param num_val: the number to format.
    :param str_len: minimum length of the returned string; longer strings are
                    returned unchanged.
    :param round_num: if True, round the value to ``round_n_digts`` digits.
    :param round_n_digts: number of digits passed to ``round()``.
    :param integerise: if True, multiply by ``gain`` and convert to ``int``.
    :param absolute: if True, use the absolute value of ``num_val``.
    :param gain: multiplier applied only when ``integerise`` is True.
    :return: the zero-padded string representation of the value.
    """
    value = abs(num_val) if absolute else num_val
    value = round(value, round_n_digts) if round_num else value
    value = int(value * gain) if integerise else value

    # zfill() pads on the left and keeps a leading sign in front of the zeros.
    return f"{value}".zfill(str_len)

In [3]:
def create_alert_gdf(alerts_img, alerts_meta_img):
    """
    Build a point GeoDataFrame with one row per alert pixel of a tile.

    Pixels of band 1 of ``alerts_img`` equal to 1 are treated as alerts. For
    each of them the matching pixel of ``alerts_meta_img`` provides the
    ``score`` (band 1), ``year`` (band 2) and ``month`` (band 3) columns. The
    ``date`` column holds the last day of that year/month, and ``lons`` /
    ``lats`` hold the x / y coordinates returned by ``rasterio.transform.xy``
    for the pixel, which are also used for the point geometry.

    :param alerts_img: path or URL of the alerts raster (opened with rasterio).
    :param alerts_meta_img: path or URL of the alerts metadata raster.
    :return: a ``geopandas.GeoDataFrame`` of alert points, or ``None`` when
             the tile has no alert pixels.
    """
    # Context managers guarantee both datasets are closed, including when a
    # read fails; everything needed afterwards is copied out of them here.
    with rasterio.open(alerts_img) as alerts_ds, rasterio.open(alerts_meta_img) as alerts_meta_ds:
        alerts_arr = alerts_ds.read(1)
        score_arr = alerts_meta_ds.read(1)
        year_arr = alerts_meta_ds.read(2)
        month_arr = alerts_meta_ds.read(3)
        img_transform = alerts_ds.transform

    alerts_msk = alerts_arr == 1
    if not alerts_msk.any():
        return None

    # Only compute coordinates for the alert pixels rather than for the whole
    # tile. nonzero() and boolean indexing both walk the array in row-major
    # order, so the coordinates line up with the masked attribute arrays.
    rows, cols = numpy.nonzero(alerts_msk)
    xs, ys = rasterio.transform.xy(img_transform, rows, cols)
    lons_arr = numpy.asarray(xs).ravel()
    lats_arr = numpy.asarray(ys).ravel()

    score_arr = score_arr[alerts_msk]
    year_arr = year_arr[alerts_msk]
    month_arr = month_arr[alerts_msk]

    # Many pixels share the same (year, month), so build each month-end date
    # once and reuse it. tolist() yields plain Python numbers for datetime.
    month_end_dates = dict()
    dates_list = list()
    for year_val, month_val in zip(year_arr.tolist(), month_arr.tolist()):
        year_month = (year_val, month_val)
        if year_month not in month_end_dates:
            day_val = int(find_month_end_date(year_val, month_val))
            month_end_dates[year_month] = datetime.datetime(year_val, month_val, day_val)
        dates_list.append(month_end_dates[year_month])

    vec_data = dict()
    vec_data["score"] = score_arr
    vec_data["year"] = year_arr
    vec_data["month"] = month_arr
    vec_data["date"] = dates_list
    vec_data["lons"] = lons_arr
    vec_data["lats"] = lats_arr
    # NOTE(review): the GeoDataFrame is created without a CRS, so the written
    # vector files carry no spatial reference. Consider passing the raster's
    # CRS here - confirm first that all tiles share the same CRS.
    alerts_gdf = geopandas.GeoDataFrame(vec_data, geometry=geopandas.points_from_xy(lons_arr, lats_arr))
    return alerts_gdf
        

In [4]:
def _signed_change_img_url(sas_token_info, img_name):
    """Return the SAS-signed URL of ``img_name`` under ``monthly_change_imgs``."""
    # URLs always use forward slashes; os.path.join would follow the local
    # platform's separator instead.
    import posixpath

    img_url = posixpath.join(sas_token_info["url"], "monthly_change_imgs", img_name)
    return f"{img_url}?{sas_token_info['sas_token']}"


def _signed_blob_exists(signed_url):
    """Return True if the blob behind the signed URL exists; closes the client."""
    with BlobClient.from_blob_url(signed_url) as blob_client:
        return blob_client.exists()


def create_monthly_alerts_vec(year, month, out_vec_file, out_vec_lyr, out_vec_pq_file, tiles, sas_token_info):
    """
    Merge the alerts of all tiles for one month into a single vector dataset.

    For every tile the alerts raster and its metadata raster are looked up in
    the ``monthly_change_imgs`` location of the blob container. Tiles for which
    both exist are converted with ``create_alert_gdf`` and the results are
    concatenated and written as a GeoPackage layer and as a gzip-compressed
    parquet file. If no tile yields any alerts, a message is printed and no
    file is written.

    :param year: year of the month to process.
    :param month: month number to process.
    :param out_vec_file: output GeoPackage file path.
    :param out_vec_lyr: layer name used within the GeoPackage.
    :param out_vec_pq_file: output parquet file path.
    :param tiles: iterable of tile names used to build the raster file names.
    :param sas_token_info: dict providing the container ``url`` and the
                           ``sas_token`` used to sign blob URLs.
    """
    month_str = zero_pad_num_str(month, str_len=2)
    tile_alerts_lst = list()
    for tile in tqdm(tiles):
        img_base = f"gmw_{tile}_{year}{month_str}_chg_alerts"
        meta_img_url = _signed_change_img_url(sas_token_info, f"{img_base}_meta.tif")
        alerts_img_url = _signed_change_img_url(sas_token_info, f"{img_base}.tif")

        # Both rasters are required; the second lookup is skipped when the
        # first blob is already known to be missing.
        if not (_signed_blob_exists(meta_img_url) and _signed_blob_exists(alerts_img_url)):
            continue

        tile_alerts_gdf = create_alert_gdf(alerts_img_url, meta_img_url)
        if tile_alerts_gdf is not None:
            tile_alerts_lst.append(tile_alerts_gdf)

    alerts_gpf = pandas.concat(tile_alerts_lst) if len(tile_alerts_lst) > 0 else None
    if alerts_gpf is None or len(alerts_gpf) == 0:
        print(f"No alerts for {year}-{month}: Something has probably gone wrong!")
        return

    alerts_gpf.to_file(out_vec_file, layer=out_vec_lyr, driver="GPKG")
    alerts_gpf.to_parquet(out_vec_pq_file, compression='gzip')

In [5]:
def _upload_alert_vec_file(lcl_file_path, blob_file_name, sas_token_info, overwrite_azure):
    """Upload a local file to ``monthly_alert_vecs/<blob_file_name>`` in the container."""
    # URLs always use forward slashes; os.path.join would follow the local
    # platform's separator instead.
    import posixpath

    blob_url = posixpath.join(sas_token_info["url"], "monthly_alert_vecs", blob_file_name)
    blob_url_signed = f"{blob_url}?{sas_token_info['sas_token']}"
    # The context managers close both the blob client and the local file, even
    # when the upload raises (e.g. the blob exists and overwrite is False).
    with BlobClient.from_blob_url(blob_url_signed) as blob_client:
        with open(lcl_file_path, 'rb') as data:
            blob_client.upload_blob(data, overwrite=overwrite_azure)


def create_gmw_alert_vecs(s_year, s_month, e_year, e_month, lcl_out_dir, tiles, sas_token_info, upload=True, overwrite_azure=False):
    """
    Create (and optionally upload) the monthly alert vector files for a date range.

    Every month from ``s_year``/``s_month`` to ``e_year``/``e_month``
    (inclusive) is processed with ``create_monthly_alerts_vec``, which writes a
    GeoPackage and a parquet file into ``lcl_out_dir``. When ``upload`` is True,
    each of those files that exists locally is uploaded to the
    ``monthly_alert_vecs`` location of the blob container.

    :param s_year: first year of the range.
    :param s_month: first month (within ``s_year``) of the range.
    :param e_year: last year of the range.
    :param e_month: last month (within ``e_year``) of the range.
    :param lcl_out_dir: local directory the output files are written to.
    :param tiles: iterable of tile names to process.
    :param sas_token_info: dict providing the container ``url`` and the
                           ``sas_token`` used to sign blob URLs.
    :param upload: if True, upload the output files after they are created.
    :param overwrite_azure: passed to ``upload_blob`` as ``overwrite``.
    """
    for year in range(int(s_year), int(e_year) + 1):
        print(year)
        # The first and last years may be partial; any year in between runs
        # January to December.
        month_s = s_month if year == s_year else 1
        month_e = e_month if year == e_year else 12

        for month in range(int(month_s), int(month_e) + 1):
            month_str = zero_pad_num_str(month, str_len=2)
            out_vec_lyr = f"{year}_{month_str}"
            out_vec_file = f"gmw_alerts_{out_vec_lyr}_v1.gpkg"
            out_vec_pq_file = f"gmw_alerts_{out_vec_lyr}_v1.parquet.gzip"
            out_vec_file_path = os.path.join(lcl_out_dir, out_vec_file)
            out_vec_pq_file_path = os.path.join(lcl_out_dir, out_vec_pq_file)
            print(f"\t{out_vec_file}")
            create_monthly_alerts_vec(year, month, out_vec_file_path, out_vec_lyr, out_vec_pq_file_path, tiles, sas_token_info)

            if not upload:
                continue
            # NOTE(review): only the existence of the local file is checked, so
            # a file left over from an earlier run would be uploaded even if
            # this run produced no alerts - confirm whether that is intended.
            for lcl_file_path, blob_file_name in (
                (out_vec_file_path, out_vec_file),
                (out_vec_pq_file_path, out_vec_pq_file),
            ):
                if os.path.exists(lcl_file_path):
                    _upload_alert_vec_file(lcl_file_path, blob_file_name, sas_token_info, overwrite_azure)
    

In [6]:
# Load the Azure storage settings from a local JSON file. The functions above
# read the "url" and "sas_token" entries of this dict to build signed blob URLs.
sas_info_file = "/home/jovyan/azure_info.json"
with Path(sas_info_file).open() as sas_info_fh:
    sas_token_info = json.load(sas_info_fh)

In [7]:
# Define the tiles to be processed: the values of the "tile" column of the
# alert region tiles file, as a plain Python list.
tiles_gdf = geopandas.read_file("../00_base_data/alert_region_tiles.geojson")
tiles = tiles_gdf["tile"].values.tolist()

# Optional manual overrides (disabled): process only a subset of the tiles,
# or drop a single tile from the list.
#tiles = tiles[10:20]
#tiles.remove("N27W077")

n_tiles = len(tiles)
print(f"n_tiles: {n_tiles}")

n_tiles: 484


In [8]:
# specify date range (use n instead of n-1 here)
# Both the start and the end month are included in the range that is processed.
s_year = 2022
s_month = 12
e_year = 2022
e_month = 12

# Local directory the vector files are written to before upload. makedirs with
# exist_ok=True also creates missing parent directories and does not fail if
# the directory already exists.
lcl_out_dir = "/home/jovyan/gmw_vec_alerts"
os.makedirs(lcl_out_dir, exist_ok=True)

# set overwrite_azure to True when needed
create_gmw_alert_vecs(s_year, s_month, e_year, e_month, lcl_out_dir, tiles, sas_token_info, upload=True, overwrite_azure=False)

2022
	gmw_alerts_2022_12_v1.gpkg


  0%|          | 0/484 [00:00<?, ?it/s]

No alerts for 2022-12: Something has probably gone wrong!
