In [None]:
import geopandas
import pandas
import numpy
import scipy.spatial
import json
import os
from azure.storage.blob import BlobClient

In [None]:
def zero_pad_num_str(
    num_val: float,
    str_len: int = 3,
    round_num: bool = False,
    round_n_digts: int = 0,
    integerise: bool = False,
    absolute: bool = False,
    gain: float = 1,
) -> str:
    """Format a number as a string left-padded with zeros.

    The optional transformations are applied in a fixed order: absolute
    value, then rounding, then scaling-and-truncation to an integer.

    :param num_val: the number to format.
    :param str_len: minimum length of the returned string; shorter strings
                    are padded on the left with ``0`` via ``str.zfill``.
    :param round_num: if True, round the value to ``round_n_digts`` digits.
    :param round_n_digts: number of digits passed to ``round``.
    :param integerise: if True, multiply by ``gain`` and truncate with ``int``.
    :param absolute: if True, take the absolute value first.
    :param gain: multiplier used only when ``integerise`` is True.
    :return: the zero-padded string representation of the value.
    """
    value = abs(num_val) if absolute else num_val
    if round_num:
        value = round(value, round_n_digts)
    if integerise:
        value = int(value * gain)

    return f"{value}".zfill(str_len)

In [None]:
# Read the Azure storage details from a local JSON file; later cells use its
# "url" and "sas_token" entries to build a signed blob URL.
sas_info_file = "/home/jovyan/azure_info.json"
with open(sas_info_file) as f:
    sas_token_info = json.load(f)

# Local scratch directory for files downloaded from Azure. makedirs with
# exist_ok=True creates it only when missing, without the race between a
# separate existence check and the mkdir call.
tmp_dir = "tmp_lcl"
os.makedirs(tmp_dir, exist_ok=True)

## Define the dates for analysis:

Define the current and previous month/year. Note that the merged result for the previous month must already be available.

### Process for Nov 2022:
```
c_month = 11
c_year = 2022

p_month = 10
p_year = 2022
```

### Process for Jan 2023:
```
c_month = 1
c_year = 2023

p_month = 12
p_year = 2022
```

### Process for June 2023:
```
c_month = 6
c_year = 2023

p_month = 5
p_year = 2023
```


In [None]:
# Month being processed (the "current" month).
c_year, c_month = 2023, 3
# Month whose merged result is used as the "previous" layer.
p_year, p_month = 2023, 2

# Two-character, zero-padded month strings used when building file names.
c_month_str = zero_pad_num_str(c_month, str_len=2)
p_month_str = zero_pad_num_str(p_month, str_len=2)

## Get the current month vector layer from Azure.

In [None]:
# Check whether an alerts vector exists on Azure for the month specified and,
# if it does, download it to the local temporary directory.
alerts_vec_lyr = f"{c_year}_{c_month_str}"
alerts_vec_file = f"gmw_alerts_{alerts_vec_lyr}_v1.parquet.gzip"
# Join the URL parts with explicit "/" separators: os.path.join uses the
# platform path separator, which would produce a broken URL on Windows.
alerts_vec_file_url = "/".join(
    [sas_token_info["url"].rstrip("/"), "monthly_alert_vecs", alerts_vec_file]
)
alerts_vec_file_url_signed = f"{alerts_vec_file_url}?{sas_token_info['sas_token']}"
alerts_vec_blob_client = BlobClient.from_blob_url(alerts_vec_file_url_signed)
if not alerts_vec_blob_client.exists():
    # FileNotFoundError is a subclass of Exception, so any handler written
    # for the previous generic Exception still catches it.
    raise FileNotFoundError("A vector alerts file does not exist for the month/year specified - have you generated?")

# Write the blob contents to a local file of the same name.
alerts_vec_lcl_file = os.path.join(tmp_dir, alerts_vec_file)
with open(file=alerts_vec_lcl_file, mode="wb") as download_file:
    download_file.write(alerts_vec_blob_client.download_blob().readall())
# Drop the reference to the client now that the download has finished.
alerts_vec_blob_client = None

# Read the downloaded vector layer into geopandas and label it as EPSG:4326
# (allow_override=True replaces any CRS already stored with the layer).
c_alerts_gdf = geopandas.read_parquet(alerts_vec_lcl_file)
c_alerts_gdf = c_alerts_gdf.set_crs(epsg=4326, allow_override=True)

## Read the merged vector with all the previous alerts:

In [None]:
# File holding the merged alerts for the previous month. The path is relative,
# so the file is read from the current working directory.
all_alerts_vec_file = "gmw_alerts_all_{}{}_qad_v1.parquet.gzip".format(p_year, p_month_str)

# Load the layer with geopandas and label it as EPSG:4326 in one step.
p_alerts_gdf = geopandas.read_parquet(all_alerts_vec_file).set_crs(
    epsg=4326, allow_override=True
)

## Merge the two layers

This will be the layer to calculate the distances to.

In [None]:
# Stack the current-month alerts on top of the previously merged alerts and
# label the combined layer as EPSG:4326.
all_alerts_gdf = pandas.concat([c_alerts_gdf, p_alerts_gdf]).set_crs(
    epsg=4326, allow_override=True
)

## Create index and query index to find the number of points within a set of radii

In [None]:
def _count_neighbours(tree, query_xy, radius, n_workers=3):
    """Count the indexed points lying within ``radius`` of each query point.

    :param tree: a ``scipy.spatial.KDTree`` built from the point coordinates.
    :param query_xy: (n, 2) array of x/y coordinates to query around.
    :param radius: search radius, in the same units as the coordinates.
    :param n_workers: number of parallel workers used for the query.
    :return: array of n counts. One is subtracted from each count because the
             query points are themselves part of the index (the current alerts
             are included in the merged layer), so each one matches itself.
    """
    n_within = tree.query_ball_point(
        query_xy,
        r=radius,
        p=2.0,  # Euclidean distance
        eps=0,
        workers=n_workers,
        return_sorted=None,
        return_length=True,  # return the counts rather than the point indices
    )
    return n_within - 1


print("Build Index")
# Build the coordinate arrays once; the same query points are reused for all
# three radii. column_stack keeps an (n, 2) shape even when a layer is empty.
all_alerts_xy = numpy.column_stack((all_alerts_gdf.geometry.x, all_alerts_gdf.geometry.y))
c_alerts_xy = numpy.column_stack((c_alerts_gdf.geometry.x, c_alerts_gdf.geometry.y))
tree_idx = scipy.spatial.KDTree(all_alerts_xy)

# Radii are expressed in coordinate units (degrees) as multiples of the pixel size.
pxl_size = 0.0002 # 22 m

print("Perform Distance Query: 222 m")
n_pts_r222 = _count_neighbours(tree_idx, c_alerts_xy, pxl_size * 10)  # 222 m

print("Perform Distance Query: 444 m")
n_pts_r444 = _count_neighbours(tree_idx, c_alerts_xy, pxl_size * 20)  # 444 m

print("Perform Distance Query: 2222 m")
n_pts_r2222 = _count_neighbours(tree_idx, c_alerts_xy, pxl_size * 100)  # 2222 m

# Add the counts to the output vector layer:
c_alerts_gdf["n_pts_r2222"] = n_pts_r2222
c_alerts_gdf["n_pts_r444"] = n_pts_r444
c_alerts_gdf["n_pts_r222"] = n_pts_r222

## Apply Thresholds to identify isolated alerts 

These are more likely to be errors and need some QA. 

In [None]:
# An alert is flagged as isolated when any one of these conditions holds:
#   n_pts_r2222 < 2
#   (n_pts_r444 < 5) and (n_pts_r222 < 4)
#   n_pts_r222 < 3
n_pts_r2222_msk = n_pts_r2222 < 2
n_pts_r444_msk = (n_pts_r444 < 5) & (n_pts_r222 < 4)
n_pts_r222_msk = n_pts_r222 < 3

# Element-wise OR of the three boolean masks.
c_alerts_gdf["iso_pt"] = n_pts_r2222_msk | n_pts_r444_msk | n_pts_r222_msk

## Export the layer with the extra attributes:

In [None]:
# Write the current month's alerts to a GeoPackage; the layer name inside the
# file is the same as the file name without its extension.
out_alerts_vec_lyr = "gmw_alerts_{}_{}_v1_isopts".format(c_year, c_month_str)
out_alerts_vec_file = out_alerts_vec_lyr + ".gpkg"

c_alerts_gdf.to_file(
    out_alerts_vec_file,
    driver="GPKG",
    layer=out_alerts_vec_lyr,
)