# Apply QA Edits

This notebook downloads the vector alerts for a given month, together with any associated QA edits, applies those QA edits, and writes the output files back to the Azure container.

In [None]:
import os
import numpy
import datetime
import json
import shutil
import geopandas
import pandas
from azure.storage.blob import BlobClient, ContainerClient

In [None]:
def zero_pad_num_str(
    num_val: float,
    str_len: int = 3,
    round_num: bool = False,
    round_n_digts: int = 0,
    integerise: bool = False,
    absolute: bool = False,
    gain: float = 1,
) -> str:
    """Format a number as a string left-padded with zeros.

    The optional transformations are applied in this order: absolute value,
    rounding, then conversion to an integer.

    :param num_val: the number to format.
    :param str_len: minimum length of the returned string; shorter strings
                    are left-padded with "0". Longer strings are returned
                    unchanged.
    :param round_num: if True, round the value to ``round_n_digts`` digits.
    :param round_n_digts: number of digits passed to ``round``.
    :param integerise: if True, multiply the value by ``gain`` and truncate
                       it with ``int``.
    :param absolute: if True, use the absolute value of ``num_val``.
    :param gain: multiplier applied only when ``integerise`` is True.
    :return: the zero-padded string.
    """
    value = abs(num_val) if absolute else num_val
    if round_num:
        value = round(value, round_n_digts)
    if integerise:
        # The gain is only applied as part of the integer conversion.
        value = int(value * gain)
    return f"{value}".zfill(str_len)

## Read Azure authentication info and create a local temporary directory

In [None]:
# Load the Azure connection details. Later cells read the "url" and
# "sas_token" entries of this dictionary to build signed blob URLs.
sas_info_file = "/home/jovyan/azure_info.json"
with open(sas_info_file, encoding="utf-8") as f:
    sas_token_info = json.load(f)

# Local scratch directory for the downloaded and generated files; it is
# removed by the final cell. exist_ok avoids the check-then-create race and
# lets this cell be re-run safely.
tmp_dir = "tmp_lcl"
os.makedirs(tmp_dir, exist_ok=True)

## Specify the month and year to be processed

### January 2019:
```
c_month = 1
c_year = 2019
```
### June 2020:
```
c_month = 6
c_year = 2020
```
### December 2021:
```
c_month = 12
c_year = 2021
```

In [None]:
# Month number and year of the alerts to process; edit these two values.
c_month = 11
c_year = 2022
# Month as a string zero-padded to at least two characters (e.g. 6 -> "06"),
# used when building the file names in later cells.
c_month_str = zero_pad_num_str(c_month, str_len=2)

## Specify whether to upload to Azure and overwrite any existing files:

In [None]:
# When True, the exported output files are uploaded to the Azure container.
upload = True
# Passed as the `overwrite` argument of `upload_blob` for each uploaded file.
overwrite_azure = False

## Find Alerts Vector File from Azure: Download and Load in Geopandas:

In [None]:
# Check whether an alerts vector exists for the month specified and download it if it does.
alerts_vec_lyr = f"{c_year}_{c_month_str}"
alerts_vec_file = f"gmw_alerts_{alerts_vec_lyr}_v1.parquet.gzip"
# NOTE: os.path.join is used to build the URL; it only produces "/" separators on POSIX systems.
alerts_vec_file_url = os.path.join(sas_token_info["url"], "monthly_alert_vecs", alerts_vec_file)
alerts_vec_file_url_signed = f"{alerts_vec_file_url}?{sas_token_info['sas_token']}"
alerts_vec_blob_client = BlobClient.from_blob_url(alerts_vec_file_url_signed)
if not alerts_vec_blob_client.exists():
    # The alerts vector is required, so stop here. FileNotFoundError is still
    # an Exception subclass, so any existing `except Exception` keeps working.
    raise FileNotFoundError(
        "A vector alerts file does not exist for the month/year specified - have you generated?"
    )

alerts_vec_lcl_file = os.path.join(tmp_dir, alerts_vec_file)
with open(file=alerts_vec_lcl_file, mode="wb") as download_file:
    # Stream the blob straight into the local file rather than buffering the
    # whole blob in memory first.
    alerts_vec_blob_client.download_blob().readinto(download_file)
alerts_vec_blob_client = None

# Read the downloaded vector layer into geopandas
alerts_gdf = geopandas.read_parquet(alerts_vec_lcl_file)
# allow_override=True replaces any CRS recorded in the file with EPSG:4326;
# set_crs does not reproject the coordinates.
alerts_gdf = alerts_gdf.set_crs(epsg=4326, allow_override=True)

## Find Alerts QA Vector File from Azure: Download and Load in Geopandas if exists:

In [None]:
# Check whether a QA edits vector exists for the month specified and download it if it does.
alerts_qa_vec_lyr = f"gmw_alerts_qa_{c_year}{c_month_str}"
alerts_qa_vec_file = f"{alerts_qa_vec_lyr}.geojson"
# NOTE: os.path.join is used to build the URL; it only produces "/" separators on POSIX systems.
alerts_qa_vec_file_url = os.path.join(sas_token_info["url"], "monthly_qa_edit_vecs", alerts_qa_vec_file)
alerts_qa_vec_file_url_signed = f"{alerts_qa_vec_file_url}?{sas_token_info['sas_token']}"
alerts_qa_vec_blob_client = BlobClient.from_blob_url(alerts_qa_vec_file_url_signed)
# Unlike the alerts vector, a missing QA file is not an error: the flag is
# recorded and checked before the QA edits are applied.
alerts_qa_exists = alerts_qa_vec_blob_client.exists()
alerts_qa_vec_lcl_file = os.path.join(tmp_dir, alerts_qa_vec_file)
if alerts_qa_exists:
    with open(file=alerts_qa_vec_lcl_file, mode="wb") as download_file:
        # Stream the blob straight into the local file rather than buffering
        # the whole blob in memory first.
        alerts_qa_vec_blob_client.download_blob().readinto(download_file)
alerts_qa_vec_blob_client = None

## If QA polygons are available then remove intersecting points from alerts vector:

In [None]:
if alerts_qa_exists:
    alerts_qa_gdf = geopandas.read_file(alerts_qa_vec_lcl_file)
    # allow_override=True replaces any CRS recorded in the file with EPSG:4326;
    # set_crs does not reproject the coordinates.
    alerts_qa_gdf = alerts_qa_gdf.set_crs(epsg=4326, allow_override=True)
    n_alerts_before = len(alerts_gdf)

    # Tag each alert with a temporary unique id so that matches can be traced
    # back to the original rows whatever index the GeoDataFrame carries.
    alerts_gdf["uid_tmp"] = numpy.arange(1, n_alerts_before + 1, 1, dtype=int)
    # Select the alerts which intersect a QA feature.
    sel_alerts_gdf = geopandas.sjoin(
        alerts_gdf, alerts_qa_gdf, how="inner", predicate="intersects"
    )
    # An alert intersecting several QA features appears once per match in the
    # join result, so reduce the matches to the distinct ids.
    rm_uids = sel_alerts_gdf["uid_tmp"].unique()
    # The removal mask is kept as a separate Series, not a column, so that no
    # helper column ends up in the exported files.
    rm_qa = alerts_gdf["uid_tmp"].isin(rm_uids)
    # Keep the alerts that were not matched and remove the tmp column. drop()
    # returns a new frame, avoiding an in-place edit of a filtered slice.
    alerts_gdf = alerts_gdf.loc[~rm_qa].drop(columns=["uid_tmp"])
    n_alerts_after = len(alerts_gdf)
    print(f"Alerts reduced from {n_alerts_before} to {n_alerts_after}.")

## If the number of alert points is > 0 then export to vector layer and upload to Azure:

In [None]:
if len(alerts_gdf) > 0:
    alerts_vec_lyr = f"{c_year}_{c_month_str}"

    out_alerts_vec_pq_file = f"gmw_alerts_{alerts_vec_lyr}_qad_v1.parquet.gzip"
    out_alerts_vec_pq_file_path = os.path.join(tmp_dir, out_alerts_vec_pq_file)

    out_alerts_vec_file = f"gmw_alerts_{alerts_vec_lyr}_qad_v1.gpkg"
    out_alerts_vec_file_path = os.path.join(tmp_dir, out_alerts_vec_file)

    # Write the alerts locally in both output formats.
    alerts_gdf.to_file(out_alerts_vec_file_path, layer=alerts_vec_lyr, driver="GPKG")
    alerts_gdf.to_parquet(out_alerts_vec_pq_file_path, compression="gzip")

    if upload:
        # Upload the GeoPackage first and then the parquet file; both go to
        # the same "monthly_alert_qad_vecs" location.
        for out_file, out_file_path in (
            (out_alerts_vec_file, out_alerts_vec_file_path),
            (out_alerts_vec_pq_file, out_alerts_vec_pq_file_path),
        ):
            # Skip an output which was not written to disk.
            if not os.path.exists(out_file_path):
                continue
            # NOTE: os.path.join is used to build the URL; it only produces
            # "/" separators on POSIX systems.
            out_file_url = os.path.join(sas_token_info["url"], "monthly_alert_qad_vecs", out_file)
            out_file_url_signed = f"{out_file_url}?{sas_token_info['sas_token']}"
            blob_client = BlobClient.from_blob_url(out_file_url_signed)
            with open(out_file_path, "rb") as data:
                blob_client.upload_blob(data, overwrite=overwrite_azure)
            blob_client = None
else:
    # Make the no-op visible rather than finishing silently.
    print("There are no alerts to export; no output files were written or uploaded.")
    

## Remove the Temporary Directory

In [None]:
# Remove the local scratch directory and everything in it. The check lets this
# cell be re-run without raising once the directory has already been removed.
if os.path.isdir(tmp_dir):
    shutil.rmtree(tmp_dir)