# Notebook to Download Monthly Alerts and Merge into a Single File

In [None]:
import os
import numpy
import datetime
import json
import shutil
import geopandas
import pandas
from azure.storage.blob import BlobClient, ContainerClient

In [None]:
def zero_pad_num_str(
    num_val: float,
    str_len: int = 3,
    round_num: bool = False,
    round_n_digts: int = 0,
    integerise: bool = False,
    absolute: bool = False,
    gain: float = 1,
) -> str:
    """Format a number as a string left-padded with zeros.

    The optional transformations are applied in a fixed order: absolute
    value, then rounding, then scaling by ``gain`` and truncation to int.

    :param num_val: the number to format.
    :param str_len: minimum length of the returned string; longer values
                    are returned unchanged (never truncated).
    :param round_num: if True, round to ``round_n_digts`` decimal places.
    :param round_n_digts: number of decimal places used when rounding.
    :param integerise: if True, multiply by ``gain`` and truncate to int.
    :param absolute: if True, use the absolute value of ``num_val``.
    :param gain: multiplier; only applied when ``integerise`` is True.
    :return: the zero-padded string representation of the number.
    """
    value = abs(num_val) if absolute else num_val
    value = round(value, round_n_digts) if round_num else value
    value = int(value * gain) if integerise else value
    return f"{value}".zfill(str_len)

In [None]:
# Load the Azure storage details from a local JSON file. The download cell
# reads the "url" and "sas_token" entries from this dictionary.
sas_info_file = "/home/jovyan/azure_info.json"
with open(sas_info_file) as f:
    sas_token_info = json.load(f)

# Local scratch directory that receives the downloaded monthly files.
# exist_ok=True avoids the check-then-create race and keeps the cell
# safe to re-run when the directory is already present.
tmp_dir = "tmp_lcl"
os.makedirs(tmp_dir, exist_ok=True)

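## Define the years and months to be included.

`e_year` and `e_month` give the first month that is *excluded*: the download loop stops when it reaches that month, so the last month merged is the one before it.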

### Jan 2019 -- Oct 2022

```
years = [2019, 2020, 2021, 2022]
months = [1,2,3,4,5,6,7,8,9,10,11,12]

e_year = 2022
e_month = 11
```

### Jan 2019 -- Dec 2022

```
years = [2019, 2020, 2021, 2022, 2023]
months = [1,2,3,4,5,6,7,8,9,10,11,12]

e_year = 2023
e_month = 1
```


### Jan 2019 -- Jan 2023

```
years = [2019, 2020, 2021, 2022, 2023]
months = [1,2,3,4,5,6,7,8,9,10,11,12]

e_year = 2023
e_month = 2
```



In [None]:
# Calendar range to process. (e_year, e_month) is the first month that is
# NOT processed: the download loop stops as soon as it reaches it.
years = list(range(2019, 2023))
months = list(range(1, 13))

e_year, e_month = 2022, 11

## Iterate over the months, downloading the QA'd alerts and merging them into a single DataFrame:

In [None]:
# Mapping from the column names used in the monthly QA'd files to the
# column names used in the merged output.
alerts_col_renames = {
    "obs_year": "firstobsyear",
    "obs_month": "firstobsmonth",
    "obs_day": "firstobsday",
    "obs_date": "first_obs_date",
    "conf_year": "scr5obsyear",
    "conf_month": "scr5obsmonth",
    "conf_day": "scr5obsday",
    "conf_date": "scr5_obs_date",
    "lats": "latitude",
    "lons": "longitude",
}

# A single timestamp for the whole merged dataset, rather than a slightly
# different one for each month.
created_at = datetime.datetime.today()

alerts_lst = list()
reached_end = False
for c_year in years:
    print(c_year)
    for c_month in months:
        # (e_year, e_month) is the exclusive end of the range: stop before
        # processing (or reporting) that month.
        if (c_year == e_year) and (c_month == e_month):
            reached_end = True
            break
        print(f"\t{c_month}")
        c_month_str = zero_pad_num_str(c_month, str_len=2)

        # Check that the alerts vector file exists for this month and
        # download it if it does.
        alerts_vec_lyr = f"{c_year}_{c_month_str}"
        alerts_vec_file = f"gmw_alerts_{alerts_vec_lyr}_qad_v1.parquet.gzip"
        # URLs always use "/" as the separator; os.path.join would use the
        # operating system's path separator instead.
        alerts_vec_file_url = "/".join(
            (
                sas_token_info["url"].rstrip("/"),
                "monthly_alert_qad_vecs",
                alerts_vec_file,
            )
        )
        alerts_vec_file_url_signed = f"{alerts_vec_file_url}?{sas_token_info['sas_token']}"
        alerts_vec_blob_client = BlobClient.from_blob_url(alerts_vec_file_url_signed)
        if not alerts_vec_blob_client.exists():
            raise FileNotFoundError(
                f"A vector alerts file ({alerts_vec_file}) does not exist for "
                "the month/year specified - have you generated?"
            )

        alerts_vec_lcl_file = os.path.join(tmp_dir, alerts_vec_file)
        with open(file=alerts_vec_lcl_file, mode="wb") as download_file:
            download_file.write(alerts_vec_blob_client.download_blob().readall())

        # Read the downloaded vector layer into geopandas
        alerts_gdf = geopandas.read_parquet(alerts_vec_lcl_file)
        alerts_gdf = alerts_gdf.set_crs(epsg=4326, allow_override=True)

        alerts_gdf.rename(columns=alerts_col_renames, inplace=True)

        # The "last observation" fields are copies of the scr5 fields, and
        # "confident" is a copy of the score column.
        alerts_gdf["lastobsyear"] = alerts_gdf["scr5obsyear"]
        alerts_gdf["lastobsmonth"] = alerts_gdf["scr5obsmonth"]
        alerts_gdf["lastobsday"] = alerts_gdf["scr5obsday"]
        alerts_gdf["last_obs_date"] = alerts_gdf["scr5_obs_date"]
        alerts_gdf["confident"] = alerts_gdf["score"]
        alerts_gdf["created_at"] = created_at

        alerts_lst.append(alerts_gdf)
    if reached_end:
        break

if not alerts_lst:
    raise ValueError(
        "No monthly alert files were selected - check years, months, e_year and e_month."
    )

# Merge all months and assign a sequential identifier (starting at 1) to
# every alert; "id" is a copy of "uid".
alerts_gdf = pandas.concat(alerts_lst)
alerts_gdf = alerts_gdf.set_crs(epsg=4326, allow_override=True)
alerts_gdf["uid"] = numpy.arange(1, (alerts_gdf.shape[0]) + 1, 1, dtype=int)
alerts_gdf["id"] = alerts_gdf["uid"]
alerts_gdf.drop(columns=["rm_qa"], inplace=True)

## The total number of alerts:

In [None]:
# Number of rows (alerts) in the merged table.
alerts_gdf.shape[0]

## Export to vector layers

In [None]:
# Name the outputs after the last month included in the merge, i.e. the
# month immediately before the exclusive end month (e_year, e_month), so
# the file name cannot drift out of step with the configuration.
# NOTE(review): assumes every month up to the one before (e_year, e_month)
# was processed by the download loop - confirm if the configuration changes.
if e_month > 1:
    last_year, last_month = e_year, e_month - 1
else:
    last_year, last_month = e_year - 1, 12
out_lyr_name = f"gmw_alerts_all_{last_year}{last_month:02d}_qad_v1"

# Gzip-compressed GeoParquet copy of the merged alerts.
out_alerts_vec_pq_file = f"{out_lyr_name}.parquet.gzip"
alerts_gdf.to_parquet(out_alerts_vec_pq_file, compression='gzip')

# GeoPackage copy, with the layer named after the output file.
out_alerts_vec_file = f"{out_lyr_name}.gpkg"
alerts_gdf.to_file(out_alerts_vec_file, layer=out_lyr_name, driver="GPKG")


## Remove Temporary Directory

In [None]:
# Delete the scratch directory together with the downloaded monthly files.
shutil.rmtree(path=tmp_dir)