## INITIALIZE REQUIREMENTS

### Load Dependencies

In [1]:
%matplotlib inline
# !pip install python-dotenv
# load_dotenv()
# from dotenv import load_dotenv

import os, glob, warnings, datacube, rasterio, folium, json
import numpy as np
import xarray as xr
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as rio
import pandas as pd
from rasterio.merge import merge
from rasterio.plot import show
from shapely.geometry import Point
from shapely.geometry import Polygon


from scipy.ndimage import uniform_filter
from scipy.ndimage import variance
from skimage.filters import threshold_minimum
from datacube.utils.geometry import Geometry

from deafrica_tools.spatial import xr_rasterize
from deafrica_tools.datahandling import load_ard
from deafrica_tools.plotting import display_map, rgb
from deafrica_tools.areaofinterest import define_area

 # G-Drive Dependencies
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaFileUpload
from google.oauth2 import service_account

from IPython.display import clear_output
from IPython.display import display

warnings.filterwarnings("ignore")

In [2]:
# G-Drive Scopes and Credentials
# 'creds' folder contains credentials.json - service account, credentials_2.json - Desktop APP and token.json - user account.
SCOPES = ["https://www.googleapis.com/auth/drive"]
credential_path = "../creds/credentials_2.json"
u_credential_path = "../creds/token.json"

### Connect to the datacube

Connect to the datacube so we can access DEA data.
The `app` parameter is a unique name for the analysis which is based on the notebook file name.

In [3]:
dc = datacube.Datacube(app="Radar_water_detection")

### G-Drive Folder IDs and Timerange Definitions

In [4]:
# Create folder ids by copying the ID from the g-drive folder url
FLOOD_MEAN_ID = "196YsHy1SXjNDnja6LjVhhlvoIt-Jg91l"
FLOOD_MEDIAN_ID = "1Qhum99pKi1Qyon8hcp4K8S_DGaJh5a5e"

PREFLOOD_MEAN_ID = "1K0KqGlLxdUCsXg4771k_uf_x2UwbHlqa"
PREFLOOD_MEDIAN_ID = "1Ovu5Q58xZRGpKsvowVr49klOrgVoZ0XQ"

FE_MEDIAN_ID = "1CJfoCyWdcUo92nVyehWa2QQ-vshM9FTM"

FLOOD_MEAN_TEST_ID = "1bv76i244wyJzfpWN57wM7Yh4jQ38gove"
PREFLOOD_MEAN_TEST_ID = "1msS6VuX_8UptluKIuQG5PJNOrwqYtacY"

REF_TEST_PRF_ID = "1-GUFFarvB1jpuTBIJ-qJAPkVroVDLXk8"
REF_TEST_F_ID = "10rwk3OW_fBL8ws7K7kt4P5EFjzpuyWp4"
FE_EXT_ID = "1h9oOMWxi6Q18Vbwv-tVOGO6UiQSOX3Tj"

In [5]:
# Define main time period of analysis
timerange = ('2024-02', '2024-09')

# Define sub-periods of analysis - should be within main time period
pre_flood = ['2024-02', '2024-03', '2024-04'] 
flood = ['2024-05', '2024-06', '2024-07', '2024-08', '2024-09'] 

# Run 1. aoi-threshold.ipynb to get the value of th_aoi and store it here.
th_aoi = -27.395682

### G-Drive Function Definitions

In [6]:
def create_user_token():
    # Creates a user token. On first run, run locally to generate token.json and add to root.
    creds = None

    if os.path.exists(u_credential_path):
        creds = Credentials.from_authorized_user_file(u_credential_path, SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(credential_path, SCOPES)
            creds = flow.run_local_server(port=0)

    # Save the credentials for the next run
    with open(u_credential_path, "w") as token:
        token.write(creds.to_json())
    return creds


def create_token():
    # Creates service token. Will be depreciated
    creds = None
    creds = service_account.Credentials.from_service_account_file(
        credential_path, scopes=SCOPES
    )
    return creds


def list_gdrive():
    # Lists all the files and folders in the root directory of g-drive.
    try:
        results = (
            service.files()
            .list(pageSize=20, fields="nextPageToken, files(id, name)")
            .execute()
        )
        items = results.get("files", [])
        if not items:
            print("No files found.")
            return
        print("Files:")
        for item in items:
            print(f"{item['name']} ({item['id']})")
    except HttpError as error:
        print(f"An error occurred: {error}")


def upload_to_gdrive(file_paths, folder_id=None):
    # Uploads list of files in file_paths to folder_id
    for f_path in file_paths:
        try:
            # Create g-drive API client using desktop app and user credentials
            folder_path = "../Supplementary_data/DriveCredentials/{}".format(folder_id)

            f_name = os.path.basename(f_path)
            file_metadata = {"name": f_name, "parents": [folder_id]}
            
            media = MediaFileUpload(f_path, chunksize=10485760, resumable=True) # 10MB chuncksize
            
            # pylint: disable=maybe-no-member
            file = (
                service.files().create(body=file_metadata, media_body=media).execute()
            )
            print("\033[32m" + "{} UPLOADED SUCCESSFULLY".format(f_name) + "\033[0m")

            # Delete from sandbox to save disc space
            os.remove(f_path)
        except HttpError as error:
            print(f"An error occurred: {error}")
            file = None
    return

def delete_files(file_id): 
    results = service.files().list(q = "'{}' in parents".format(file_id)).execute()
    items = results.get('files', [])
    for item in items:
        try:
            response = service.files().delete(fileId=item['id']).execute()
            print('FILE DELETED SUCCESSFULLY')
        except HttpError as error:
            print(f"An error occurred: {error}")

def get_storage():
    storage = service.about().get(fields = "storageQuota").execute()
    print("G-Drive Storage Capacity: {}TB".format(round(int(storage['storageQuota']['limit'])/10**12)))
    print("Total Used: {}GB".format(round(int(storage['storageQuota']['usage'])/10**9)))
    print("In Drive: {}GB".format(round(int(storage['storageQuota']['usageInDrive'])/10**9)))
    print("In Trash: {}GB".format(round(int(storage['storageQuota']['usageInDriveTrash'])/10**9)))


u_creds = create_user_token()
service = build("drive", "v3", credentials=u_creds)

In [7]:
get_storage()

G-Drive Storage Capacity: 2TB
Total Used: 83GB
In Drive: 83GB
In Trash: 0GB


### Filter and Classifier Functions

In [8]:
# Function to apply lee filtering on S1 image. Speckle Filter
def lee_filter(da, size):
    """
    Apply lee filter of specified window size.
    Adapted from https://stackoverflow.com/questions/39785970/speckle-lee-filter-in-python

    """
    img = da.values
    img_mean = uniform_filter(img, size)
    img_sqr_mean = uniform_filter(img**2, size)
    img_variance = img_sqr_mean - img_mean**2

    overall_variance = variance(img)

    img_weights = img_variance / (img_variance + overall_variance)
    img_output = img_mean + img_weights * (img - img_mean)

    return img_output

# Classifier Function
def S1_water_classifier(da, threshold):
    water_data_array = da < threshold
    return water_data_array.to_dataset(name="s1_water")

In [9]:
# TODO - Add mean and median generators as functions

In [10]:
def iterate_grid(aoi_m, c):
    
    PRF_FOLDER_ID = PREFLOOD_MEDIAN_ID
    F_FOLDER_ID = FLOOD_MEDIAN_ID
    FE_FOLDER_ID = FE_MEDIAN_ID
    
    e_log = []
    cell = 1
    for aoi, i in zip(aoi_m, c):
        geopolygon = Geometry(aoi["features"][0]["geometry"], crs="epsg:4326")
        geopolygon_gdf = gpd.GeoDataFrame(geometry=[geopolygon], crs=geopolygon.crs)
        g = geopolygon_gdf.centroid
        print(
            "\n\n"
            + "\033[32m"
            + "PROCESSING GRID CELL ID {} NO. {}/{} CENTROID ({}, {})".format(
                i[2], cell, len(aoi_m), round(g.y[0], 5), round(g.x[0], 5)
            )
            + "\033[0m"
        )
    
        # Get the latitude and longitude range of the geopolygon
        lat_range = (geopolygon_gdf.total_bounds[1], geopolygon_gdf.total_bounds[3])
        lon_range = (geopolygon_gdf.total_bounds[0], geopolygon_gdf.total_bounds[2])
    
        # Load Sentinel1 data
        try:
            S1 = load_ard(
                dc=dc,
                products=["s1_rtc"],
                # measurements=["vv", "vh"],
                measurements=["vh"],
                y=lat_range,
                x=lon_range,
                time=timerange,
                output_crs="EPSG:6933",
                resolution=(-20, 20),
                group_by="solar_day",
                dtype="native",
            )
        except Exception as e:
            # Log error aoi centroids and keep looping
            e_log.append([g.x[0], g.y[0], i[2], "P"])
            print(
                "\n\n"
                + "\033[31m"
                + "ERROR PROCESSING GRID CELL {}/{} CENTROID ({}, {}). LOGGED CENTROID INFO in e_log".format(
                    i[2], len(aoi_m), round(g.y[0], 5), round(g.x[0], 5)
                )
                + "\033[0m"
            )
            print(e)
            cell += 1
            continue
    
        timesteps = [2, 4, 6, 9, 11]
    
        # The lee filter above doesn't handle null values
        # We therefore set null values to 0 before applying the filter
        valid = np.isfinite(S1)
        S1 = S1.where(valid, 0)
    
        # Create a new entry in dataset corresponding to filtered VV and VH data
        S1["filtered_vh"] = S1.vh.groupby("time").apply(lee_filter, size=7)
    
        # Null pixels should remain null
        S1["filtered_vh"] = S1.filtered_vh.where(valid.vh)
    
        # Convert the digital numbers to dB
        S1["filtered_vh"] = 10 * np.log10(S1.filtered_vh)
    
        threshold_vh = th_aoi
    
        S1["water"] = S1_water_classifier(S1.filtered_vh, threshold_vh).s1_water
        FS1 = S1.water
        PRFS1 = S1.water
    
        # Creting outputs
        # Export to raster - upload to g-drive - delete from sandbox
        
        # --------------------------------------- preflood ----------------------------------------------
        if i[3] in [None, "P", "U-PRF"]:
            S1_PreFlood = PRFS1.sel(time=pre_flood, method="nearest").median(dim="time")
            preflood_val = "CELL_" + str(i[2]) + "_PRE_FLOOD_MEDIAN"
            preflood_name = preflood_val + ".tif"
            preflood_out = "output/preflood/" + preflood_name
            S1_PreFlood.rio.to_raster(preflood_out)
            
            # preflood meta
            prf_meta_dict = {
                "GRID_CELL_ID": i[2],
                "start_time": pre_flood[0],
                "end_time": pre_flood[-1],
                "lat": lat_range, 
                "lon": lon_range,
                "centroid": "{}, {}".format(g.y[0], g.x[0]),
                "crs": str(geopolygon.crs)
                }
            
            text_flie_name = preflood_val + "_META.json"
            prf_meta_path = "output/preflood/" + text_flie_name
            with open(prf_meta_path, 'w') as f:
                json.dump(prf_meta_dict, f)
            
            try:
                upload_to_gdrive([preflood_out, prf_meta_path], PRF_FOLDER_ID)
            except Exception as e:
                e_log.append([g.x[0], g.y[0], i[2], "U-PRF"])
                print(
                    "\n\n"
                    + "\033[31m"
                    + "ERROR UPLOADING GRID CELL ID {} NO.  {}/{} CENTROID ({}, {}). LOGGED CENTROID INFO in e_log".format(
                        cell, i[2], len(aoi_m), round(g.y[0], 5), round(g.x[0], 5)
                    )
                    + "\033[0m"
                )
                print(e)
                cell += 1
    
        # ----------------------------------------- flood ----------------------------------------------
        if i[3] in [None, "P", "U-F"]:
            S1_Flood = FS1.sel(time=flood, method="nearest").median(dim="time")
            flood_val = "CELL_" + str(i[2]) + "_FLOOD_MEDIAN"
            flood_name = flood_val + ".tif"
            flood_out = "output/flood/" + flood_name
            S1_Flood.rio.to_raster(flood_out)
            
    
            # flood meta
            f_meta_dict = {
                "GRID_CELL_ID": i[2],
                "start_time": flood[0],
                "end_time": flood[-1],
                "lat": lat_range, 
                "lon": lon_range,
                "centroid": "{}, {}".format(g.y[0], g.x[0]),
                "crs": str(geopolygon.crs)
                }
            
            text_flie_name = flood_val + "_META.json"
            f_meta_path = "output/flood/" + text_flie_name
            with open(f_meta_path, 'w') as f:
                json.dump(f_meta_dict, f)
            
            try:
                upload_to_gdrive([flood_out, f_meta_path], F_FOLDER_ID)
            except Exception as e:
                e_log.append([g.x[0], g.y[0], i[2], "U-F"])
                print(
                    "\n\n"
                    + "\033[31m"
                    + "ERROR UPLOADING GRID CELL ID {} NO. {}/{} CENTROID ({}, {}). LOGGED CENTROID INFO in e_log".format(
                        cell, i[2], len(aoi_m), round(g.y[0], 5), round(g.x[0], 5)
                    )
                    + "\033[0m"
                )
                print(e)
                cell += 1


        # ----------------------------------------- difference ----------------------------------------------
        if i[3] in [None, "P", "U-FE"]:
            S1_FE = S1_Flood - S1_PreFlood
            fe_val = "CELL_" + str(i[2]) + "_FE_MEAN"
            fe_name = fe_val + ".tif"
            fe_out = "output/fe/" + fe_name
            S1_FE.rio.to_raster(fe_out)
            
    
            # flood meta
            fe_meta_dict = {
                "GRID_CELL_ID": i[2],
                "start_time": timerange[0],
                "end_time": timerange[-1],
                "lat": lat_range, 
                "lon": lon_range,
                "centroid": "{}, {}".format(g.y[0], g.x[0]),
                "crs": str(geopolygon.crs)
                }
            
            text_flie_name = fe_val + "_META.json"
            fe_meta_path = "output/fe/" + text_flie_name
            with open(fe_meta_path, 'w') as f:
                json.dump(fe_meta_dict, f)
            
            try:
                upload_to_gdrive([fe_out, fe_meta_path], FE_FOLDER_ID)
            except Exception as e:
                e_log.append([g.x[0], g.y[0], i[2], "U-FE"])
                print(
                    "\n\n"
                    + "\033[31m"
                    + "ERROR UPLOADING GRID CELL ID {} NO. {}/{} CENTROID ({}, {}). LOGGED CENTROID INFO in e_log".format(
                        cell, i[2], len(aoi_m), round(g.y[0], 5), round(g.x[0], 5)
                    )
                    + "\033[0m"
                )
                print(e)
                cell += 1

        cell += 1
        clear_output()
    
    if len(e_log) == 0:
        print("\n\n" + "\033[32m" + "GRID PROCESSED AND UPLOADED SUCCESSFULLY" + "\033[0m" + "\n\n")

    # return e_log to be run again
    return e_log

In [11]:
# Crete the aoi-mosaic - aoi_m
def gen_aoim(c, b):
    aoi_m = []
    for i in c:
        aoi_m.append(define_area(i[1], i[0], buffer=b))
    # print(c, len(aoi_m))
    e_log = iterate_grid(aoi_m, c)

    # return e_log to be run again
    return e_log

In [12]:
# Visualize input file
def view_input(gdf_list, grid_c):
    p = gdf_list[0].dissolve()
    center = p.centroid
    map = folium.Map(location=[center.y, center.x], tiles="CartoDB Positron")
    
    for gdf in gdf_list:
        folium.GeoJson(gdf, name="{}".format(gdf)).add_to(map)

    for c in grid_c:
        folium.Marker(
            location=[c[1], c[0]],
            popup=f"Centroid: {c[1]}, {c[0]}",
            icon= folium.DivIcon(
                icon_size=(10, 10),
                icon_anchor=(0,0),
                html='<div style="font-size: 10pt">{}</div>'.format(c[2]),
                )
        ).add_to(map)
    
    bounds = gdf_list[0].total_bounds.tolist()
    map.fit_bounds([bounds[:2][::-1], bounds[2:][::-1]])
    display(map)

In [13]:
# Create grid
from shapely import intersection as intersect
def create_grid(adm0, size):
    bounds = adm0.bounds
    minx = bounds.minx[0] #only 1 feature at the 0th index
    miny = bounds.miny[0]
    maxx = bounds.maxx[0]
    maxy = bounds.maxy[0]

    grid = gpd.GeoDataFrame()
    for x0 in np.arange(minx, maxx, size):
        for y0 in np.arange(miny, maxy, size):
            x1 = x0 + size
            y1 = y0 + size
            d = {'geometry': [Polygon([(x0, y0), (x1, y0), (x1, y1), (x0, y1)])]}
            cell = gpd.GeoDataFrame(d, crs="EPSG:4326")
            flag = adm0.intersection(cell)
            if flag[0].is_empty == False:
                grid = pd.concat([grid, cell])
                
    return grid

In [14]:
# Check CRS and convert to 4326 if required
def crs_check(shp):
    shp = gpd.read_file(shp)
    if shp.crs != "EPSG:4326":
        print("Added ADM0 CRS is {}. Converting to EPSG:4326...".format(shp.crs))
        shp = shp.to_crs("EPSG:4326")
        if shp.crs == "EPSG:4326":
            print("Done")

    return shp

### Upload Gridded Vector File

In [30]:
# Load file from sandbox disc. file should be present in 'input' folder
# grid = gpd.read_file("input/Lake Chad.geojson")
# grid = gpd.read_file("input/TCD_55KM_4CTEST.geojson")
# grid = gpd.read_file("input/TCD_55KM_BASE.geojson")
# grid = gpd.read_file("input/TCD_55KM_ERR.geojson")

shp = "input/TCD_adm0.geojson"
adm0 = crs_check(shp)
size = 0.5  # Grid cell size 0.5 ~ 55KM
grid = create_grid(adm0, size)

# Calculate centroids and store in centroid list c[].
c = []
g = grid.centroid

cell_id = 1
for i in g:
    c.append(
        [round(i.x, 5), round(i.y, 5), cell_id, None]
    )  # The array c[] has four values: x, y, cell_id and None. None will store the "P" or "U" error value
    cell_id += 1

# # REMOVE THIS AFTER DONE
# c = c[:10]
view_input([grid, adm0], c)

### Run Application

In [13]:
# get e_log with centroids, cell_id and error message
# Calling gen_aoim will run the entire Application
e_log = gen_aoim(c, size/2)

print(len(e_log))

212


In [17]:
# Write error log to disc
# e_log = np.array(e_log)
# with open('error_centroids.json', 'w') as filehandle:
#     json.dump(e_log.tolist(), filehandle)
ERR_FOLDER_ID="12ma2wfk79Vue8hgigPEu_QwGm_dWxqZE"

#read error log from disk
with open('error_centroids.json') as f:
    e_log = json.load(f)
for e in e_log:
    e[0] = float(e[0])
    e[1] = float(e[1])
    e[2] = int(e[2])

e_log

try:
    upload_to_gdrive(["error_centroids.json"], ERR_FOLDER_ID)
except Exception as e:
    print("FAILED TO UPLOAD ERROR LOG FILE REASON:{}".format(e))

[32merror_centroids.json UPLOADED SUCCESSFULLY[0m


FileNotFoundError: [Errno 2] No such file or directory: 'error_centroids.json'

In [39]:
# Visualize Error Cells
e_grid = gpd.GeoDataFrame()
for e in e_log:
    point = Point(e[0], e[1]) #This takes x first and then y
    gdf = gpd.GeoDataFrame(geometry=[point])
    buffer = 0.25
    cell = gpd.GeoDataFrame()
    cell['geometry'] = gdf.buffer(buffer, cap_style='square')
    e_grid = pd.concat([e_grid, cell])
e_grid = e_grid.set_crs('epsg:4326') #e_grid with same cell size as main grid

# e_grid_aoi = e_grid.dissolve()
# e_grid_fine = create_grid(e_grid_aoi, size) # if require to change the size and make it finer

view_input([e_grid], e_log)
    

In [None]:
# Re-run application for cells logged in e_log
if len(e_log)>0:
    e_log = gen_aoim(e_log, size/2)

## Write Outputs To Disc

In [None]:
dirs = ['flood', 'preflood', 'postflood']

for dir in dirs:
    loc = "output/" + dir
    out = "output/{}/Merged_{}.tif".format(dir, dir)
    extension = "*.tif"
    q = os.path.join(loc, extension)
    files = glob.glob(q)

    r =[]
    for f in files:
        s = rasterio.open(f)
        r.append(s)
    if len(r)>0:
        mosaic, out_trans = merge(r)
        out_meta = s.meta.copy()
        out_meta.update({"driver": "GTiff",
                    "height": mosaic.shape[1],
                    "width": mosaic.shape[2],
                    "transform": out_trans
                    })
        with rasterio.open(out, "w", **out_meta) as dest:
            dest.write(mosaic)
            # upload_to_gdrive(out, "flood")

In [None]:
creds = create_user_token()
service = build("drive", "v3", credentials=creds)
storage = service.about().get(fields = "storageQuota").execute()
print("Storage Capacity: {}TB".format(round(int(storage['storageQuota']['limit'])/10**12)))